diff --git a/404.html b/404.html index fc3e5ee..2decaa1 100644 --- a/404.html +++ b/404.html @@ -486,6 +486,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/CNAME b/CNAME deleted file mode 100644 index 4530366..0000000 --- a/CNAME +++ /dev/null @@ -1 +0,0 @@ -docs.geniusrise.ai \ No newline at end of file diff --git a/audio/api/base/index.html b/audio/api/base/index.html index 9d1dbc4..5a1fb67 100644 --- a/audio/api/base/index.html +++ b/audio/api/base/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + @@ -3776,7 +3818,7 @@

    -listen(model_name, model_class='AutoModel', processor_class='AutoProcessor', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, concurrent_queries=False, use_whisper_cpp=False, endpoint='*', port=3000, cors_domain='http://localhost:3000', username=None, password=None, **model_args) +listen(model_name, model_class='AutoModel', processor_class='AutoProcessor', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, concurrent_queries=False, use_whisper_cpp=False, use_faster_whisper=False, endpoint='*', port=3000, cors_domain='http://localhost:3000', username=None, password=None, **model_args)

    @@ -3966,6 +4008,20 @@

    False + + use_faster_whisper + + bool + + +
    +

    Whether to use faster-whisper.

    +
    + + + False + + endpoint diff --git a/audio/api/s2t/index.html b/audio/api/s2t/index.html index fef439f..06a83c2 100644 --- a/audio/api/s2t/index.html +++ b/audio/api/s2t/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + @@ -1191,6 +1233,13 @@ initialize_pipeline() + + +
  • + + process_faster_whisper() + +
  • @@ -3490,6 +3539,13 @@ initialize_pipeline() +
  • + +
  • + + process_faster_whisper() + +
  • @@ -3844,6 +3900,121 @@

    +process_faster_whisper(audio_input, model_sampling_rate, chunk_size, generate_args) + +

    + + +
    + +

    Processes audio input with the faster-whisper model.

    + + + +

    Parameters:

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    audio_input + bytes + +
    +

    The audio input for transcription.

    +
    +
    + required +
    model_sampling_rate + int + +
    +

    The sampling rate of the model.

    +
    +
    + required +
    chunk_size + int + +
    +

    The size of audio chunks to process.

    +
    +
    + required +
    generate_args + Dict[str, Any] + +
    +

    Additional arguments for transcription.

    +
    +
    + required +
    + + + +

    Returns:

    + + + + + + + + + + + + + +
    TypeDescription
    + Dict[str, Any] + +
    +

    Dict[str, Any]: A dictionary containing the transcription results.

    +
    +
    + +
    + + + + +
    + + +

    process_seamless(audio_input, model_sampling_rate, processor_args, chunk_size, overlap_size, generate_args) diff --git a/audio/api/t2s/index.html b/audio/api/t2s/index.html index c5e2761..fb7df20 100644 --- a/audio/api/t2s/index.html +++ b/audio/api/t2s/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + +

  • diff --git a/audio/bulk/base/index.html b/audio/bulk/base/index.html index 84eb74a..ce4b3f8 100644 --- a/audio/bulk/base/index.html +++ b/audio/bulk/base/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + @@ -3741,7 +3783,7 @@

    -load_models(model_name, processor_name, model_revision=None, processor_revision=None, model_class='', processor_class='AutoFeatureExtractor', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, flash_attention=False, better_transformers=False, **model_args) +load_models(model_name, processor_name, model_revision=None, processor_revision=None, model_class='', processor_class='AutoFeatureExtractor', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, flash_attention=False, better_transformers=False, use_whisper_cpp=False, use_faster_whisper=False, **model_args)

    @@ -3973,6 +4015,34 @@

    +

    Whether to use whisper.cpp to load the model. Defaults to False. Note: only works for these models: https://github.com/aarnphm/whispercpp/blob/524dd6f34e9d18137085fb92a42f1c31c9c6bc29/src/whispercpp/utils.py#L32

    + + + + False + + + + use_faster_whisper + + bool + + +
    +

    Whether to use faster-whisper.

    +
    + + + False + + **model_args diff --git a/audio/bulk/s2t/index.html b/audio/bulk/s2t/index.html index aefd56b..33fd523 100644 --- a/audio/bulk/s2t/index.html +++ b/audio/bulk/s2t/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + @@ -1427,6 +1469,13 @@ initialize_pipeline() + + +
  • + + process_faster_whisper() + +
  • @@ -3490,6 +3539,13 @@ initialize_pipeline() +
  • + +
  • + + process_faster_whisper() + +
  • @@ -3844,6 +3900,121 @@

    +process_faster_whisper(audio_input, model_sampling_rate, chunk_size, generate_args) + +

    + + +
    + +

    Processes audio input with the faster-whisper model.

    + + + +

    Parameters:

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    audio_input + bytes + +
    +

    The audio input for transcription.

    +
    +
    + required +
    model_sampling_rate + int + +
    +

    The sampling rate of the model.

    +
    +
    + required +
    chunk_size + int + +
    +

    The size of audio chunks to process.

    +
    +
    + required +
    generate_args + Dict[str, Any] + +
    +

    Additional arguments for transcription.

    +
    +
    + required +
    + + + +

    Returns:

    + + + + + + + + + + + + + +
    TypeDescription
    + Dict[str, Any] + +
    +

    Dict[str, Any]: A dictionary containing the transcription results.

    +
    +
    + +
    + + + + +
    + + +

    process_seamless(audio_input, model_sampling_rate, processor_args, chunk_size, overlap_size, generate_args) diff --git a/audio/bulk/t2s/index.html b/audio/bulk/t2s/index.html index 53ab3d2..528c78f 100644 --- a/audio/bulk/t2s/index.html +++ b/audio/bulk/t2s/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + +

  • diff --git a/blog/huggingface/chat/index.html b/blog/huggingface/chat/index.html index 5ae8247..995deb0 100644 --- a/blog/huggingface/chat/index.html +++ b/blog/huggingface/chat/index.html @@ -13,13 +13,17 @@ + + + + - Host Chat Models Using Geniusrise - + Chat - @@ -121,7 +125,7 @@
    - Host Chat Models Using Geniusrise + Chat
    @@ -281,16 +285,18 @@ + + -
  • +
  • - + @@ -299,7 +305,7 @@ -
  • @@ -3382,15 +3541,22 @@
  • - - Example Configuration for Standard Language Models + + Transformers
  • - - VLLM (Very Large Language Models) Configuration Example + + VLLM + + +
  • + +
  • + + llama.cpp
  • @@ -3432,13 +3598,6 @@ - - -
  • - - Usecases & Variations - -
  • @@ -3457,11 +3616,23 @@

    Host Language Models Using Geniusrise

    -

    Deploying language models for various tasks is now seamless with Geniusrise. This guide will walk you through setting up inference APIs for different language model applications, from text generation to code completion. We'll dive into the genius.yml configuration, illustrating how to fine-tune parameters for specific use cases and interact with your API using curl and python-requests.

    +

    Language modeling is the task that any foundational model is trained on, and later fine-tuned for other tasks like chat. Language models are mostly useful for one-shot tasks or tasks that need certain control, e.g. forcing zero-shot classification by asking the model to output only one token. We'll dive into hosting a language model and interacting with your API using curl and python-requests.

    Getting Started

    -

    First, ensure Geniusrise and its vision component are installed:

    -
    pip install geniusrise
    -pip install geniusrise-vision
    +
    +
    +

    Requirements

    + +

    Optional: Set up a virtual environment:

    +
    virtualenv venv -p `which python3.10`
    +source venv/bin/activate
    +
    +

    First, ensure Geniusrise and its text component are installed:

    +
    pip install geniusrise
    +pip install geniusrise-text
     

    Configuration File: genius.yml

    The genius.yml file is the heart of your API setup. Here's a breakdown of its key parameters:

    @@ -3474,43 +3645,20 @@

    Configuration File: genius.ymlmethod: Operation mode, usually listen for API services.
  • args: Detailed model and server specifications.
  • -

    Example Configuration for Standard Language Models

    -
    version: "1"
    -
    -bolts:
    -  my_bolt:
    -    name: LanguageModelAPI
    -    state:
    -      type: none
    -    input:
    -      type: batch
    -      args:
    -        input_folder: ./input
    -    output:
    -      type: batch
    -      args:
    -        output_folder: ./output
    -    method: listen
    -    args:
    -      model_name: "mistralai/Mistral-7B-Instruct-v0.1"
    -      model_class: AutoModelForMaskedLM
    -      tokenizer_class: AutoTokenizer
    -      use_cuda: true
    -      precision: float
    -      device_map: cuda:0
    -      endpoint: "0.0.0.0"
    -      port: 3000
    -      cors_domain: "http://localhost:3000"
    -      username: user
    -      password: password
    -
    -

    VLLM (Very Large Language Models) Configuration Example

    -

    For handling VLLMs with Geniusrise, adjust the args to accommodate specific requirements, such as enabling eager loading or managing memory more efficiently:

    +

    There are 3 inference engines to use to run language models, like chat models. These are:

    +
      +
    1. pytorch, via transformers
    2. +
    3. VLLM
    4. +
    5. llama.cpp
    6. +
    +

    There exist a few more alternatives which we do not support yet: e.g. Triton, TensorRT-LLM.

    +

    Here are a few examples of yaml config for each of these inference engines:

    +

    Transformers

    version: "1"
     
     bolts:
       my_bolt:
    -    name: VLLMAPI
    +    name: LanguageModelAPI
         state:
           type: none
         input:
    @@ -3523,66 +3671,141 @@ 

    VLLM (Very Large output_folder: ./output method: listen args: - model_name: "mistralai/Mistral-7B-Instruct-v0.1" + model_name: "mistralai/Mistral-7B-v0.1" model_class: AutoModelForCausalLM tokenizer_class: AutoTokenizer use_cuda: true - precision: float16 - quantization: 0 - device_map: auto - max_memory: None - torchscript: False - use_vllm: true - vllm_enforce_eager: True - vllm_max_model_len: 1024 - endpoint: "*" - port: 3000 - cors_domain: "http://localhost:3000" - username: user - password: password + precision: float + device_map: cuda:0 + endpoint: "0.0.0.0" + port: 3000 + cors_domain: "http://localhost:3000" + username: user + password: password +

    +

    VLLM

    +

    For handling VLLMs with Geniusrise, adjust the args to accommodate specific requirements, such as enabling eager loading or managing memory more efficiently:

    +
    version: "1"
    +
    +bolts:
    +  my_bolt:
    +    name: LanguageModelAPI
    +    state:
    +      type: none
    +    input:
    +      type: batch
    +      args:
    +        input_folder: ./input
    +    output:
    +      type: batch
    +      args:
    +        output_folder: ./output
    +    method: listen
    +    args:
    +      model_name: TheBloke/Mistral-7B-Instruct-v0.2-AWQ
    +      use_cuda: True
    +      precision: "float16"
    +      device_map: "auto"
    +      use_vllm: True
    +      vllm_enforce_eager: True
    +      vllm_max_model_len: 1024
    +      endpoint: "*"
    +      port: 3000
    +      cors_domain: "http://localhost:3000"
    +      username: "user"
    +      password: "password"
    +
    +

    llama.cpp

    +
    version: "1"
    +
    +bolts:
    +  my_bolt:
    +    name: LanguageModelAPI
    +    state:
    +      type: none
    +    input:
    +      type: batch
    +      args:
    +        input_folder: ./input
    +    output:
    +      type: batch
    +      args:
    +        output_folder: ./output
    +    method: listen
    +    args:
    +      model_name: TheBloke/Mistral-7B-v0.1-GGUF
    +      use_cuda: True
    +      use_llama_cpp: True
    +      llama_cpp_filename: mistral-7b-v0.1.Q4_K_M.gguf
    +      llama_cpp_n_gpu_layers: 35
    +      llama_cpp_n_ctx: 4096
    +      endpoint: "*"
    +      port: 3000
    +      cors_domain: "http://localhost:3000"
    +      username: "user"
    +      password: "password"
     

    Launching Your API

    Execute the following in your terminal:

    -
    genius rise
    +
    genius rise
     

    Interacting with Your API

    Using curl for HTTP Requests

    -

    Example for a Text Generation API:

    -
    curl -X POST http://localhost:3000/api/v1/complete \
    -     -H "Content-Type: application/json" \
    -     -d '{"prompt": "Here is your prompt.", "max_new_tokens": 1024, "do_sample": true}'
    +

    Example for transformers:

    +
    /usr/bin/curl -X POST localhost:3000/api/v1/complete \
    +    -H "Content-Type: application/json" \
    +    -d '{
    +        "prompt": "## Write a detailed plan for a month-long urban gardening project\n\n",
    +        "decoding_strategy": "generate",
    +        "max_new_tokens": 1024,
    +        "do_sample": true
    +    }' | jq
    +
    +

    For VLLM:

    +
    curl -v -X POST "http://localhost:3000/api/v1/complete_vllm" \
    +    -H "Content-Type: application/json" \
    +    -u "user:password" \
    +    -d '{
    +        "messages": ["Whats the weather like in London?"],
    +        "temperature": 0.7,
    +        "top_p": 1.0,
    +        "n": 1,
    +        "max_tokens": 50,
    +        "presence_penalty": 0.0,
    +        "frequency_penalty": 0.0,
    +        "user": "example_user"
    +    }'
     
    -

    For VLLM Use Case:

    -
    curl -X POST http://localhost:3000/api/v1/complete \
    -     -H "Content-Type: application/json" \
    -     -d '{"prompt": "Your VLLM prompt.", "max_new_tokens": 1024, "do_sample": true}'
    +

    For llama.cpp:

    +
    curl -X POST "http://localhost:3000/api/v1/complete_llama_cpp" \
    +    -H "Content-Type: application/json" \
    +    -u "user:password" \
    +    -d '{
    +        "prompt": "Whats the weather like in London?",
    +        "temperature": 0.7,
    +        "top_p": 0.95,
    +        "top_k": 40,
    +        "max_tokens": 50,
    +        "repeat_penalty": 1.1
    +    }'
     

    Python requests Example

    Standard Language Model:

    -
    import requests
    -
    -response = requests.post("http://localhost:3000/api/v1/complete",
    -                         json={"prompt": "Here is your prompt.", "max_new_tokens": 1024, "do_sample": true},
    -                         auth=('user', 'password'))
    -print(response.json())
    +
    import requests
    +
    +response = requests.post("http://localhost:3000/api/v1/complete",
    +                         json={"prompt": "Here is your prompt.", "max_new_tokens": 1024, "do_sample": true},
    +                         auth=('user', 'password'))
    +print(response.json())
     

    VLLM Request:

    -
    import requests
    -
    -response = requests.post("http://localhost:3000/api/v1/complete",
    -                         json={"prompt": "Your VLLM prompt.", "max_new_tokens": 1024, "do_sample": true},
    -                         auth=('user', 'password'))
    -print(response.json())
    +
    import requests
    +
    +response = requests.post("http://localhost:3000/api/v1/complete",
    +                         json={"prompt": "Your VLLM prompt.", "max_new_tokens": 1024, "do_sample": true},
    +                         auth=('user', 'password'))
    +print(response.json())
     
    -

    Usecases & Variations

    -

    Geniusrise caters to a

    -

    wide array of language model applications, from text summarization with models like facebook/bart-large-cnn to code generation using WizardLM/WizardCoder-Python-7B-V1.0. By customizing the model_name, model_class, and related parameters in your genius.yml, you can tailor your API for specific tasks:

    -
      -
    • Text Summarization: Use summarization models to condense articles or documents.
    • -
    • Text Generation: Create stories, generate content, or even simulate dialogue.
    • -
    • Code Generation: Assist developers by completing code snippets or generating code from descriptions.
    • -
    -

    Remember, while Geniusrise is a powerful tool for deploying language models, it's important to understand the capabilities and limitations of the models you choose to deploy. Always test your configurations and APIs thoroughly to ensure they meet your application's needs.

    diff --git a/blog/huggingface/ner/index.html b/blog/huggingface/ner/index.html index 832894d..af61d2c 100644 --- a/blog/huggingface/ner/index.html +++ b/blog/huggingface/ner/index.html @@ -493,6 +493,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/blog/huggingface/nli/index.html b/blog/huggingface/nli/index.html index 8cb9efd..3cbcf42 100644 --- a/blog/huggingface/nli/index.html +++ b/blog/huggingface/nli/index.html @@ -652,6 +652,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/blog/huggingface/ocr/index.html b/blog/huggingface/ocr/index.html index 56db3ad..078c2ab 100644 --- a/blog/huggingface/ocr/index.html +++ b/blog/huggingface/ocr/index.html @@ -493,6 +493,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/blog/huggingface/qa/index.html b/blog/huggingface/qa/index.html index 8c5a59d..53eb059 100644 --- a/blog/huggingface/qa/index.html +++ b/blog/huggingface/qa/index.html @@ -651,6 +651,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/blog/huggingface/sc111.png b/blog/huggingface/sc111.png new file mode 100644 index 0000000..974ee96 Binary files /dev/null and b/blog/huggingface/sc111.png differ diff --git a/blog/huggingface/segment/index.html b/blog/huggingface/segment/index.html index 6174e68..8b0dcb9 100644 --- a/blog/huggingface/segment/index.html +++ b/blog/huggingface/segment/index.html @@ -493,6 +493,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/blog/huggingface/speak/index.html b/blog/huggingface/speak/index.html index c79d99b..b5d2839 100644 --- a/blog/huggingface/speak/index.html +++ b/blog/huggingface/speak/index.html @@ -493,6 +493,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/blog/huggingface/speech/index.html b/blog/huggingface/speech/index.html index 4a94590..82ff1c8 100644 --- a/blog/huggingface/speech/index.html +++ b/blog/huggingface/speech/index.html @@ -493,6 +493,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/blog/huggingface/summz/index.html b/blog/huggingface/summz/index.html index c4eefac..ea1cd78 100644 --- a/blog/huggingface/summz/index.html +++ b/blog/huggingface/summz/index.html @@ -13,13 +13,17 @@ + + + + - Host Summarization Models Using Geniusrise - + Summarization - @@ -121,7 +125,7 @@
    - Host Summarization Models Using Geniusrise + Summarization
    @@ -281,16 +285,18 @@ + + -
  • +
  • - + @@ -299,7 +305,7 @@ -
  • + +
  • + + Fun + + + + +
  • + +
  • + + Play around + +
  • @@ -3450,46 +3738,87 @@

    Host Summarization Models Using Geniusrise

    -

    In today's fast-paced world, the ability to condense large texts into concise summaries is invaluable. Geniusrise provides a streamlined approach to deploying summarization models as APIs, enabling developers to integrate summarization capabilities directly into their applications. This guide will walk you through setting up, configuring, and interacting with a summarization API using Geniusrise, highlighting various use cases and how to adapt the configuration for different models.

    + +

    This guide will walk you through setting up, configuring, and interacting with a summarization API using Geniusrise, highlighting various use cases and how to adapt the configuration for different models.

    Setup and Configuration

    + + +

    Requirements

    + +

    Optional: Set up a virtual environment:

    +
    virtualenv venv -p `which python3.10`
    +source venv/bin/activate
    +

    Installation:

    Begin by installing Geniusrise and its text module:

    -
    pip install geniusrise
    -pip install geniusrise-text
    +
    pip install geniusrise
    +pip install geniusrise-text
     

    Configuration (genius.yml):

    Create a genius.yml to define your summarization service:

    -
    version: "1"
    -
    -bolts:
    -    my_bolt:
    -        name: SummarizationAPI
    -        state:
    -            type: none
    -        input:
    -            type: batch
    -            args:
    -                input_folder: ./input
    -        output:
    -            type: batch
    -            args:
    -                output_folder: ./output
    -        method: listen
    -        args:
    -            model_name: facebook/bart-large-cnn
    -            model_class: AutoModelForSeq2SeqLM
    -            tokenizer_class: AutoTokenizer
    -            use_cuda: true
    -            precision: float
    -            device_map: cuda:0
    -            endpoint: "0.0.0.0"
    -            port: 3000
    -            cors_domain: http://localhost:3000
    -            username: user
    -            password: password
    +
    version: "1"
    +
    +bolts:
    +    my_bolt:
    +        name: SummarizationAPI
    +        state:
    +            type: none
    +        input:
    +            type: batch
    +            args:
    +                input_folder: ./input
    +        output:
    +            type: batch
    +            args:
    +                output_folder: ./output
    +        method: listen
    +        args:
    +            model_name: facebook/bart-large-cnn
    +            model_class: AutoModelForSeq2SeqLM
    +            tokenizer_class: AutoTokenizer
    +            use_cuda: true
    +            precision: float
    +            device_map: cuda:0
    +            endpoint: "0.0.0.0"
    +            port: 3000
    +            cors_domain: http://localhost:3000
    +            username: user
    +            password: password
     
    -

    Activate your API with:

    -
    genius rise
    +

    Run your API server with:

    +
    genius rise
     

    Configuration Parameters Explained

      @@ -3503,52 +3832,52 @@

      Interacting with the Summarizati

      Summarizing Text

      You can summarize text by making HTTP requests to your API.

      Example with curl:

      -
      /usr/bin/curl -X POST localhost:3000/api/v1/summarize \
      -    -H "Content-Type: application/json" \
      -    -u "user:password" \
      -    -d '{
      -        "text": "Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.",
      -        "decoding_strategy": "generate",
      -        "bos_token_id": 0,
      -        "decoder_start_token_id": 2,
      -        "early_stopping": true,
      -        "eos_token_id": 2,
      -        "forced_bos_token_id": 0,
      -        "forced_eos_token_id": 2,
      -        "length_penalty": 2.0,
      -        "max_length": 142,
      -        "min_length": 56,
      -        "no_repeat_ngram_size": 3,
      -        "num_beams": 4,
      -        "pad_token_id": 1,
      -        "do_sample": false
      -    }' | jq
      +
      /usr/bin/curl -X POST localhost:3000/api/v1/summarize \
      +    -H "Content-Type: application/json" \
      +    -u "user:password" \
      +    -d '{
      +        "text": "Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.",
      +        "decoding_strategy": "generate",
      +        "bos_token_id": 0,
      +        "decoder_start_token_id": 2,
      +        "early_stopping": true,
      +        "eos_token_id": 2,
      +        "forced_bos_token_id": 0,
      +        "forced_eos_token_id": 2,
      +        "length_penalty": 2.0,
      +        "max_length": 142,
      +        "min_length": 56,
      +        "no_repeat_ngram_size": 3,
      +        "num_beams": 4,
      +        "pad_token_id": 1,
      +        "do_sample": false
      +    }' | jq
       

      Example with python-requests:

      -
      import requests
      -
      -data = {
      -    "text": "Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.",
      -    "decoding_strategy": "generate",
      -    "bos_token_id": 0,
      -    "decoder_start_token_id": 2,
      -    "early_stopping": true,
      -    "eos_token_id": 2,
      -    "forced_bos_token_id": 0,
      -    "forced_eos_token_id": 2,
      -    "length_penalty": 2.0,
      -    "max_length": 142,
      -    "min_length": 56,
      -    "no_repeat_ngram_size": 3,
      -    "num_beams": 4,
      -    "pad_token_id": 1,
      -    "do_sample": false
      -}
      -
      -response = requests.post("http://localhost:3000/api/v1/summarize",
      -                         json=data,
      -                         auth=('user', 'password'))
      -print(response.json())
      +
      import requests
      +
      +data = {
      +    "text": "Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.",
      +    "decoding_strategy": "generate",
      +    "bos_token_id": 0,
      +    "decoder_start_token_id": 2,
      +    "early_stopping": true,
      +    "eos_token_id": 2,
      +    "forced_bos_token_id": 0,
      +    "forced_eos_token_id": 2,
      +    "length_penalty": 2.0,
      +    "max_length": 142,
      +    "min_length": 56,
      +    "no_repeat_ngram_size": 3,
      +    "num_beams": 4,
      +    "pad_token_id": 1,
      +    "do_sample": false
      +}
      +
      +response = requests.post("http://localhost:3000/api/v1/summarize",
      +                         json=data,
      +                         auth=('user', 'password'))
      +print(response.json())
       

      Advanced Summarization Features

      For use cases requiring specific summarization strategies or adjustments (e.g., length penalty, no repeat ngram size), additional parameters can be included in your request to customize the summarization output.

      @@ -3557,6 +3886,207 @@

      Different Summarization Models

      Adjust summarization parameters such as max_length, min_length, and num_beams to fine-tune the output based on the specific requirements of your application.

      +

      Fun

      +

      Book summarization

      +

      Models with very large context sizes trained on the booksum dataset. +For example pszemraj/led-base-book-summary, pszemraj/bigbird-pegasus-large-K-booksum or the following large model:

      +
      version: "1"
      +
      +bolts:
      +    my_bolt:
      +        name: SummarizationAPI
      +        state:
      +            type: none
      +        input:
      +            type: batch
      +            args:
      +                input_folder: ./input
      +        output:
      +            type: batch
      +            args:
      +                output_folder: ./output
      +        method: listen
      +        args:
      +            model_name: pszemraj/led-large-book-summary
      +            model_class: AutoModelForSeq2SeqLM
      +            tokenizer_class: AutoTokenizer
      +            use_cuda: true
      +            precision: float
      +            device_map: cuda:0
      +            endpoint: "0.0.0.0"
      +            port: 3000
      +            cors_domain: http://localhost:3000
      +            username: user
      +            password: password
      +
      +
      /usr/bin/curl -X POST localhost:3000/api/v1/summarize \
      +    -H "Content-Type: application/json" \
      +    -u "user:password" \
      +    -d '{
      +        "text": " the big variety of data coming from diverse sources is one of the key properties of the big data phenomenon. It is, therefore, beneficial to understand how data is generated in various environments and scenarios, before looking at what should be done with this data and how to design the best possible architecture to accomplish this The evolution of IT architectures, described in Chapter 2, means that the data is no longer processed by a few big monolith systems, but rather by a group of services In parallel to the processing layer, the underlying data storage has also changed and became more distributed This, in turn, required a significant paradigm shift as the traditional approach to transactions (ACID) could no longer be supported. On top of this, cloud computing is becoming a major approach with the benefits of reducing costs and providing on-demand scalability but at the same time introducing concerns about privacy, data ownership, etc In the meantime the Internet continues its exponential growth: Every day both structured and unstructured data is published and available for processing: To achieve competitive advantage companies have to relate their corporate resources to external services, e.g. financial markets, weather forecasts, social media, etc While several of the sites provide some sort of API to access the data in a more orderly fashion; countless sources require advanced web mining and Natural Language Processing (NLP) processing techniques: Advances in science push researchers to construct new instruments for observing the universe O conducting experiments to understand even better the laws of physics and other domains. Every year humans have at their disposal new telescopes, space probes, particle accelerators, etc These instruments generate huge streams of data, which need to be stored and analyzed. 
The constant drive for efficiency in the industry motivates the introduction of new automation techniques and process optimization: This could not be done without analyzing the precise data that describe these processes. As more and more human tasks are automated, machines provide rich data sets, which can be analyzed in real-time to drive efficiency to new levels. Finally, it is now evident that the growth of the Internet of Things is becoming a major source of data. More and more of the devices are equipped with significant computational power and can generate a continuous data stream from their sensors. In the subsequent sections of this chapter, we will look at the domains described above to see what they generate in terms of data sets. We will compare the volumes but will also look at what is characteristic and important from their respective points of view. 3.1 The Internet is undoubtedly the largest database ever created by humans. While several well described; cleaned, and structured data sets have been made available through this medium, most of the resources are of an ambiguous, unstructured, incomplete or even erroneous nature. Still, several examples in the areas such as opinion mining, social media analysis, e-governance, etc, clearly show the potential lying in these resources. Those who can successfully mine and interpret the Internet data can gain unique insight and competitive advantage in their business An important area of data analytics on the edge of corporate IT and the Internet is Web Analytics.",
      +        "decoding_strategy": "generate",
      +        "bos_token_id": 0,
      +        "decoder_start_token_id": 2,
      +        "early_stopping": true,
      +        "eos_token_id": 2,
      +        "forced_bos_token_id": 0,
      +        "forced_eos_token_id": 2,
      +        "length_penalty": 2.0,
      +        "max_length": 142,
      +        "min_length": 56,
      +        "no_repeat_ngram_size": 3,
      +        "num_beams": 4,
      +        "pad_token_id": 1,
      +        "do_sample": false
      +    }' | jq
      +
      +

      Python Code Explainer

      +

      Summarization is a text-to-text task and can be used to transform the input text into another form; in this case, this model transforms Python code into simple English explanations:

      +
      version: "1"
      +
      +bolts:
      +    my_bolt:
      +        name: SummarizationAPI
      +        state:
      +            type: none
      +        input:
      +            type: batch
      +            args:
      +                input_folder: ./input
      +        output:
      +            type: batch
      +            args:
      +                output_folder: ./output
      +        method: listen
      +        args:
      +            model_name: sagard21/python-code-explainer
      +            model_class: AutoModelForSeq2SeqLM
      +            tokenizer_class: AutoTokenizer
      +            use_cuda: true
      +            precision: float
      +            device_map: cuda:0
      +            endpoint: "0.0.0.0"
      +            port: 3000
      +            cors_domain: http://localhost:3000
      +            username: user
      +            password: password
      +
      +
      /usr/bin/curl -X POST localhost:3000/api/v1/summarize \
      +    -H "Content-Type: application/json" \
      +    -u "user:password" \
      +    -d '{
      +        "text": "    def create_parser(self, parser):\n        """\n        Create and return the command-line parser for managing spouts and bolts.\n        """\n        # fmt: off\n        subparsers = parser.add_subparsers(dest="deploy")\n        up_parser = subparsers.add_parser("up", help="Deploy according to the genius.yml file.", formatter_class=RichHelpFormatter)\n        up_parser.add_argument("--spout", type=str, help="Name of the specific spout to run.")\n        up_parser.add_argument("--bolt", type=str, help="Name of the specific bolt to run.")\n        up_parser.add_argument("--file", default="genius.yml", type=str, help="Path of the genius.yml file, default to .")\n\n        parser.add_argument("--spout", type=str, help="Name of the specific spout to run.")\n        parser.add_argument("--bolt", type=str, help="Name of the specific bolt to run.")\n        parser.add_argument("--file", default="genius.yml", type=str, help="Path of the genius.yml file, default to .")\n        # fmt: on\n\n        return parser",
      +        "decoding_strategy": "generate",
      +        "bos_token_id": 0,
      +        "decoder_start_token_id": 2,
      +        "early_stopping": true,
      +        "eos_token_id": 2,
      +        "forced_bos_token_id": 0,
      +        "forced_eos_token_id": 2,
      +        "length_penalty": 2.0,
      +        "max_length": 142,
      +        "min_length": 56,
      +        "no_repeat_ngram_size": 3,
      +        "num_beams": 4,
      +        "pad_token_id": 1,
      +        "do_sample": false
      +    }' | jq
      +
      +

      Domain-wise or Content-wise Summarization

      +

      Models can be fine-tuned to perform better at specialized tasks along various verticals - like domain knowledge or content type.

      +

      Here are a few examples:

      +

      Medical text

      +
      version: "1"
      +
      +bolts:
      +    my_bolt:
      +        name: SummarizationAPI
      +        state:
      +            type: none
      +        input:
      +            type: batch
      +            args:
      +                input_folder: ./input
      +        output:
      +            type: batch
      +            args:
      +                output_folder: ./output
      +        method: listen
      +        args:
      +            model_name: Falconsai/medical_summarization
      +            model_class: AutoModelForSeq2SeqLM
      +            tokenizer_class: AutoTokenizer
      +            use_cuda: true
      +            precision: float
      +            device_map: cuda:0
      +            endpoint: "0.0.0.0"
      +            port: 3000
      +            cors_domain: http://localhost:3000
      +            username: user
      +            password: password
      +
      + +
      version: "1"
      +
      +bolts:
      +    my_bolt:
      +        name: SummarizationAPI
      +        state:
      +            type: none
      +        input:
      +            type: batch
      +            args:
      +                input_folder: ./input
      +        output:
      +            type: batch
      +            args:
      +                output_folder: ./output
      +        method: listen
      +        args:
      +            model_name: EasyTerms/legalSummerizerET
      +            model_class: AutoModelForSeq2SeqLM
      +            tokenizer_class: AutoTokenizer
      +            use_cuda: true
      +            precision: float
      +            device_map: cuda:0
      +            endpoint: "0.0.0.0"
      +            port: 3000
      +            cors_domain: http://localhost:3000
      +            username: user
      +            password: password
      +
      +

      Conversational text

      +
      version: "1"
      +
      +bolts:
      +    my_bolt:
      +        name: SummarizationAPI
      +        state:
      +            type: none
      +        input:
      +            type: batch
      +            args:
      +                input_folder: ./input
      +        output:
      +            type: batch
      +            args:
      +                output_folder: ./output
      +        method: listen
      +        args:
      +            model_name: kabita-choudhary/finetuned-bart-for-conversation-summary
      +            model_class: AutoModelForSeq2SeqLM
      +            tokenizer_class: AutoTokenizer
      +            use_cuda: true
      +            precision: float
      +            device_map: cuda:0
      +            endpoint: "0.0.0.0"
      +            port: 3000
      +            cors_domain: http://localhost:3000
      +            username: user
      +            password: password
      +
      +

      Play around

      +

      With 1551 open source models on the hub, there is plenty to learn and play with.

      diff --git a/blog/huggingface/table_qa/index.html b/blog/huggingface/table_qa/index.html index 324e87d..08452af 100644 --- a/blog/huggingface/table_qa/index.html +++ b/blog/huggingface/table_qa/index.html @@ -637,6 +637,48 @@ + + + + + +
    • + + Summarization + +
    • + + + + + + + + + +
    • + + Chat + +
    • + + + + + + + + + +
    • + + Language Models + +
    • + + + +
    diff --git a/blog/huggingface/trans/index.html b/blog/huggingface/trans/index.html index bfaa885..62bd7a4 100644 --- a/blog/huggingface/trans/index.html +++ b/blog/huggingface/trans/index.html @@ -16,7 +16,7 @@ - + @@ -617,6 +617,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/blog/huggingface/txtclass/index.html b/blog/huggingface/txtclass/index.html index 5039e17..b1fe8c0 100644 --- a/blog/huggingface/txtclass/index.html +++ b/blog/huggingface/txtclass/index.html @@ -658,6 +658,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/blog/huggingface/vqa/index.html b/blog/huggingface/vqa/index.html index ba25d19..cd6ac68 100644 --- a/blog/huggingface/vqa/index.html +++ b/blog/huggingface/vqa/index.html @@ -493,6 +493,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/bolts/openai/base/index.html b/bolts/openai/base/index.html index b870d08..445087c 100644 --- a/bolts/openai/base/index.html +++ b/bolts/openai/base/index.html @@ -493,6 +493,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/bolts/openai/classification/index.html b/bolts/openai/classification/index.html index b70a7b1..f210c27 100644 --- a/bolts/openai/classification/index.html +++ b/bolts/openai/classification/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/bolts/openai/commonsense_reasoning/index.html b/bolts/openai/commonsense_reasoning/index.html index 9f05640..deb5edc 100644 --- a/bolts/openai/commonsense_reasoning/index.html +++ b/bolts/openai/commonsense_reasoning/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/bolts/openai/instruction_tuning/index.html b/bolts/openai/instruction_tuning/index.html index 176b0f8..4fd50b7 100644 --- a/bolts/openai/instruction_tuning/index.html +++ b/bolts/openai/instruction_tuning/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/bolts/openai/language_model/index.html b/bolts/openai/language_model/index.html index c7d00b6..39fc203 100644 --- a/bolts/openai/language_model/index.html +++ b/bolts/openai/language_model/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/bolts/openai/ner/index.html b/bolts/openai/ner/index.html index 4908429..8a387ef 100644 --- a/bolts/openai/ner/index.html +++ b/bolts/openai/ner/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/bolts/openai/question_answering/index.html b/bolts/openai/question_answering/index.html index 494944c..9111f84 100644 --- a/bolts/openai/question_answering/index.html +++ b/bolts/openai/question_answering/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/bolts/openai/sentiment_analysis/index.html b/bolts/openai/sentiment_analysis/index.html index ab2546c..d5072b1 100644 --- a/bolts/openai/sentiment_analysis/index.html +++ b/bolts/openai/sentiment_analysis/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/bolts/openai/summarization/index.html b/bolts/openai/summarization/index.html index 00c0747..9d72ce5 100644 --- a/bolts/openai/summarization/index.html +++ b/bolts/openai/summarization/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/bolts/openai/translation/index.html b/bolts/openai/translation/index.html index d3ddff0..aeae20a 100644 --- a/bolts/openai/translation/index.html +++ b/bolts/openai/translation/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/airflow/index.html b/core/airflow/index.html index 21e40d7..2bfe007 100644 --- a/core/airflow/index.html +++ b/core/airflow/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/cli_boltctl/index.html b/core/cli_boltctl/index.html index cff73e4..a11692f 100644 --- a/core/cli_boltctl/index.html +++ b/core/cli_boltctl/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/cli_discover/index.html b/core/cli_discover/index.html index 9f60568..036e42d 100644 --- a/core/cli_discover/index.html +++ b/core/cli_discover/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/cli_dockerctl/index.html b/core/cli_dockerctl/index.html index 613c463..b4157db 100644 --- a/core/cli_dockerctl/index.html +++ b/core/cli_dockerctl/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/cli_geniusctl/index.html b/core/cli_geniusctl/index.html index 7be545f..8511552 100644 --- a/core/cli_geniusctl/index.html +++ b/core/cli_geniusctl/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/cli_schema/index.html b/core/cli_schema/index.html index 4bc97d1..c848fbe 100644 --- a/core/cli_schema/index.html +++ b/core/cli_schema/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/cli_spoutctl/index.html b/core/cli_spoutctl/index.html index fa86933..7d957c2 100644 --- a/core/cli_spoutctl/index.html +++ b/core/cli_spoutctl/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/cli_yamlctl/index.html b/core/cli_yamlctl/index.html index 755f36b..b744346 100644 --- a/core/cli_yamlctl/index.html +++ b/core/cli_yamlctl/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/core_bolt/index.html b/core/core_bolt/index.html index 8fdbdd5..d8ed962 100644 --- a/core/core_bolt/index.html +++ b/core/core_bolt/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/core_data_batch_input/index.html b/core/core_data_batch_input/index.html index 727cbe4..b503584 100644 --- a/core/core_data_batch_input/index.html +++ b/core/core_data_batch_input/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/core_data_batch_output/index.html b/core/core_data_batch_output/index.html index 5b0da26..4babe57 100644 --- a/core/core_data_batch_output/index.html +++ b/core/core_data_batch_output/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/core_data_input/index.html b/core/core_data_input/index.html index 53850d5..bdc6ec0 100644 --- a/core/core_data_input/index.html +++ b/core/core_data_input/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/core_data_output/index.html b/core/core_data_output/index.html index e84f1f6..e278e85 100644 --- a/core/core_data_output/index.html +++ b/core/core_data_output/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/core_data_streaming_input/index.html b/core/core_data_streaming_input/index.html index 662a612..573b24e 100644 --- a/core/core_data_streaming_input/index.html +++ b/core/core_data_streaming_input/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/core_data_streaming_output/index.html b/core/core_data_streaming_output/index.html index b70e1af..aa8f70c 100644 --- a/core/core_data_streaming_output/index.html +++ b/core/core_data_streaming_output/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/core_spout/index.html b/core/core_spout/index.html index 3f25636..c22d0ca 100644 --- a/core/core_spout/index.html +++ b/core/core_spout/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/core_state_base/index.html b/core/core_state_base/index.html index d3c3156..9a89039 100644 --- a/core/core_state_base/index.html +++ b/core/core_state_base/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/core_state_dynamo/index.html b/core/core_state_dynamo/index.html index 2da203b..da09309 100644 --- a/core/core_state_dynamo/index.html +++ b/core/core_state_dynamo/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/core_state_memory/index.html b/core/core_state_memory/index.html index 37536dc..5d8291e 100644 --- a/core/core_state_memory/index.html +++ b/core/core_state_memory/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/core_state_postgres/index.html b/core/core_state_postgres/index.html index 17601ba..b0f1951 100644 --- a/core/core_state_postgres/index.html +++ b/core/core_state_postgres/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/core_state_redis/index.html b/core/core_state_redis/index.html index d8ceefd..8b66384 100644 --- a/core/core_state_redis/index.html +++ b/core/core_state_redis/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/core_task_base/index.html b/core/core_task_base/index.html index 850c5c8..6cacbe8 100644 --- a/core/core_task_base/index.html +++ b/core/core_task_base/index.html @@ -495,6 +495,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/docker/index.html b/core/docker/index.html index 171c0c9..4959937 100644 --- a/core/docker/index.html +++ b/core/docker/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/docker_swarm/index.html b/core/docker_swarm/index.html index 3a726cc..8918360 100644 --- a/core/docker_swarm/index.html +++ b/core/docker_swarm/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/k8s_base/index.html b/core/k8s_base/index.html index 83e6cd8..ba1d40b 100644 --- a/core/k8s_base/index.html +++ b/core/k8s_base/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/k8s_cron_job/index.html b/core/k8s_cron_job/index.html index 472badc..34a5a3d 100644 --- a/core/k8s_cron_job/index.html +++ b/core/k8s_cron_job/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/k8s_deployment/index.html b/core/k8s_deployment/index.html index 7de1927..7aa7327 100644 --- a/core/k8s_deployment/index.html +++ b/core/k8s_deployment/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/k8s_job/index.html b/core/k8s_job/index.html index fe1ae96..f2220dd 100644 --- a/core/k8s_job/index.html +++ b/core/k8s_job/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/core/k8s_service/index.html b/core/k8s_service/index.html index d4058f3..052571e 100644 --- a/core/k8s_service/index.html +++ b/core/k8s_service/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/arangodb/index.html b/databases/arangodb/index.html index 51ac6b9..a63d4f8 100644 --- a/databases/arangodb/index.html +++ b/databases/arangodb/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/athena/index.html b/databases/athena/index.html index 527ba94..c1070e5 100644 --- a/databases/athena/index.html +++ b/databases/athena/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/azure_table/index.html b/databases/azure_table/index.html index 591d232..821a5a9 100644 --- a/databases/azure_table/index.html +++ b/databases/azure_table/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/bigquery/index.html b/databases/bigquery/index.html index 6df26ba..ce0c2a3 100644 --- a/databases/bigquery/index.html +++ b/databases/bigquery/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/bigtable/index.html b/databases/bigtable/index.html index f6eec24..71112c5 100644 --- a/databases/bigtable/index.html +++ b/databases/bigtable/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/cassandra/index.html b/databases/cassandra/index.html index 0eb5dda..e5545f1 100644 --- a/databases/cassandra/index.html +++ b/databases/cassandra/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/cloud_sql/index.html b/databases/cloud_sql/index.html index cd1a4f4..0f57ce7 100644 --- a/databases/cloud_sql/index.html +++ b/databases/cloud_sql/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/cockroach/index.html b/databases/cockroach/index.html index 44123ed..921ab5d 100644 --- a/databases/cockroach/index.html +++ b/databases/cockroach/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/cosmosdb/index.html b/databases/cosmosdb/index.html index 5ccb36a..81df3a4 100644 --- a/databases/cosmosdb/index.html +++ b/databases/cosmosdb/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/couchbase/index.html b/databases/couchbase/index.html index 8c4d85a..de89a0b 100644 --- a/databases/couchbase/index.html +++ b/databases/couchbase/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/db2/index.html b/databases/db2/index.html index 8e0d8da..2620d0c 100644 --- a/databases/db2/index.html +++ b/databases/db2/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/documentdb/index.html b/databases/documentdb/index.html index 5befd81..8210a98 100644 --- a/databases/documentdb/index.html +++ b/databases/documentdb/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/dynamodb/index.html b/databases/dynamodb/index.html index a42ed61..80e0f6b 100644 --- a/databases/dynamodb/index.html +++ b/databases/dynamodb/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/elasticsearch/index.html b/databases/elasticsearch/index.html index a985894..6f53522 100644 --- a/databases/elasticsearch/index.html +++ b/databases/elasticsearch/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/firestore/index.html b/databases/firestore/index.html index 6a74edd..893db28 100644 --- a/databases/firestore/index.html +++ b/databases/firestore/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/graphite/index.html b/databases/graphite/index.html index 873c7e9..58f73fb 100644 --- a/databases/graphite/index.html +++ b/databases/graphite/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/hbase/index.html b/databases/hbase/index.html index 606d80e..04edba3 100644 --- a/databases/hbase/index.html +++ b/databases/hbase/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/influxdb/index.html b/databases/influxdb/index.html index 47b1e5e..fea364c 100644 --- a/databases/influxdb/index.html +++ b/databases/influxdb/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/kairosdb/index.html b/databases/kairosdb/index.html index 5dad20b..d7ac462 100644 --- a/databases/kairosdb/index.html +++ b/databases/kairosdb/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/keyspaces/index.html b/databases/keyspaces/index.html index b218fb0..f0fb921 100644 --- a/databases/keyspaces/index.html +++ b/databases/keyspaces/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/ldap/index.html b/databases/ldap/index.html index 70d2bfa..7d38101 100644 --- a/databases/ldap/index.html +++ b/databases/ldap/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/memsql/index.html b/databases/memsql/index.html index a033ed4..6eafb21 100644 --- a/databases/memsql/index.html +++ b/databases/memsql/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/mongodb/index.html b/databases/mongodb/index.html index 2d8e8e2..0410f23 100644 --- a/databases/mongodb/index.html +++ b/databases/mongodb/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/mysql/index.html b/databases/mysql/index.html index 5ee3572..a1fe677 100644 --- a/databases/mysql/index.html +++ b/databases/mysql/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/neo4j/index.html b/databases/neo4j/index.html index b1cc5ba..68654cf 100644 --- a/databases/neo4j/index.html +++ b/databases/neo4j/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/nuodb/index.html b/databases/nuodb/index.html index bb8cf3f..e98bcfc 100644 --- a/databases/nuodb/index.html +++ b/databases/nuodb/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/opentsdb/index.html b/databases/opentsdb/index.html index 1c00be0..301aa4f 100644 --- a/databases/opentsdb/index.html +++ b/databases/opentsdb/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/oracle/index.html b/databases/oracle/index.html index a843151..6596ffa 100644 --- a/databases/oracle/index.html +++ b/databases/oracle/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/postgres/index.html b/databases/postgres/index.html index ea0c648..43df246 100644 --- a/databases/postgres/index.html +++ b/databases/postgres/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/presto/index.html b/databases/presto/index.html index b847981..ddc61e9 100644 --- a/databases/presto/index.html +++ b/databases/presto/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/redis/index.html b/databases/redis/index.html index 5ef4338..2c21cf3 100644 --- a/databases/redis/index.html +++ b/databases/redis/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/riak/index.html b/databases/riak/index.html index 7dc967d..d2dd8bb 100644 --- a/databases/riak/index.html +++ b/databases/riak/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/spanner/index.html b/databases/spanner/index.html index 424e55d..15204ef 100644 --- a/databases/spanner/index.html +++ b/databases/spanner/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/sql_server/index.html b/databases/sql_server/index.html index e3c41c0..5d6111d 100644 --- a/databases/sql_server/index.html +++ b/databases/sql_server/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/sqlite/index.html b/databases/sqlite/index.html index 5031278..b9cb131 100644 --- a/databases/sqlite/index.html +++ b/databases/sqlite/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/sybase/index.html b/databases/sybase/index.html index 08d1b39..59b66e4 100644 --- a/databases/sybase/index.html +++ b/databases/sybase/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/teradata/index.html b/databases/teradata/index.html index ef63e6b..8a4eb32 100644 --- a/databases/teradata/index.html +++ b/databases/teradata/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/tidb/index.html b/databases/tidb/index.html index f0d4793..5f7702c 100644 --- a/databases/tidb/index.html +++ b/databases/tidb/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/timescaledb/index.html b/databases/timescaledb/index.html index 81468de..4d9222b 100644 --- a/databases/timescaledb/index.html +++ b/databases/timescaledb/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/vertica/index.html b/databases/vertica/index.html index c95e91c..77ad799 100644 --- a/databases/vertica/index.html +++ b/databases/vertica/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/databases/voltdb/index.html b/databases/voltdb/index.html index 94a98f4..f20f2b6 100644 --- a/databases/voltdb/index.html +++ b/databases/voltdb/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/guides/404/index.html b/guides/404/index.html index 7b151ad..6dbb611 100644 --- a/guides/404/index.html +++ b/guides/404/index.html @@ -493,6 +493,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/guides/architecture/index.html b/guides/architecture/index.html index 48afa47..d348711 100644 --- a/guides/architecture/index.html +++ b/guides/architecture/index.html @@ -587,6 +587,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/guides/cli/index.html b/guides/cli/index.html index a8baecf..9662d00 100644 --- a/guides/cli/index.html +++ b/guides/cli/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/guides/concepts/index.html b/guides/concepts/index.html index 1f75b27..979f81c 100644 --- a/guides/concepts/index.html +++ b/guides/concepts/index.html @@ -545,6 +545,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/guides/deployment/index.html b/guides/deployment/index.html index a386b5b..604aaf8 100644 --- a/guides/deployment/index.html +++ b/guides/deployment/index.html @@ -862,6 +862,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/guides/dev_cycle/index.html b/guides/dev_cycle/index.html index da9322d..437da4d 100644 --- a/guides/dev_cycle/index.html +++ b/guides/dev_cycle/index.html @@ -637,6 +637,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/guides/installation/index.html b/guides/installation/index.html index 1ddd723..5158ec9 100644 --- a/guides/installation/index.html +++ b/guides/installation/index.html @@ -633,6 +633,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/guides/kubernetes/index.html b/guides/kubernetes/index.html index 7d92ef6..a93c0c2 100644 --- a/guides/kubernetes/index.html +++ b/guides/kubernetes/index.html @@ -13,7 +13,7 @@ - + @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/guides/local/index.html b/guides/local/index.html index dba70c5..a1f46b7 100644 --- a/guides/local/index.html +++ b/guides/local/index.html @@ -577,6 +577,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/guides/packaging/index.html b/guides/packaging/index.html index 01e787d..9cf88b9 100644 --- a/guides/packaging/index.html +++ b/guides/packaging/index.html @@ -700,6 +700,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/guides/pin/index.html b/guides/pin/index.html index 38e8971..4e27c72 100644 --- a/guides/pin/index.html +++ b/guides/pin/index.html @@ -493,6 +493,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/guides/usage/index.html b/guides/usage/index.html index 7405f0a..416d746 100644 --- a/guides/usage/index.html +++ b/guides/usage/index.html @@ -607,6 +607,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/guides/yaml/index.html b/guides/yaml/index.html index cf2dcaf..eb08930 100644 --- a/guides/yaml/index.html +++ b/guides/yaml/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/index.html b/index.html index 032296f..0ae7599 100644 --- a/index.html +++ b/index.html @@ -659,6 +659,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + @@ -3707,6 +3749,18 @@

    Geniusrise Microservices Ecosystem +

    Quickstarts:

    +
    +

    (More coming soon).

    Guides


    🚀 Getting Started

    diff --git a/listeners/activemq/index.html b/listeners/activemq/index.html index 1d0d85e..08e7a16 100644 --- a/listeners/activemq/index.html +++ b/listeners/activemq/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/listeners/amqp/index.html b/listeners/amqp/index.html index 23d8837..a0116fc 100644 --- a/listeners/amqp/index.html +++ b/listeners/amqp/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/listeners/grpc/index.html b/listeners/grpc/index.html index f853e80..82f18c7 100644 --- a/listeners/grpc/index.html +++ b/listeners/grpc/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/listeners/http_polling/index.html b/listeners/http_polling/index.html index 69a22d0..637f855 100644 --- a/listeners/http_polling/index.html +++ b/listeners/http_polling/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/listeners/kafka/index.html b/listeners/kafka/index.html index acb362d..75d1f7b 100644 --- a/listeners/kafka/index.html +++ b/listeners/kafka/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/listeners/kinesis/index.html b/listeners/kinesis/index.html index fe09d20..53ef13e 100644 --- a/listeners/kinesis/index.html +++ b/listeners/kinesis/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/listeners/mqtt/index.html b/listeners/mqtt/index.html index 956841d..7553891 100644 --- a/listeners/mqtt/index.html +++ b/listeners/mqtt/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/listeners/quic/index.html b/listeners/quic/index.html index 5a77032..cc92628 100644 --- a/listeners/quic/index.html +++ b/listeners/quic/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/listeners/redis_pubsub/index.html b/listeners/redis_pubsub/index.html index bd1c96f..4c6854f 100644 --- a/listeners/redis_pubsub/index.html +++ b/listeners/redis_pubsub/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/listeners/redis_streams/index.html b/listeners/redis_streams/index.html index 5ca9d49..a3f8682 100644 --- a/listeners/redis_streams/index.html +++ b/listeners/redis_streams/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/listeners/sns/index.html b/listeners/sns/index.html index 4924b23..f62bf28 100644 --- a/listeners/sns/index.html +++ b/listeners/sns/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/listeners/socket.io/index.html b/listeners/socket.io/index.html index d397d9f..9821faa 100644 --- a/listeners/socket.io/index.html +++ b/listeners/socket.io/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/listeners/sqs/index.html b/listeners/sqs/index.html index 6f3304c..6ef63a4 100644 --- a/listeners/sqs/index.html +++ b/listeners/sqs/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/listeners/udp/index.html b/listeners/udp/index.html index e14aec8..520ade7 100644 --- a/listeners/udp/index.html +++ b/listeners/udp/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/listeners/webhook/index.html b/listeners/webhook/index.html index 313a2d1..6a5d502 100644 --- a/listeners/webhook/index.html +++ b/listeners/webhook/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/listeners/websocket/index.html b/listeners/websocket/index.html index 9bce5ac..e0c3834 100644 --- a/listeners/websocket/index.html +++ b/listeners/websocket/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/listeners/zeromq/index.html b/listeners/zeromq/index.html index 021b547..b6dc085 100644 --- a/listeners/zeromq/index.html +++ b/listeners/zeromq/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/objects.inv b/objects.inv index b776782..4debfc9 100644 Binary files a/objects.inv and b/objects.inv differ diff --git a/ocr/ConvertImage/index.html b/ocr/ConvertImage/index.html index 36c0848..7cccc3f 100644 --- a/ocr/ConvertImage/index.html +++ b/ocr/ConvertImage/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/ocr/FineTunePix2Struct/index.html b/ocr/FineTunePix2Struct/index.html index b5f787c..1c145c7 100644 --- a/ocr/FineTunePix2Struct/index.html +++ b/ocr/FineTunePix2Struct/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/ocr/FineTuneTROCR/index.html b/ocr/FineTuneTROCR/index.html index a0cb9c8..0138081 100644 --- a/ocr/FineTuneTROCR/index.html +++ b/ocr/FineTuneTROCR/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/ocr/ImageClassPredictor/index.html b/ocr/ImageClassPredictor/index.html index 748459e..47a2066 100644 --- a/ocr/ImageClassPredictor/index.html +++ b/ocr/ImageClassPredictor/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/ocr/ParseCBZCBR/index.html b/ocr/ParseCBZCBR/index.html index 1b2ebb4..e970692 100644 --- a/ocr/ParseCBZCBR/index.html +++ b/ocr/ParseCBZCBR/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/ocr/ParseDjvu/index.html b/ocr/ParseDjvu/index.html index cd5f174..2fe4cc2 100644 --- a/ocr/ParseDjvu/index.html +++ b/ocr/ParseDjvu/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/ocr/ParseEpub/index.html b/ocr/ParseEpub/index.html index 525731e..6bcaec2 100644 --- a/ocr/ParseEpub/index.html +++ b/ocr/ParseEpub/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/ocr/ParseMOBI/index.html b/ocr/ParseMOBI/index.html index f8b98cf..cb2fc78 100644 --- a/ocr/ParseMOBI/index.html +++ b/ocr/ParseMOBI/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/ocr/ParsePdf/index.html b/ocr/ParsePdf/index.html index f4bdd7c..7f1bcd6 100644 --- a/ocr/ParsePdf/index.html +++ b/ocr/ParsePdf/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/ocr/ParsePostScript/index.html b/ocr/ParsePostScript/index.html index 895a29a..2862a80 100644 --- a/ocr/ParsePostScript/index.html +++ b/ocr/ParsePostScript/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/ocr/ParseXPS/index.html b/ocr/ParseXPS/index.html index 3422306..189fc84 100644 --- a/ocr/ParseXPS/index.html +++ b/ocr/ParseXPS/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/ocr/Pix2StructImageOCR/index.html b/ocr/Pix2StructImageOCR/index.html index d01c682..3063c16 100644 --- a/ocr/Pix2StructImageOCR/index.html +++ b/ocr/Pix2StructImageOCR/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/ocr/Pix2StructImageOCRAPI/index.html b/ocr/Pix2StructImageOCRAPI/index.html index 6a777b4..12b1264 100644 --- a/ocr/Pix2StructImageOCRAPI/index.html +++ b/ocr/Pix2StructImageOCRAPI/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/ocr/TROCRImageOCR/index.html b/ocr/TROCRImageOCR/index.html index 5c3dfb0..c82ccdc 100644 --- a/ocr/TROCRImageOCR/index.html +++ b/ocr/TROCRImageOCR/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/ocr/TROCRImageOCRAPI/index.html b/ocr/TROCRImageOCRAPI/index.html index c865647..d098bd3 100644 --- a/ocr/TROCRImageOCRAPI/index.html +++ b/ocr/TROCRImageOCRAPI/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/ocr/TrainImageClassifier/index.html b/ocr/TrainImageClassifier/index.html index 225bbe0..a4ef2bc 100644 --- a/ocr/TrainImageClassifier/index.html +++ b/ocr/TrainImageClassifier/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/print_page/index.html b/print_page/index.html index 91ba447..b651f23 100644 --- a/print_page/index.html +++ b/print_page/index.html @@ -499,6 +499,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + @@ -3517,6 +3559,18 @@

    Geniusrise Microservices Ecosy +

    Quickstarts:

    + +

    (More coming soon).

    Guides


    🚀 Getting Started

    @@ -6948,7 +7002,1282 @@

    FunPlay around

    -

    There is not much to really do in translation except mess around with different languagues 🤷‍♂️ Not many models either, facebook is the undisputed leader in translation models.

    Ended: Blogs

    Ended: Development

    +

    There is not much to really do in translation except mess around with different languagues 🤷‍♂️ Not many models either, facebook is the undisputed leader in translation models.

    Ended: Blogs

    Ended: Development

    Deployment

    @@ -22369,7 +23698,7 @@

    -listen(model_name, model_class='AutoModel', processor_class='AutoProcessor', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, concurrent_queries=False, use_whisper_cpp=False, endpoint='*', port=3000, cors_domain='http://localhost:3000', username=None, password=None, **model_args) +listen(model_name, model_class='AutoModel', processor_class='AutoProcessor', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, concurrent_queries=False, use_whisper_cpp=False, use_faster_whisper=False, endpoint='*', port=3000, cors_domain='http://localhost:3000', username=None, password=None, **model_args)

    @@ -22559,6 +23888,20 @@

    bool + + +
    +

    Whether to use faster-whisper.

    +
    + + + False + + endpoint @@ -23395,6 +24738,121 @@

    +process_faster_whisper(audio_input, model_sampling_rate, chunk_size, generate_args) + +

    + + +
    + +

    Processes audio input with the faster-whisper model.

    + + + +

    Parameters:

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    audio_input + bytes + +
    +

    The audio input for transcription.

    +
    +
    + required +
    model_sampling_rate + int + +
    +

    The sampling rate of the model.

    +
    +
    + required +
    chunk_size + int + +
    +

    The size of audio chunks to process.

    +
    +
    + required +
    generate_args + Dict[str, Any] + +
    +

    Additional arguments for transcription.

    +
    +
    + required +
    + + + +

    Returns:

    + + + + + + + + + + + + + +
    TypeDescription
    + Dict[str, Any] + +
    +

    Dict[str, Any]: A dictionary containing the transcription results.

    +
    +
    + +
    + +
    + + +
    + + +

    process_seamless(audio_input, model_sampling_rate, processor_args, chunk_size, overlap_size, generate_args) @@ -32864,7 +34322,7 @@

    -load_models(model_name, processor_name, model_revision=None, processor_revision=None, model_class='', processor_class='AutoFeatureExtractor', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, flash_attention=False, better_transformers=False, **model_args) +load_models(model_name, processor_name, model_revision=None, processor_revision=None, model_class='', processor_class='AutoFeatureExtractor', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, flash_attention=False, better_transformers=False, use_whisper_cpp=False, use_faster_whisper=False, **model_args)

    @@ -33096,6 +34554,34 @@

    False + + use_whisper_cpp + + bool + + +
    +

    Whether to use whisper.cpp to load the model. Defaults to False. Note: only works for these models: https://github.com/aarnphm/whispercpp/blob/524dd6f34e9d18137085fb92a42f1c31c9c6bc29/src/whispercpp/utils.py#L32

    +
    + + + False + + + + use_faster_whisper + + bool + + +
    +

    Whether to use faster-whisper.

    +
    + + + False + + **model_args @@ -33786,6 +35272,121 @@

    +process_faster_whisper(audio_input, model_sampling_rate, chunk_size, generate_args) + +

    + + +
    + +

    Processes audio input with the faster-whisper model.

    + + + +

    Parameters:

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    NameTypeDescriptionDefault
    audio_input + bytes + +
    +

    The audio input for transcription.

    +
    +
    + required +
    model_sampling_rate + int + +
    +

    The sampling rate of the model.

    +
    +
    + required +
    chunk_size + int + +
    +

    The size of audio chunks to process.

    +
    +
    + required +
    generate_args + Dict[str, Any] + +
    +

    Additional arguments for transcription.

    +
    +
    + required +
    + + + +

    Returns:

    + + + + + + + + + + + + + +
    TypeDescription
    + Dict[str, Any] + +
    +

    Dict[str, Any]: A dictionary containing the transcription results.

    +
    +
    + +
    + +
    + + +
    + + +

    process_seamless(audio_input, model_sampling_rate, processor_args, chunk_size, overlap_size, generate_args) diff --git a/search/search_index.json b/search/search_index.json index cffc037..ba92830 100644 --- a/search/search_index.json +++ b/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Home","text":""},{"location":"#geniusrise-microservices-ecosystem","title":"Geniusrise Microservices Ecosystem","text":"

    Geniusrise is a modular, loosely-coupled AI-microservices framework.

    It can be used to perform various tasks, including hosting inference endpoints, performing bulk inference, fine tune etc with open source models or closed source APIs.

    • The framework provides structure for modules and operationalizes and orchestrates them.
    • The modular ecosystem provides a layer of abstraction over the myriad of models, libraries, tools, parameters and optimizations underlying the operationalization of modern AI models.

    Together the framework and ecosystem can be used for:

    1. Rapid prototyping by hosting APIs on a wide range of models
      1. Host and experiment on local and iterate fast
      2. Deploy on kubernetes to production
    2. Building AI-side components using the framework and CLI
      1. Build complex AI microservices using multiple models
      2. Iterate fast from development to production
      3. Manage, scale and monitor deployments in production
      4. Build once, run anywhere
    3. Using the ecosystem as a library: Many interesting applications can be built using this, e.g.:
      1. A multi-cloud AI cloud, see geniusrise.com
      2. Local model pipeline server for personal or home IOT devices (e.g. a personal AI pin connected to voice-LLM pipeline hosted on desktop)
      3. Desktop and CLI applications
    "},{"location":"#guides","title":"Guides","text":""},{"location":"#getting-started","title":"\ud83d\ude80 Getting Started","text":"
    1. \ud83d\udca5 Usage - TLDR Usage.
    2. \ud83d\udee0\ufe0f Installation - Installation and setup.
    3. \ud83d\udcd8 Concepts - Concepts of the framework, start here.
    4. \ud83c\udfd7\ufe0f Architecture - Design and architecture of the framework.
    "},{"location":"#development","title":"\ud83d\udcbb Development","text":"
    1. \ud83c\udfe0 Local Experimentation - Local setup and project creation.
    2. \ud83d\udd04 Dev Cycle - Describes one full local development cycle.
    3. \ud83d\udce6 Packaging - Packaging your application.
    4. \ud83d\ude80 Deployment - Deploying parts or whole of your application.
    "},{"location":"#reference","title":"\ud83d\udcda Reference","text":"
    1. \ud83d\udcc4 YAML Structure - Geniusfile structure and configuration.
    2. \ud83c\udf10 CLI reference - Command line reference.
    3. \ud83c\udfa8 Project Templates - Project templates for community plugins.
    "},{"location":"#runners","title":"\ud83c\udfc3 Runners","text":"\ud83c\udf10 Runners \ud83d\udfe2 k8s deployment \ud83d\udfe4 k8s service \ud83d\udfe1 k8s job \ud83d\udfe0 k8s cron job \ud83d\udfe7 k8s pods \ud83d\udfe6 Apache Airflow \ud83d\udd34 Docker \ud83d\udfe3 Docker Swarm"},{"location":"#models","title":"\ud83e\udd16 Models","text":""},{"location":"#text-inference-apis","title":"Text Inference APIs","text":"\ud83c\udf10 Local & Huggingface \ud83d\udfe2 Language Model \ud83d\udfe3 Named Entity Recognition \ud83d\udfe1 Question Answering \ud83d\udfe0 Sentiment Analysis \ud83d\udfe4 Summarization \ud83d\udfe6 Translation \ud83d\udd35 Classification \ud83d\udd34 Natural Language Inference \ud83d\udfe7 Instruction Tuning \ud83d\udfe7 Base"},{"location":"#text-bulk-inference","title":"Text Bulk Inference","text":"\ud83c\udf10 Local & Huggingface \ud83d\udfe2 Language Model \ud83d\udfe3 Named Entity Recognition \ud83d\udfe1 Question Answering \ud83d\udfe0 Sentiment Analysis \ud83d\udfe4 Summarization \ud83d\udfe6 Translation \ud83d\udd35 Classification \ud83d\udd34 Natural Language Inference \ud83d\udfe7 Instruction Tuning \ud83d\udfe7 Base"},{"location":"#text-fine-tuning","title":"Text Fine-tuning","text":"\ud83c\udf10 Local & Huggingface \ud83d\udfe2 Language Model \ud83d\udfe3 Named Entity Recognition \ud83d\udfe1 Question Answering \ud83d\udfe0 Sentiment Analysis \ud83d\udfe4 Summarization \ud83d\udfe6 Translation \ud83d\udd35 Classification \ud83d\udd34 Natural Language Inference \ud83d\udfe7 Instruction Tuning \ud83d\udfe7 Base \ud83c\udf10 OpenAI \ud83d\udfe2 Classification \ud83d\udfe3 Natural Language Inference \ud83d\udfe1 Instruction Tuning \ud83d\udfe0 Language Model \ud83d\udfe4 Named Entity Recognition \ud83d\udfe6 Question Answering \ud83d\udd35 Sentiment Analysis \ud83d\udd34 Summarization \ud83d\udfe7 Translation \ud83d\udfe7 Base"},{"location":"#vision-inference-apis","title":"Vision Inference APIs","text":"\ud83c\udf10 Local & Huggingface 
\ud83d\udfe2 Image Classification \ud83d\udfe3 OCR \ud83d\udfe1 Image Segmentation \ud83d\udfe0 Visual Question Answering \ud83d\udfe4 Base"},{"location":"#audio-inference-apis","title":"Audio Inference APIs","text":"\ud83c\udf10 Local & Huggingface \ud83d\udfe2 Text to Speech \ud83d\udfe3 Speech to Text \ud83d\udfe7 Base"},{"location":"#audio-bulk-inference","title":"Audio Bulk Inference","text":"\ud83c\udf10 Local & Huggingface \ud83d\udfe2 Text to Speech \ud83d\udfe3 Speech to Text \ud83d\udfe7 Base"},{"location":"#data","title":"\u26a1 Data","text":""},{"location":"#ingestion","title":"Ingestion","text":"\ud83c\udf10 Streaming \ud83d\udfe2 Http Polling \ud83d\udfe3 Socket.io \ud83d\udfe1 gRPC \ud83d\udfe0 QUIC \ud83d\udfe4 UDP \ud83d\udd35 Webhook \ud83d\udfe5 Websocket \ud83d\udfe9 SNS \ud83d\udfe7 SQS \ud83d\udfe8 AMQP \ud83d\udfeb Kafka \ud83d\udfea Kinesis Streams \ud83d\udfe9 MQTT \ud83d\udfe8 ActiveMQ \ud83d\udfeb ZeroMQ \ud83d\udfea Redis Pubsub \ud83d\udfe7 Redis Streams \ud83d\udce6 Databases \ud83d\udfe2 HBase \ud83d\udfe3 PostgreSQL \ud83d\udd35 MySQL \ud83d\udfe0 MongoDB \ud83d\udfe2 Cassandra \ud83d\udfe3 Redis \ud83d\udd35 Elasticsearch \ud83d\udfe0 Oracle \ud83d\udfe2 SQL Server \ud83d\udfe3 SQLite \ud83d\udd35 Neo4j \ud83d\udfe0 Bigtable \ud83d\udfe2 DynamoDB \ud83d\udfe3 Azure Table Storage \ud83d\udd35 Couchbase \ud83d\udfe0 InfluxDB \ud83d\udfe2 TimescaleDB \ud83d\udfe3 Teradata \ud83d\udd35 TiDB \ud83d\udfe0 Voltdb \ud83d\udfe2 Sybase \ud83d\udfe3 DB2 \ud83d\udd35 AWS Presto \ud83d\udfe0 Riak \ud83d\udfe2 MemSQL \ud83d\udfe3 LDAP \ud83d\udd35 AWS KeySpaces \ud83d\udfe0 KairosDB \ud83d\udfe2 Graphite \ud83d\udfe3 Google FireStore \ud83d\udd35 AWS DocumentDB \ud83d\udfe0 Cockroach \ud83d\udfe2 Cloud SQL \ud83d\udfe3 Azure CosmosDB \ud83d\udd35 AWS Athena \ud83d\udfe0 ArangoDB \ud83d\udfe2 Nuodb \ud83d\udfe3 OpenTSDB \ud83d\udd35 Google Bigquery \ud83d\udfe0 Vertica \ud83d\udfe2 Google 
Spanner"},{"location":"#preprocessing","title":"Preprocessing","text":"\ud83c\udf10 Document Processing \ud83c\udf10 Image Processing \ud83c\udf10 OCR \ud83d\udfe3 Parse PDF \ud83d\udfe1 Predict image classes \ud83d\udd35 TROCRImageOCR \ud83d\udfe3 ParseCBZCBR \ud83d\udfe1 Train image classifier \ud83d\udd35 FineTuneTROCR \ud83d\udfe3 ParseDjvu \ud83d\udfe1 Convert Images \ud83d\udd35 TROCRImageOCRAPI \ud83d\udfe3 ParseEpub \ud83d\udfe2 Pix2StructImageOCR \ud83d\udfe3 ParseMOBI \ud83d\udfe2 Pix2StructImageOCRAPI \ud83d\udfe3 ParsePostScript \ud83d\udfe2 FineTunePix2Struct \ud83d\udfe3 ParseXPS"},{"location":"#library","title":"\ud83d\udcda Library","text":"\ud83d\udce6 cli \ud83d\udce6 core \ud83d\udce6 data \ud83d\udce6 core.state \ud83d\udfe0 geniusctl \ud83d\udfe2 bolt \ud83d\udfe3 input \ud83d\udd34 base \ud83d\udfe0 yamlctl \ud83d\udfe2 spout \ud83d\udfe3 output \ud83d\udd34 dynamo \ud83d\udfe0 boltctl \ud83d\udfe4 base \ud83d\udfe3 batch_input \ud83d\udd34 memory \ud83d\udfe0 spoutctl \ud83d\udfe3 batch_output \ud83d\udd34 postgres \ud83d\udfe0 schema \ud83d\udfe3 streaming_input \ud83d\udd34 redis \ud83d\udfe0 discover \ud83d\udfe3 streaming_output \ud83d\udfe0 docker"},{"location":"audio/api/base/","title":"Base API","text":"

    Bases: AudioBulk

    A class representing a Hugging Face API for processing audio using a pre-trained audio model.

    Attributes:

    Name Type Description model Any

    The pre-trained language model.

    processor Any

    The processor used to preprocess input text.

    model_name str

    The name of the pre-trained language model.

    model_revision Optional[str]

    The revision of the pre-trained language model.

    processor_name str

    The name of the processor used to preprocess input text.

    processor_revision Optional[str]

    The revision of the processor used to preprocess input text.

    model_class str

    The name of the class of the pre-trained language model.

    processor_class str

    The name of the class of the processor used to preprocess input text.

    use_cuda bool

    Whether to use a GPU for inference.

    quantization int

    The level of quantization to use for the pre-trained language model.

    precision str

    The precision to use for the pre-trained language model.

    device_map str | Dict | None

    The mapping of devices to use for inference.

    max_memory Dict[int, str]

    The maximum memory to use for inference.

    torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model.

    model_args Any

    Additional arguments to pass to the pre-trained language model.

    Methods

    text(**kwargs: Any) -> Dict[str, Any]: Generates text based on the given prompt and decoding strategy.

    listen(model_name: str, model_class: str = \"AutoModelForCausalLM\", processor_class: str = \"AutoProcessor\", use_cuda: bool = False, precision: str = \"float16\", quantization: int = 0, device_map: str | Dict | None = \"auto\", max_memory={0: \"24GB\"}, torchscript: bool = True, endpoint: str = \"\", port: int = 3000, cors_domain: str = \"http://localhost:3000\", username: Optional[str] = None, password: Optional[str] = None, *model_args: Any) -> None: Starts a CherryPy server to listen for requests to generate text.

    "},{"location":"audio/api/base/#geniusrise_audio.base.api.AudioAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes a new instance of the AudioAPI class.

    Parameters:

    Name Type Description Default input BatchInput

    The input data to process.

    required output BatchOutput

    The output data to process.

    required state State

    The state of the API.

    required"},{"location":"audio/api/base/#geniusrise_audio.base.api.AudioAPI.listen","title":"listen(model_name, model_class='AutoModel', processor_class='AutoProcessor', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, concurrent_queries=False, use_whisper_cpp=False, endpoint='*', port=3000, cors_domain='http://localhost:3000', username=None, password=None, **model_args)","text":"

    Starts a CherryPy server to listen for requests to generate text.

    Parameters:

    Name Type Description Default model_name str

    The name of the pre-trained language model.

    required model_class str

    The name of the class of the pre-trained language model. Defaults to \"AutoModelForCausalLM\".

    'AutoModel' processor_class str

    The name of the class of the processor used to preprocess input text. Defaults to \"AutoProcessor\".

    'AutoProcessor' use_cuda bool

    Whether to use a GPU for inference. Defaults to False.

    False precision str

    The precision to use for the pre-trained language model. Defaults to \"float16\".

    'float16' quantization int

    The level of quantization to use for the pre-trained language model. Defaults to 0.

    0 device_map str | Dict | None

    The mapping of devices to use for inference. Defaults to \"auto\".

    'auto' max_memory Dict[int, str]

    The maximum memory to use for inference. Defaults to {0: \"24GB\"}.

    {0: '24GB'} torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model. Defaults to True.

    False compile bool

    Enable Torch JIT compilation.

    False concurrent_queries bool

    (bool): Whether the API supports concurrent API calls (usually false).

    False use_whisper_cpp bool

    Whether to use whisper.cpp to load the model. Defaults to False. Note: only works for these models: https://github.com/aarnphm/whispercpp/blob/524dd6f34e9d18137085fb92a42f1c31c9c6bc29/src/whispercpp/utils.py#L32

    False endpoint str

    The endpoint to listen on. Defaults to \"*\".

    '*' port int

    The port to listen on. Defaults to 3000.

    3000 cors_domain str

    The domain to allow CORS requests from. Defaults to \"http://localhost:3000\".

    'http://localhost:3000' username Optional[str]

    The username to use for authentication. Defaults to None.

    None password Optional[str]

    The password to use for authentication. Defaults to None.

    None **model_args Any

    Additional arguments to pass to the pre-trained language model.

    {}"},{"location":"audio/api/base/#geniusrise_audio.base.api.AudioAPI.validate_password","title":"validate_password(realm, username, password)","text":"

    Validate the username and password against expected values.

    Parameters:

    Name Type Description Default realm str

    The authentication realm.

    required username str

    The provided username.

    required password str

    The provided password.

    required

    Returns:

    Name Type Description bool

    True if credentials are valid, False otherwise.

    "},{"location":"audio/api/s2t/","title":"Speech to Text","text":"

    Bases: AudioAPI

    SpeechToTextAPI is a subclass of AudioAPI specifically designed for speech-to-text models. It extends the functionality to handle speech-to-text processing using various ASR models.

    Attributes:

    Name Type Description model AutoModelForCTC

    The speech-to-text model.

    processor AutoProcessor

    The processor to prepare input audio data for the model.

    Methods

    transcribe(audio_input: bytes) -> str: Transcribes the given audio input to text using the speech-to-text model.

    Example CLI Usage:

    genius SpeechToTextAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id facebook/wav2vec2-large-960h-lv60-self \\\nlisten \\\n--args \\\nmodel_name=\"facebook/wav2vec2-large-960h-lv60-self\" \\\nmodel_class=\"Wav2Vec2ForCTC\" \\\nprocessor_class=\"Wav2Vec2Processor\" \\\nuse_cuda=True \\\nprecision=\"float32\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\ncompile=True \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    or using whisper.cpp:

    genius SpeechToTextAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"large\" \\\nuse_whisper_cpp=True \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    "},{"location":"audio/api/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the SpeechToTextAPI with configurations for speech-to-text processing.

    Parameters:

    Name Type Description Default input BatchInput

    The input data configuration.

    required output BatchOutput

    The output data configuration.

    required state State

    The state configuration.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"audio/api/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.asr_pipeline","title":"asr_pipeline(**kwargs)","text":"

    Transcribes speech in the input audio using the Hugging Face ASR pipeline.

    This method leverages a pre-trained ASR model to convert speech audio into text. It's suitable for processing various types of audio content.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'audio_file' with the base64 encoded audio data.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the transcribed text.

    Example CURL Request for transcription:

    (base64 -w 0 sample.flac | awk '{print \"{\\\"audio_file\\\": \\\"\"$0\"\\\", \\\"model_sampling_rate\\\": 16000, \\\"chunk_length_s\\\": 60}\"}' > /tmp/payload.json)\ncurl -X POST http://localhost:3000/api/v1/asr_pipeline \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @/tmp/payload.json | jq\n

    "},{"location":"audio/api/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the ASR Hugging Face pipeline.

    "},{"location":"audio/api/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.process_seamless","title":"process_seamless(audio_input, model_sampling_rate, processor_args, chunk_size, overlap_size, generate_args)","text":"

    Process audio input with the Seamless model.

    "},{"location":"audio/api/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.process_wav2vec2","title":"process_wav2vec2(audio_input, model_sampling_rate, processor_args, chunk_size, overlap_size)","text":"

    Process audio input with the Wav2Vec2 model.

    "},{"location":"audio/api/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.process_whisper","title":"process_whisper(audio_input, model_sampling_rate, processor_args, chunk_size, overlap_size, generate_args)","text":"

    Process audio input with the Whisper model.

    "},{"location":"audio/api/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.transcribe","title":"transcribe()","text":"

    API endpoint to transcribe the given audio input to text using the speech-to-text model. Expects a JSON input with 'audio_file' as a key containing the base64 encoded audio data.

    Returns:

    Type Description

    Dict[str, str]: A dictionary containing the transcribed text.

    Example CURL Request for transcription:

    (base64 -w 0 sample.flac | awk '{print \"{\\\"audio_file\\\": \\\"\"$0\"\\\", \\\"model_sampling_rate\\\": 16000, \\\"chunk_size\\\": 1280000, \\\"overlap_size\\\": 213333, \\\"do_sample\\\": true, \\\"num_beams\\\": 4, \\\"temperature\\\": 0.6, \\\"tgt_lang\\\": \\\"eng\\\"}\"}' > /tmp/payload.json)\ncurl -X POST http://localhost:3000/api/v1/transcribe \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @/tmp/payload.json | jq\n

    "},{"location":"audio/api/t2s/","title":"Text to Speech","text":"

    Bases: AudioAPI

    TextToSpeechAPI for converting text to speech using various TTS models.

    Attributes:

    Name Type Description model AutoModelForSeq2SeqLM

    The text-to-speech model.

    tokenizer AutoTokenizer

    The tokenizer for the model.

    Methods

    synthesize(text_input: str) -> bytes: Converts the given text input to speech using the text-to-speech model.

    Example CLI Usage:

    genius TextToSpeechAPI rise \\\nbatch \\\n    --input_folder ./input \\\nbatch \\\n    --output_folder ./output \\\nnone \\\n    --id facebook/mms-tts-eng \\\n    listen \\\n        --args \\\n            model_name=\"facebook/mms-tts-eng\" \\\n            model_class=\"VitsModel\" \\\n            processor_class=\"VitsTokenizer\" \\\n            use_cuda=True \\\n            precision=\"float32\" \\\n            quantization=0 \\\n            device_map=\"cuda:0\" \\\n            max_memory=None \\\n            torchscript=False \\\n            compile=False \\\n            endpoint=\"*\" \\\n            port=3000 \\\n            cors_domain=\"http://localhost:3000\" \\\n            username=\"user\" \\\n            password=\"password\"\n
    "},{"location":"audio/api/t2s/#geniusrise_audio.t2s.api.TextToSpeechAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the TextToSpeechAPI with configurations for text-to-speech processing.

    Parameters:

    Name Type Description Default input BatchInput

    The input data configuration.

    required output BatchOutput

    The output data configuration.

    required state State

    The state configuration.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"audio/api/t2s/#geniusrise_audio.t2s.api.TextToSpeechAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the TTS Hugging Face pipeline.

    "},{"location":"audio/api/t2s/#geniusrise_audio.t2s.api.TextToSpeechAPI.synthesize","title":"synthesize()","text":"

    API endpoint to convert text input to speech using the text-to-speech model. Expects a JSON input with 'text' as a key containing the text to be synthesized.

    Returns:

    Type Description

    Dict[str, str]: A dictionary containing the base64 encoded audio data.

    Example CURL Request for synthesis: ... [Provide example CURL request] ...

    "},{"location":"audio/api/t2s/#geniusrise_audio.t2s.api.TextToSpeechAPI.tts_pipeline","title":"tts_pipeline(**kwargs)","text":"

    Converts text to speech using the Hugging Face pipeline.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'text' for the input text.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the base64 encoded audio data.

    Example CURL Request for synthesis: ... [Provide example CURL request] ...

    "},{"location":"audio/bulk/base/","title":"Base Bulk Inference","text":"

    Bases: Bolt

    AudioBulk is a class designed for bulk processing of audio data using various audio models from Hugging Face. It focuses on audio generation and transformation tasks, supporting a range of models and configurations.

    Attributes:

    Name Type Description model AutoModelForAudioClassification

    The audio model for generation or transformation tasks.

    processor AutoFeatureExtractor

    The processor for preparing input data for the model.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration and data inputs for the batch process.

    required output BatchOutput

    Configurations for output data handling.

    required state State

    State management for the Bolt.

    required **kwargs

    Arbitrary keyword arguments for extended configurations.

    {} Methods

    audio(**kwargs: Any) -> Dict[str, Any]: Provides an API endpoint for audio processing functionality. Accepts various parameters for customizing the audio processing tasks.

    process(audio_input: Union[str, bytes], **processing_params: Any) -> dict: Processes the audio input based on the provided parameters. Supports multiple processing methods.

    "},{"location":"audio/bulk/base/#geniusrise_audio.base.bulk.AudioBulk.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the AudioBulk with configurations and sets up logging. Prepares the environment for audio processing tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The input data configuration for the audio processing task.

    required output BatchOutput

    The output data configuration for the results of the audio processing.

    required state State

    The state configuration for the Bolt, managing its operational status.

    required **kwargs

    Additional keyword arguments for extended functionality and model configurations.

    {}"},{"location":"audio/bulk/base/#geniusrise_audio.base.bulk.AudioBulk.done","title":"done()","text":"

    Finalizes the AudioBulk processing. Sends notification email if configured.

    This method should be called after all audio processing tasks are complete. It handles any final steps such as sending notifications or cleaning up resources.

    "},{"location":"audio/bulk/base/#geniusrise_audio.base.bulk.AudioBulk.load_models","title":"load_models(model_name, processor_name, model_revision=None, processor_revision=None, model_class='', processor_class='AutoFeatureExtractor', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, flash_attention=False, better_transformers=False, **model_args)","text":"

    Loads and configures the specified audio model and processor for audio processing.

    Parameters:

    Name Type Description Default model_name str

    Name or path of the audio model to load.

    required processor_name str

    Name or path of the processor to load.

    required model_revision Optional[str]

    Specific model revision to load (e.g., commit hash).

    None processor_revision Optional[str]

    Specific processor revision to load.

    None model_class str

    Class of the model to be loaded.

    '' processor_class str

    Class of the processor to be loaded.

    'AutoFeatureExtractor' use_cuda bool

    Flag to use CUDA for GPU acceleration.

    False precision str

    Desired precision for computations (\"float32\", \"float16\", etc.).

    'float16' quantization int

    Bit level for model quantization (0 for none, 8 for 8-bit).

    0 device_map Union[str, Dict, None]

    Specific device(s) for model operations.

    'auto' max_memory Dict[int, str]

    Maximum memory allocation for the model.

    {0: '24GB'} torchscript bool

    Enable TorchScript for model optimization.

    False compile bool

    Enable Torch JIT compilation.

    False flash_attention bool

    Flag to enable Flash Attention optimization for faster processing.

    False better_transformers bool

    Flag to enable Better Transformers optimization for faster processing.

    False **model_args Any

    Additional arguments for model loading.

    {}

    Returns:

    Type Description Tuple[AutoModelForAudioClassification, AutoFeatureExtractor]

    Tuple[AutoModelForAudioClassification, AutoFeatureExtractor]: Loaded model and processor.

    "},{"location":"audio/bulk/s2t/","title":"Speech to Text","text":"

    Bases: AudioAPI

    SpeechToTextAPI is a subclass of AudioAPI specifically designed for speech-to-text models. It extends the functionality to handle speech-to-text processing using various ASR models.

    Attributes:

    Name Type Description model AutoModelForCTC

    The speech-to-text model.

    processor AutoProcessor

    The processor to prepare input audio data for the model.

    Methods

    transcribe(audio_input: bytes) -> str: Transcribes the given audio input to text using the speech-to-text model.

    Example CLI Usage:

    genius SpeechToTextAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id facebook/wav2vec2-large-960h-lv60-self \\\nlisten \\\n--args \\\nmodel_name=\"facebook/wav2vec2-large-960h-lv60-self\" \\\nmodel_class=\"Wav2Vec2ForCTC\" \\\nprocessor_class=\"Wav2Vec2Processor\" \\\nuse_cuda=True \\\nprecision=\"float32\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\ncompile=True \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    or using whisper.cpp:

    genius SpeechToTextAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"large\" \\\nuse_whisper_cpp=True \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    "},{"location":"audio/bulk/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the SpeechToTextAPI with configurations for speech-to-text processing.

    Parameters:

    Name Type Description Default input BatchInput

    The input data configuration.

    required output BatchOutput

    The output data configuration.

    required state State

    The state configuration.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"audio/bulk/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.asr_pipeline","title":"asr_pipeline(**kwargs)","text":"

    Transcribes speech in the input audio using the Hugging Face ASR pipeline.

    This method leverages a pre-trained ASR model to convert speech audio into text. It's suitable for processing various types of audio content.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'audio_file' with the base64 encoded audio data.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the transcribed text.

    Example CURL Request for transcription:

    (base64 -w 0 sample.flac | awk '{print \"{\\\"audio_file\\\": \\\"\"$0\"\\\", \\\"model_sampling_rate\\\": 16000, \\\"chunk_length_s\\\": 60}\"}' > /tmp/payload.json)\ncurl -X POST http://localhost:3000/api/v1/asr_pipeline \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @/tmp/payload.json | jq\n

    "},{"location":"audio/bulk/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the ASR Hugging Face pipeline.

    "},{"location":"audio/bulk/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.process_seamless","title":"process_seamless(audio_input, model_sampling_rate, processor_args, chunk_size, overlap_size, generate_args)","text":"

    Process audio input with the Seamless model.

    "},{"location":"audio/bulk/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.process_wav2vec2","title":"process_wav2vec2(audio_input, model_sampling_rate, processor_args, chunk_size, overlap_size)","text":"

    Process audio input with the Wav2Vec2 model.

    "},{"location":"audio/bulk/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.process_whisper","title":"process_whisper(audio_input, model_sampling_rate, processor_args, chunk_size, overlap_size, generate_args)","text":"

    Process audio input with the Whisper model.

    "},{"location":"audio/bulk/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.transcribe","title":"transcribe()","text":"

    API endpoint to transcribe the given audio input to text using the speech-to-text model. Expects a JSON input with 'audio_file' as a key containing the base64 encoded audio data.

    Returns:

    Type Description

    Dict[str, str]: A dictionary containing the transcribed text.

    Example CURL Request for transcription:

    (base64 -w 0 sample.flac | awk '{print \"{\\\"audio_file\\\": \\\"\"$0\"\\\", \\\"model_sampling_rate\\\": 16000, \\\"chunk_size\\\": 1280000, \\\"overlap_size\\\": 213333, \\\"do_sample\\\": true, \\\"num_beams\\\": 4, \\\"temperature\\\": 0.6, \\\"tgt_lang\\\": \\\"eng\\\"}\"}' > /tmp/payload.json)\ncurl -X POST http://localhost:3000/api/v1/transcribe \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @/tmp/payload.json | jq\n

    "},{"location":"audio/bulk/t2s/","title":"Text to Speech","text":"

    Bases: AudioAPI

    TextToSpeechAPI for converting text to speech using various TTS models.

    Attributes:

    Name Type Description model AutoModelForSeq2SeqLM

    The text-to-speech model.

    tokenizer AutoTokenizer

    The tokenizer for the model.

    Methods

    synthesize(text_input: str) -> bytes: Converts the given text input to speech using the text-to-speech model.

    Example CLI Usage:

    genius TextToSpeechAPI rise \\\nbatch \\\n    --input_folder ./input \\\nbatch \\\n    --output_folder ./output \\\nnone \\\n    --id facebook/mms-tts-eng \\\n    listen \\\n        --args \\\n            model_name=\"facebook/mms-tts-eng\" \\\n            model_class=\"VitsModel\" \\\n            processor_class=\"VitsTokenizer\" \\\n            use_cuda=True \\\n            precision=\"float32\" \\\n            quantization=0 \\\n            device_map=\"cuda:0\" \\\n            max_memory=None \\\n            torchscript=False \\\n            compile=False \\\n            endpoint=\"*\" \\\n            port=3000 \\\n            cors_domain=\"http://localhost:3000\" \\\n            username=\"user\" \\\n            password=\"password\"\n
    "},{"location":"audio/bulk/t2s/#geniusrise_audio.t2s.api.TextToSpeechAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the TextToSpeechAPI with configurations for text-to-speech processing.

    Parameters:

    Name Type Description Default input BatchInput

    The input data configuration.

    required output BatchOutput

    The output data configuration.

    required state State

    The state configuration.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"audio/bulk/t2s/#geniusrise_audio.t2s.api.TextToSpeechAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the TTS Hugging Face pipeline.

    "},{"location":"audio/bulk/t2s/#geniusrise_audio.t2s.api.TextToSpeechAPI.synthesize","title":"synthesize()","text":"

    API endpoint to convert text input to speech using the text-to-speech model. Expects a JSON input with 'text' as a key containing the text to be synthesized.

    Returns:

    Type Description

    Dict[str, str]: A dictionary containing the base64 encoded audio data.

    Example CURL Request for synthesis: ... [Provide example CURL request] ...

    "},{"location":"audio/bulk/t2s/#geniusrise_audio.t2s.api.TextToSpeechAPI.tts_pipeline","title":"tts_pipeline(**kwargs)","text":"

    Converts text to speech using the Hugging Face pipeline.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'text' for the input text.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the base64 encoded audio data.

    Example CURL Request for synthesis: ... [Provide example CURL request] ...

    "},{"location":"blog/huggingface/chat/","title":"Host Chat Models Using Geniusrise","text":"

    Integrating chat models into applications can dramatically enhance user interaction, making it more engaging and intuitive. Geniusrise offers a powerful and flexible way to deploy state-of-the-art chat models as APIs. This guide explores how to set up these APIs for various use cases, focusing on two types: standard and Very Large Language Models (VLLM).

    "},{"location":"blog/huggingface/chat/#quick-setup","title":"Quick Setup","text":"

    Installation:

    To get started, install Geniusrise and its vision package:

    pip install geniusrise\npip install geniusrise-vision\n

    Configuration File (genius.yml):

    Your configuration file is crucial. Here\u2019s how you set it up for a standard chat model and a VLLM.

    "},{"location":"blog/huggingface/chat/#standard-chat-model-example","title":"Standard Chat Model Example:","text":"
    version: \"1\"\nbolts:\nmy_bolt:\nname: ChatAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: \"codellama/CodeLlama-7b-Instruct-hf\"\nmodel_class: \"AutoModelForCausalLM\"\ntokenizer_class: \"AutoTokenizer\"\nuse_cuda: True\nprecision: \"float16\"\ndevice_map: \"auto\"\nendpoint: \"*\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n
    "},{"location":"blog/huggingface/chat/#vllm-example","title":"VLLM Example:","text":"

    For models like GPT-3, which require handling of larger context sizes and nuanced generation controls:

    version: \"1\"\nbolts:\nmy_bolt:\nname: ChatVLLMAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: \"mistralai/Mistral-7B-Instruct-v0.1\"\nmodel_class: \"AutoModelForCausalLM\"\ntokenizer_class: \"AutoTokenizer\"\nuse_cuda: True\nprecision: \"bfloat16\"\ndevice_map: \"auto\"\nuse_vllm: True\nvllm_enforce_eager: True\nvllm_max_model_len: 1024\nendpoint: \"*\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n
    "},{"location":"blog/huggingface/chat/#interacting-with-your-api","title":"Interacting with Your API","text":"

    For a chat model, you would typically send a prompt and receive a generated response:

    curl -X POST \"http://localhost:3000/api/v1/chat\" \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"prompt\": \"Your prompt here\", \"max_tokens\": 50}'\n

    For VLLMs, you might want to control more parameters due to their capacity for larger context and nuanced outputs:

    curl -X POST \"http://localhost:3000/api/v1/chat_vllm\" \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"messages\": [{\"role\": \"user\", \"content\": \"Your prompt here\"}],\n        \"max_tokens\": 50,\n        \"temperature\": 0.7\n    }'\n
    "},{"location":"blog/huggingface/chat/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/chat/#general-chat","title":"General Chat","text":"

    Use models like mistralai/Mistral-7B-Instruct-v0.1 for general-purpose chatting, answering questions, or providing instructions.

    "},{"location":"blog/huggingface/chat/#code-generation","title":"Code Generation","text":"

    For specialized tasks like coding questions, deploy models such as codellama/CodeLlama-7b-Instruct-hf to assist users in solving programming challenges.

    "},{"location":"blog/huggingface/chat/#multilingual-conversations","title":"Multilingual Conversations","text":"

    Deploy multilingual models to engage users in their native languages, enhancing accessibility and user experience.

    "},{"location":"blog/huggingface/chat/#configuration-tips","title":"Configuration Tips","text":"
    • Model Selection: Tailor your choice of model based on the specific needs and language requirements of your application.
    • Precision and CUDA: Adjust these settings based on your computational resources to optimize performance.
    • VLLM Settings: For VLLMs, fine-tune parameters like use_vllm, vllm_enforce_eager, and vllm_max_model_len to handle complex conversations effectively.
    "},{"location":"blog/huggingface/imgclass/","title":"Host Image Classification Models Using Geniusrise","text":"

    Image classification is a cornerstone of machine learning and computer vision, providing the backbone for a myriad of applications from photo organization to medical imaging. With Geniusrise, developers can effortlessly deploy image classification models as APIs, making these powerful tools accessible for integration into various applications. This guide highlights the process of setting up image classification APIs using Geniusrise, offering a range of use cases and configurations.

    "},{"location":"blog/huggingface/imgclass/#quick-setup","title":"Quick Setup","text":"

    Installation:

    To start, ensure Geniusrise and its vision extension are installed:

    pip install geniusrise\npip install geniusrise-vision\n

    Configuration File (genius.yml):

    Your genius.yml configuration will outline the API's structure. Below is a template adjusted for image classification:

    version: \"1\"\nbolts:\nmy_bolt:\nname: ImageClassificationAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: \"google/vit-base-patch16-224\"\nmodel_class: \"AutoModelForImageClassification\"\nprocessor_class: \"AutoImageProcessor\"\ndevice_map: \"cuda:0\"\nuse_cuda: true\nprecision: \"float\"\nendpoint: \"0.0.0.0\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n

    Activate your API by executing:

    genius rise\n
    "},{"location":"blog/huggingface/imgclass/#configuration-parameters-explained","title":"Configuration Parameters Explained","text":"
    • model_name: Defines the pre-trained model used for classification. Choices vary based on the application, from generic models like Google's ViT to specialized ones for food or NSFW detection.
    • model_class & processor_class: Specifies the model and processor classes for handling image data.
    • device_map & use_cuda: Configures GPU usage for enhanced performance.
    • endpoint, port, username, & password: Details for accessing the API securely.
    "},{"location":"blog/huggingface/imgclass/#interacting-with-the-image-classification-api","title":"Interacting with the Image Classification API","text":""},{"location":"blog/huggingface/imgclass/#example-with-curl","title":"Example with curl:","text":"
    (base64 -w 0 your_image.jpg | awk '{print \"{\\\"image_base64\\\": \\\"\"$0\"\\\"}\"}' > /tmp/image_payload.json)\ncurl -X POST http://localhost:3000/api/v1/classify_image \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @/tmp/image_payload.json | jq\n
    "},{"location":"blog/huggingface/imgclass/#example-with-python-requests","title":"Example with python-requests:","text":"
    import requests\nimport base64\nwith open(\"your_image.jpg\", \"rb\") as image_file:\nimage_base64 = base64.b64encode(image_file.read()).decode('utf-8')\ndata = {\"image_base64\": image_base64}\nresponse = requests.post(\"http://localhost:3000/api/v1/classify_image\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/imgclass/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/imgclass/#different-image-classification-models","title":"Different Image Classification Models","text":"

    Tailor your API for a variety of classification tasks by selecting appropriate models:

    • Aesthetic Assessment: Use models like cafeai/cafe_aesthetic to classify images based on aesthetic qualities.
    • Gender Classification: Apply models such as rizvandwiki/gender-classification for gender recognition.
    • Food Recognition: Employ food-specific models like nateraw/food to categorize food items.
    • General Object Classification: Utilize models like microsoft/ResNet-50 for broad object classification.
    • NSFW Detection: Choose models designed for NSFW content detection, ensuring user-generated content is appropriate.
    "},{"location":"blog/huggingface/imgclass/#customizing-classification-parameters","title":"Customizing Classification Parameters","text":"

    For advanced needs, include additional parameters in your request to customize the classification, such as the confidence threshold or specific labels to focus on.

    "},{"location":"blog/huggingface/lm/","title":"Host Language Models Using Geniusrise","text":"

    Deploying language models for various tasks is now seamless with Geniusrise. This guide will walk you through setting up inference APIs for different language model applications, from text generation to code completion. We'll dive into the genius.yml configuration, illustrating how to fine-tune parameters for specific use cases and interact with your API using curl and python-requests.

    "},{"location":"blog/huggingface/lm/#getting-started","title":"Getting Started","text":"

    First, ensure Geniusrise and its text component are installed:

    pip install geniusrise\npip install geniusrise-text\n
    "},{"location":"blog/huggingface/lm/#configuration-file-geniusyml","title":"Configuration File: genius.yml","text":"

    The genius.yml file is the heart of your API setup. Here's a breakdown of its key parameters:

    • version: Defines the configuration format version.
    • bolts: A collection of components, with each representing a specific API configuration.
    • name: The identifier for your API.
    • state: Manages model state, typically type: none for stateless operations.
    • input and output: Define batch processing folders.
    • method: Operation mode, usually listen for API services.
    • args: Detailed model and server specifications.
    "},{"location":"blog/huggingface/lm/#example-configuration-for-standard-language-models","title":"Example Configuration for Standard Language Models","text":"
    version: \"1\"\n\nbolts:\n  my_bolt:\n    name: LanguageModelAPI\n    state:\n      type: none\n    input:\n      type: batch\n      args:\n        input_folder: ./input\n    output:\n      type: batch\n      args:\n        output_folder: ./output\n    method: listen\n    args:\n      model_name: \"mistralai/Mistral-7B-Instruct-v0.1\"\n      model_class: AutoModelForMaskedLM\n      tokenizer_class: AutoTokenizer\n      use_cuda: true\n      precision: float\n      device_map: cuda:0\n      endpoint: \"0.0.0.0\"\n      port: 3000\n      cors_domain: \"http://localhost:3000\"\n      username: user\n      password: password\n
    "},{"location":"blog/huggingface/lm/#vllm-very-large-language-models-configuration-example","title":"VLLM (Very Large Language Models) Configuration Example","text":"

    For handling VLLMs with Geniusrise, adjust the args to accommodate specific requirements, such as enabling eager loading or managing memory more efficiently:

    version: \"1\"\n\nbolts:\n  my_bolt:\n    name: VLLMAPI\n    state:\n      type: none\n    input:\n      type: batch\n      args:\n        input_folder: ./input\n    output:\n      type: batch\n      args:\n        output_folder: ./output\n    method: listen\n    args:\n      model_name: \"mistralai/Mistral-7B-Instruct-v0.1\"\n      model_class: AutoModelForCausalLM\n      tokenizer_class: AutoTokenizer\n      use_cuda: true\n      precision: float16\n      quantization: 0\n      device_map: auto\n      max_memory: None\n      torchscript: False\n      use_vllm: true\n      vllm_enforce_eager: True\n      vllm_max_model_len: 1024\n      endpoint: \"*\"\n      port: 3000\n      cors_domain: \"http://localhost:3000\"\n      username: user\n      password: password\n
    "},{"location":"blog/huggingface/lm/#launching-your-api","title":"Launching Your API","text":"

    Execute the following in your terminal:

    genius rise\n
    "},{"location":"blog/huggingface/lm/#interacting-with-your-api","title":"Interacting with Your API","text":""},{"location":"blog/huggingface/lm/#using-curl-for-http-requests","title":"Using curl for HTTP Requests","text":"

    Example for a Text Generation API:

    curl -X POST http://localhost:3000/api/v1/complete \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"prompt\": \"Here is your prompt.\", \"max_new_tokens\": 1024, \"do_sample\": true}'\n

    For VLLM Use Case:

    curl -X POST http://localhost:3000/api/v1/complete \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"prompt\": \"Your VLLM prompt.\", \"max_new_tokens\": 1024, \"do_sample\": true}'\n
    "},{"location":"blog/huggingface/lm/#python-requests-example","title":"Python requests Example","text":"

    Standard Language Model:

    import requests\nresponse = requests.post(\"http://localhost:3000/api/v1/complete\",\njson={\"prompt\": \"Here is your prompt.\", \"max_new_tokens\": 1024, \"do_sample\": True},\nauth=('user', 'password'))\nprint(response.json())\n

    VLLM Request:

    import requests\nresponse = requests.post(\"http://localhost:3000/api/v1/complete\",\njson={\"prompt\": \"Your VLLM prompt.\", \"max_new_tokens\": 1024, \"do_sample\": True},\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/lm/#usecases-variations","title":"Usecases & Variations","text":"

    Geniusrise caters to a wide array of language model applications, from text summarization with models like facebook/bart-large-cnn to code generation using WizardLM/WizardCoder-Python-7B-V1.0. By customizing the model_name, model_class, and related parameters in your genius.yml, you can tailor your API for specific tasks:

    • Text Summarization: Use summarization models to condense articles or documents.
    • Text Generation: Create stories, generate content, or even simulate dialogue.
    • Code Generation: Assist developers by completing code snippets or generating code from descriptions.

    Remember, while Geniusrise is a powerful tool for deploying language models, it's important to understand the capabilities and limitations of the models you choose to deploy. Always test your configurations and APIs thoroughly to ensure they meet your application's needs.

    "},{"location":"blog/huggingface/ner/","title":"Host NER Models Using Geniusrise","text":"

    Named Entity Recognition (NER) is a crucial task in natural language processing (NLP), enabling the identification of predefined categories such as the names of persons, organizations, locations, expressions of times, quantities, monetary values, percentages, etc. Geniusrise offers a streamlined approach to deploying NER models as APIs, facilitating the integration of sophisticated NER capabilities into applications. This guide explores setting up NER APIs using Geniusrise, covering various use cases and configurations.

    "},{"location":"blog/huggingface/ner/#quick-setup","title":"Quick Setup","text":"

    Installation:

    Ensure Geniusrise and its text package are installed:

    pip install geniusrise\npip install geniusrise-text\n

    Configuration File (genius.yml):

    Craft a genius.yml for your NER API. Here's an example:

    version: \"1\"\nbolts:\nmy_bolt:\nname: NamedEntityRecognitionAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: \"d4data/biomedical-ner-all\"\nmodel_class: \"AutoModelForTokenClassification\"\ntokenizer_class: \"AutoTokenizer\"\nuse_cuda: True\nprecision: \"float\"\ndevice_map: \"cuda:0\"\nendpoint: \"*\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n

    This setup configures an API for a biomedical NER model.

    "},{"location":"blog/huggingface/ner/#interacting-with-your-api","title":"Interacting with Your API","text":"

    Extract named entities by making a POST request:

    curl -X POST localhost:3000/api/v1/recognize_entities \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d '{\"text\": \"Input text here.\"}' | jq\n
    "},{"location":"blog/huggingface/ner/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/ner/#biomedical-ner","title":"Biomedical NER","text":"

    Deploy models like d4data/biomedical-ner-all for applications requiring identification of biomedical entities. This is useful for extracting specific terms from medical literature or patient records.

    "},{"location":"blog/huggingface/ner/#multilingual-ner","title":"Multilingual NER","text":"

    For global applications, choose models supporting multiple languages, such as Babelscape/wikineural-multilingual-ner. This enables entity recognition across different languages, broadening your application's user base.

    "},{"location":"blog/huggingface/ner/#domain-specific-ner","title":"Domain-Specific NER","text":"

    Models like pruas/BENT-PubMedBERT-NER-Gene are tailored for specific domains (e.g., genetics). Using domain-specific models can significantly improve accuracy for targeted applications.

    "},{"location":"blog/huggingface/ner/#configuration-tips","title":"Configuration Tips","text":"
    • Model Selection: Evaluate different models to find the best match for your application's needs, considering factors like language, domain, and performance.
    • Precision and Performance: Adjust precision and use_cuda settings based on your computational resources and response time requirements.
    • Security: Implement basic authentication using username and password to protect your API.
    "},{"location":"blog/huggingface/nli/","title":"Host NLI Models Using Geniusrise","text":"
    • Host NLI Models Using Geniusrise
    • Setup and Configuration
    • Understanding Configuration Parameters
    • Use Cases \\& API Interaction
      • 1. Entailment Checking
      • 2. Classification
      • 3. Textual Similarity
      • 4. Fact Checking
      • Customizing for Different NLI Models
    • Fun
      • Intent Tree Search
      • Real-Time Debate Judging
      • Automated Story Plot Analysis
      • Customer Feedback Interpretation
      • Virtual Courtroom Simulation
    • Play Around

    Natural Language Inference (NLI) is like a game where you have to figure out if one sentence can logically follow from another or not. Imagine you hear someone say, \"The dog is sleeping in the sun.\" Then, someone asks if it's true that \"The dog is outside.\" In this game, you'd say \"yes\" because if the dog is sleeping in the sun, it must be outside. Sometimes, the sentences don't match up, like if someone asks if the dog is swimming. You'd say \"no\" because sleeping in the sun doesn't mean swimming. And sometimes, you can't tell, like if someone asks if the dog is dreaming. Since you don't know, you'd say \"maybe.\" NLI is all about playing this matching game with sentences to help computers understand and use language like we do.

    This post will explore setting up APIs for various NLI tasks using Geniusrise, including entailment, classification, textual similarity, and fact-checking. We\u2019ll dive into the configuration details, provide interaction examples, and discuss how to tailor the setup for specific use cases.

    "},{"location":"blog/huggingface/nli/#setup-and-configuration","title":"Setup and Configuration","text":"

    Requirements

    • python 3.10, PPA, AUR, brew, Windows.
    • You need to have a GPU. Most of the system works with NVIDIA GPUs.
    • Install CUDA.

    Optional: Set up a virtual environment:

    virtualenv venv -p `which python3.10`\nsource venv/bin/activate\n

    Installation:

    Start by installing Geniusrise and the necessary text processing extensions:

    pip install geniusrise\npip install geniusrise-text\n

    Configuration (genius.yml):

    To deploy an NLI model, create a genius.yml configuration file:

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: NLIAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: NDugar/ZSD-microsoft-v2xxlmnli\n            model_class: AutoModelForSequenceClassification\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n

    Launch your API with the command:

    genius rise\n
    "},{"location":"blog/huggingface/nli/#understanding-configuration-parameters","title":"Understanding Configuration Parameters","text":"
    • model_name: Identifies the pre-trained model from Hugging Face to be used.
    • use_cuda: A boolean indicating whether to use GPU acceleration.
    • precision: Determines the computational precision, affecting performance and resource usage.
    • device_map: Specifies GPU allocation for model processing.
    • endpoint & port: Network address and port for API access.
    • username & password: Basic authentication credentials for API security.
    "},{"location":"blog/huggingface/nli/#use-cases-api-interaction","title":"Use Cases & API Interaction","text":""},{"location":"blog/huggingface/nli/#1-entailment-checking","title":"1. Entailment Checking","text":"

    Objective: Assess whether a hypothesis is supported (entailment), contradicted (contradiction), or neither (neutral) by a premise.

    Using curl:

    /usr/bin/curl -X POST localhost:3000/api/v1/entailment \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"premise\": \"This a very good entry level smartphone, battery last 2-3 days after fully charged when connected to the internet. No memory lag issue when playing simple hidden object games. Performance is beyond my expectation, i bought it with a good bargain, couldnt ask for more!\",\n        \"hypothesis\": \"the phone has an awesome battery life\"\n    }' | jq\n

    Using python-requests:

    import requests\ndata = {\n\"premise\": \"This a very good entry level smartphone, battery last 2-3 days after fully charged when connected to the internet. No memory lag issue when playing simple hidden object games. Performance is beyond my expectation, i bought it with a good bargain, couldnt ask for more!\",\n\"hypothesis\": \"the phone has an awesome battery life\"\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/entailment\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/nli/#2-classification","title":"2. Classification","text":"

    Objective: Classify a piece of text into predefined categories.

    Using curl:

    curl -X POST http://localhost:3000/api/v1/classify \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"text\": \"I love playing soccer.\", \"candidate_labels\": [\"sport\", \"cooking\", \"travel\"]}'\n

    Using python-requests:

    import requests\ndata = {\n\"text\": \"I love playing soccer.\",\n\"candidate_labels\": [\"sport\", \"cooking\", \"travel\"]\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/classify\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/nli/#3-textual-similarity","title":"3. Textual Similarity","text":"

    Objective: Determine the similarity score between two texts.

    Using curl:

    curl -X POST http://localhost:3000/api/v1/textual_similarity \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"text1\": \"I enjoy swimming.\", \"text2\": \"Swimming is my hobby.\"}'\n

    Using python-requests:

    import requests\ndata = {\n\"text1\": \"I enjoy swimming.\",\n\"text2\": \"Swimming is my hobby.\"\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/textual_similarity\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/nli/#4-fact-checking","title":"4. Fact Checking","text":"

    Objective: Verify the accuracy of a statement based on provided context or reference material.

    Using curl:

    curl -X POST http://localhost:3000/api/v1/fact_checking \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"context\": \"The Eiffel Tower is located in Paris.\", \"statement\": \"The Eiffel Tower is in France.\"}'\n

    Using python-requests:

    import requests\ndata = {\n\"context\": \"The Eiffel Tower is located in Paris.\",\n\"statement\": \"The Eiffel Tower is in France.\"\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/fact_checking\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n

    Each of these endpoints serves a specific NLI-related purpose, from evaluating logical relationships between texts to classifying and checking facts. By leveraging these APIs, developers can enhance their applications with deep, contextual understanding of natural language.

    "},{"location":"blog/huggingface/nli/#customizing-for-different-nli-models","title":"Customizing for Different NLI Models","text":"

    To deploy APIs for various NLI tasks, simply adjust the model_name in your genius.yml. For instance, to switch to a model optimized for textual similarity or fact-checking, replace NDugar/ZSD-microsoft-v2xxlmnli with the appropriate model identifier.

    "},{"location":"blog/huggingface/nli/#fun","title":"Fun","text":""},{"location":"blog/huggingface/nli/#intent-tree-search","title":"Intent Tree Search","text":"

    NLI when used for zero-shot classification can be used in a large number of contexts. Consider a chat usecase where there is an entire tree of possible scenarios, and you want to identify which node in the tree you're in to feed that particular prompt to another chat model.

    Lets consider a 2-level tree such as this for an internal helpdesk:

    intents = {\n\"IT Support\": [\n\"Computer or hardware issues\",\n\"Software installation and updates\",\n\"Network connectivity problems\",\n\"Access to digital tools and resources\",\n],\n\"HR Inquiries\": [\n\"Leave policy and requests\",\n\"Benefits and compensation queries\",\n\"Employee wellness programs\",\n\"Performance review process\",\n],\n\"Facilities Management\": [\n\"Workspace maintenance requests\",\n\"Meeting room bookings\",\n\"Parking and transportation services\",\n\"Health and safety concerns\",\n],\n\"Finance and Expense\": [\n\"Expense report submission\",\n\"Payroll inquiries\",\n\"Budget allocation questions\",\n\"Procurement process\",\n],\n\"Training and Development\": [\n\"Professional development opportunities\",\n\"Training program schedules\",\n\"Certification and learning resources\",\n\"Mentorship and coaching programs\",\n],\n\"Project Management\": [\n\"Project collaboration tools\",\n\"Deadline extensions and modifications\",\n\"Resource allocation\",\n\"Project status updates\",\n],\n\"Travel and Accommodation\": [\n\"Business travel arrangements\",\n\"Travel policy and reimbursements\",\n\"Accommodation bookings\",\n\"Visa and travel documentation\",\n],\n\"Legal and Compliance\": [\n\"Contract review requests\",\n\"Data privacy and security policies\",\n\"Compliance training and certifications\",\n\"Legal consultation and support\",\n],\n\"Communications and Collaboration\": [\n\"Internal communication platforms\",\n\"Collaboration tools and access\",\n\"Team meeting coordination\",\n\"Cross-departmental initiatives\",\n],\n\"Employee Feedback and Suggestions\": [\n\"Employee satisfaction surveys\",\n\"Feedback submission channels\",\n\"Suggestion box for improvements\",\n\"Employee engagement activities\",\n],\n\"Onboarding and Offboarding\": [\n\"New employee onboarding process\",\n\"Offboarding procedures\",\n\"Orientation schedules\",\n\"Transition support\",\n],\n\"Administrative Assistance\": [\n\"Document and 
record-keeping\",\n\"Scheduling and calendar management\",\n\"Courier and mailing services\",\n\"Administrative support requests\",\n],\n}\n

    Lets deploy a large model so its more intelligent:

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: NLIAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: facebook/bart-large-mnli\n            model_class: AutoModelForSequenceClassification\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n

    we can browse through this tree to zero in on the user's micro-intent to retrieve our prompt to feed into the model:

    import requests\nprompt =  \"I need to travel to singapore next week \ud83d\ude03.\"\ndef find_most_probable_class(prompt, intents):\nresponse = requests.post(\"http://localhost:3000/api/v1/classify\",\njson={\"text\": prompt, \"candidate_labels\": intents},\nauth=('user', 'password'))\nlabel_scores = response.json()[\"label_scores\"]\nmax_score = max(label_scores.values())\nchosen_label = [ k for k,v in label_scores.items() if v == max_score ][0]\nreturn chosen_label\nlevel1 = find_most_probable_class(prompt, list(intents.keys()))\nlevel2 = find_most_probable_class(prompt, list(intents[level1]))\nprint(f\"The request is for department: {level1} and specifically for {level2}\")\n# The request is for department: Travel and Accommodation and specifically for Visa and travel documentation\n
    "},{"location":"blog/huggingface/nli/#real-time-debate-judging","title":"Real-Time Debate Judging","text":"

    Imagine a scenario where an AI is used to judge a debate competition in real-time. Each participant's argument is evaluated for logical consistency, relevance, and how well it counters the opponent's previous points.

    debate_points = [\n{\"speaker\": \"Alice\", \"statement\": \"Renewable energy can effectively replace fossil fuels.\"},\n{\"speaker\": \"Bob\", \"statement\": \"Renewable energy is not yet reliable enough to meet all our energy needs.\"},\n]\nfor i in range(1, len(debate_points)):\npremise = debate_points[i-1][\"statement\"]\nhypothesis = debate_points[i][\"statement\"]\nresponse = requests.post(\"http://localhost:3000/api/v1/entailment\",\njson={\"premise\": premise, \"hypothesis\": hypothesis},\nauth=('user', 'password'))\nlabel_scores = response.json()[\"label_scores\"]\nmax_score = max(label_scores.values())\nchosen_label = [ k for k,v in label_scores.items() if v == max_score ][0]\nprint(f\"Debate point by {debate_points[i]['speaker']}: {hypothesis}\")\nprint(f\"Judgement: {chosen_label}\")\n# Debate point by Bob: Renewable energy is not yet reliable enough to meet all our energy needs.\n# Judgement: neutral\n
    "},{"location":"blog/huggingface/nli/#automated-story-plot-analysis","title":"Automated Story Plot Analysis","text":"

    A model can be used to analyze a story plot to determine if the events and characters' decisions are logically consistent and plausible within the story's universe.

    story_events = [\n\"The hero discovers a secret door in their house leading to a magical world.\",\n\"Despite being in a magical world, the hero uses their smartphone to call for help.\",\n\"The hero defeats the villain using a magical sword found in the new world.\",\n]\nfor i in range(1, len(story_events)):\npremise = story_events[i-1]\nhypothesis = story_events[i]\nresponse = requests.post(\"http://localhost:3000/api/v1/entailment\",\njson={\"premise\": premise, \"hypothesis\": hypothesis},\nauth=('user', 'password'))\nlabel_scores = response.json()[\"label_scores\"]\nif \"neutral\" in label_scores:\ndel label_scores[\"neutral\"]\nmax_score = max(label_scores.values())\nchosen_label = [ k for k,v in label_scores.items() if v == max_score ][0]\nprint(f\"Story event - {chosen_label}: {hypothesis}\")\n# Story event - contradiction: Despite being in a magical world, the hero uses their smartphone to call for help.\n# Story event - contradiction: The hero defeats the villain using a magical sword found in the new world.\n
    "},{"location":"blog/huggingface/nli/#customer-feedback-interpretation","title":"Customer Feedback Interpretation","text":"

    This application involves analyzing customer feedback to categorize it into compliments, complaints, or suggestions, providing valuable insights into customer satisfaction and areas for improvement.

    feedbacks = [\n\"The new update makes the app much easier to use. Great job!\",\n\"I've been facing frequent crashes after the last update.\",\n\"It would be great if you could add a dark mode feature.\",\n\"Otherwise you leave me no choice but to slowly torture your soul.\"\n]\ncategories = [\"compliment\", \"complaint\", \"suggestion\", \"murderous intent\"]\nfor feedback in feedbacks:\nresponse = requests.post(\"http://localhost:3000/api/v1/classify\",\njson={\"text\": feedback, \"candidate_labels\": categories},\nauth=('user', 'password'))\nlabel_scores = response.json()[\"label_scores\"]\nmax_score = max(label_scores.values())\nchosen_label = [ k for k,v in label_scores.items() if v == max_score ][0]\nprint(f\"Feedback - {chosen_label}: {feedback}\")\n# Feedback - suggestion: The new update makes the app much easier to use. Great job!\n# Feedback - complaint: I've been facing frequent crashes after the last update.\n# Feedback - suggestion: It would be great if you could add a dark mode feature.\n# Feedback - murderous intent: Otherwise you leave me no choice but to slowly torture your soul.\n
    "},{"location":"blog/huggingface/nli/#virtual-courtroom-simulation","title":"Virtual Courtroom Simulation","text":"

    This is a game where players can simulate courtroom trials! Players submit evidence and arguments, and the AI acts as the judge, determining the credibility and relevance of each submission to the case.

    courtroom_evidence = [\n{\"evidence\": \"The defendant's fingerprints were found on the weapon.\"},\n{\"evidence\": \"A witness reported seeing the defendant near the crime scene.\"},\n]\nfor evidence in courtroom_evidence:\nsubmission = evidence[\"evidence\"]\nresponse = requests.post(\"http://localhost:3000/api/v1/classify\",\njson={\"text\": submission, \"candidate_labels\": [\"highly relevant\", \"relevant\", \"irrelevant\"]},\nauth=('user', 'password'))\nlabel_scores = response.json()[\"label_scores\"]\nmax_score = max(label_scores.values())\nchosen_label = [k for k, v in label_scores.items() if v == max_score][0]\nprint(f\"Evidence submitted: {submission}\")\nprint(f\"Judged as: {chosen_label}\")\n# Evidence submitted: The defendant's fingerprints were found on the weapon.\n# Judged as: highly relevant\n# Evidence submitted: A witness reported seeing the defendant near the crime scene.\n# Judged as: highly relevant\n
    "},{"location":"blog/huggingface/nli/#play-around","title":"Play Around","text":"

    There are 218 models under \"zero-shot-classification\" on the huggingface hub, but a simple search for nli turns up 822 models, so there are a lot of models that are not tagged properly. NLI is a very interesting and core NLP task, and a few good general models can be turned into a lot of fun!

    "},{"location":"blog/huggingface/ocr/","title":"Host OCR Models Using Geniusrise","text":"

    Optical Character Recognition (OCR) technology has revolutionized the way we process and digitize printed or handwritten documents, making it easier to edit, search, and store textual content in digital formats. Geniusrise facilitates the deployment of OCR models as APIs, enabling developers to integrate OCR capabilities into their applications seamlessly. This guide will demonstrate setting up OCR APIs using Geniusrise, covering the configuration, usage examples, and highlighting different use cases.

    "},{"location":"blog/huggingface/ocr/#setup-and-configuration","title":"Setup and Configuration","text":"

    Installation:

    First, install Geniusrise and its text extension:

    pip install geniusrise\npip install geniusrise-vision\n

    Configuration (genius.yml):

    Create a genius.yml file to define your OCR service:

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: ImageOCRAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: paddleocr\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n

    Activate your API with:

    genius rise\n
    "},{"location":"blog/huggingface/ocr/#configuration-parameters-explained","title":"Configuration Parameters Explained","text":"
    • model_name: Specifies the pre-trained model. For OCR tasks, models like paddleocr, facebook/nougat-base, or easyocr are popular choices.
    • use_cuda: Enables GPU acceleration.
    • precision: Affects performance through computational precision.
    • endpoint & port: Network address and port for API access.
    • username & password: Security credentials for API usage.
    "},{"location":"blog/huggingface/ocr/#using-paddleocr","title":"Using PaddleOCR","text":"

    PaddleOCR offers state-of-the-art accuracy and supports multiple languages, making it a great choice for applications requiring high-performance OCR.

    "},{"location":"blog/huggingface/ocr/#geniusyml-for-paddleocr","title":"genius.yml for PaddleOCR","text":"
    version: \"1\"\nbolts:\nmy_bolt:\nname: ImageOCRAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: \"paddleocr\"\ndevice_map: \"cuda:0\"\nendpoint: \"0.0.0.0\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n

    This configuration sets up an OCR API using PaddleOCR. After setting up your genius.yml, activate your API by running:

    genius rise\n
    "},{"location":"blog/huggingface/ocr/#using-easyocr","title":"Using EasyOCR","text":"

    EasyOCR is a practical tool that supports more than 80 languages and doesn't require machine learning expertise to implement.

    "},{"location":"blog/huggingface/ocr/#geniusyml-for-easyocr","title":"genius.yml for EasyOCR","text":"
    version: \"1\"\nbolts:\nmy_bolt:\nname: ImageOCRAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: \"easyocr\"\ndevice_map: \"cuda:0\"\nendpoint: \"0.0.0.0\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n

    This YAML file configures an OCR API utilizing EasyOCR. Like with PaddleOCR, you'll need to execute genius rise to get the API running.

    "},{"location":"blog/huggingface/ocr/#general-api-interaction-examples","title":"General API Interaction Examples","text":"

    Interacting with these OCR APIs can be done through HTTP requests, where you send a base64-encoded image and receive the detected text in response. Here's a generic example of how to send a request to either OCR API configured above:

    "},{"location":"blog/huggingface/ocr/#example-with-curl","title":"Example with curl:","text":"
    (base64 -w 0 path_to_your_image.jpg | awk '{print \"{\\\"image_base64\\\": \\\"\"$0\"\\\"}\"}' > /tmp/image_payload.json)\ncurl -X POST http://localhost:3000/api/v1/ocr \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @/tmp/image_payload.json | jq\n
    "},{"location":"blog/huggingface/ocr/#example-with-python-requests","title":"Example with python-requests:","text":"
    import requests\nimport base64\nwith open(\"path_to_your_image.jpg\", \"rb\") as image_file:\nimage_base64 = base64.b64encode(image_file.read()).decode('utf-8')\ndata = {\"image_base64\": image_base64}\nresponse = requests.post(\"http://localhost:3000/api/v1/ocr\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/ocr/#interacting-with-the-ocr-api","title":"Interacting with the OCR API","text":"

    OCR tasks involve converting images of text into editable and searchable data. Here's how to interact with the OCR API using curl and python-requests:

    "},{"location":"blog/huggingface/ocr/#example-with-curl_1","title":"Example with curl:","text":"
    (base64 -w 0 your_image.jpg | awk '{print \"{\\\"image_base64\\\": \\\"\"$0\"\\\"}\"}' > /tmp/image_payload.json)\ncurl -X POST http://localhost:3000/api/v1/ocr \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @/tmp/image_payload.json | jq\n
    "},{"location":"blog/huggingface/ocr/#example-with-python-requests_1","title":"Example with python-requests:","text":"
    import requests\nimport base64\nimage_path = 'your_image.jpg'\nwith open(image_path, 'rb') as image_file:\nimage_base64 = base64.b64encode(image_file.read()).decode('utf-8')\ndata = {\n\"image_base64\": image_base64\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/ocr\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/ocr/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/ocr/#different-ocr-models","title":"Different OCR Models","text":"

    To adapt the API for various OCR tasks, such as document digitization, receipt scanning, or handwritten note conversion, you can switch the model_name in your genius.yml:

    • Document OCR: Use models like paddleocr for general document recognition.
    • Handwritten OCR: Opt for models specifically fine-tuned for handwriting, such as facebook/nougat-base.
    • Receipt OCR: Utilize domain-specific models designed for extracting information from receipts or invoices.
    "},{"location":"blog/huggingface/ocr/#customizing-ocr-parameters","title":"Customizing OCR Parameters","text":"

    For advanced OCR needs, additional parameters can be included in your request to customize the OCR process, such as specifying the language, adjusting the resolution, or defining the output format.

    "},{"location":"blog/huggingface/qa/","title":"Host Question Answering Models Using Geniusrise","text":"
    • Host Question Answering Models Using Geniusrise
    • Types of Question Answering Tasks
      • Generative
      • Extractive
      • Why Extractive May be Better
    • Installation and Configuration
    • Understanding genius.yml
    • Use Cases \\& Variations
    • Making API Requests
      • Direct Question Answering API
      • Hugging Face Pipeline API
    • Fun
      • Long contexts
      • Domain-specific
    • Play around

    Deploying question answering (QA) models can significantly enhance the capabilities of applications, providing users with specific, concise answers to their queries. Geniusrise simplifies this process, enabling developers to rapidly set up and deploy QA APIs. This guide will walk you through the steps to create inference APIs for different QA tasks using Geniusrise, focusing on configuring the genius.yml file and providing interaction examples via curl and python-requests.

    "},{"location":"blog/huggingface/qa/#types-of-question-answering-tasks","title":"Types of Question Answering Tasks","text":"

    Before diving into the setup and deployment of question answering (QA) models using Geniusrise, it's essential to understand the two main types of QA tasks: generative and extractive. This distinction is crucial for selecting the right model for your application and configuring your genius.yml file accordingly.

    "},{"location":"blog/huggingface/qa/#generative","title":"Generative","text":"

    Generative QA models are designed to produce answers by generating text based on the context and the question asked. These models do not restrict their responses to the text's snippets but rather \"generate\" a new text passage that answers the question. Generative models are powerful for open-ended questions where the answer may not be directly present in the context or requires synthesis of information from multiple parts of the context.

    "},{"location":"blog/huggingface/qa/#extractive","title":"Extractive","text":"

    Extractive QA models, on the other hand, identify and extract a specific snippet from the provided text that answers the question. This approach is particularly effective for factual questions where the answer is explicitly stated in the text. Extractive QA is advantageous because it limits the model's responses to the actual content of the input text, reducing the chances of hallucination (producing incorrect or unfounded information) that can occur with generative models.

    "},{"location":"blog/huggingface/qa/#why-extractive-may-be-better","title":"Why Extractive May be Better","text":"
    • Accuracy: Extractive QA models provide answers directly sourced from the input text, ensuring that the information is accurate and grounded in the provided context.
    • Reliability: By constraining the answers to the text snippets, extractive QA minimizes the risk of hallucinations, making it a reliable choice for applications where factual correctness is paramount.
    • Efficiency for RAG: Extractive QA tasks can be particularly efficient for Retrieval-Augmented Generation (RAG) because they allow for precise information retrieval without the need for generating new text, which can be computationally more demanding.

    The models discussed in this guide focus on extractive QA tasks, which are particularly well-suited for direct, fact-based question answering from provided texts.

    Extractive QA models are ideal for applications requiring high precision and direct answers from given texts.

    "},{"location":"blog/huggingface/qa/#installation-and-configuration","title":"Installation and Configuration","text":"

    Requirements

    • python 3.10, PPA, AUR, brew, Windows.
    • You need to have a GPU. Most of the system works with NVIDIA GPUs.
    • Install CUDA.

    Optional: Set up a virtual environment:

    virtualenv venv -p `which python3.10`\nsource venv/bin/activate\n

    Step 1: Install Geniusrise

    pip install torch\npip install geniusrise\npip install geniusrise-text\n

    Step 2: Create Your Configuration File (genius.yml)

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: QAAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: deepset/deberta-v3-base-squad2\n            model_class: AutoModelForQuestionAnswering\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n

    After setting up your genius.yml, launch your API with:

    genius rise\n
    "},{"location":"blog/huggingface/qa/#understanding-geniusyml","title":"Understanding genius.yml","text":"

    Each parameter in the genius.yml file is crucial for customizing your QA API:

    • model_name: The model identifier from Hugging Face, tailored to your specific QA task.
    • use_cuda: Toggle GPU acceleration (true or false). Using GPUs can drastically reduce inference time.
    • precision: Model precision (float for single precision). Adjusting this can affect performance and accuracy, e.g. to bfloat16.
    • device_map: Assigns model parts to specific GPUs, useful for systems with multiple GPUs. cuda:0 implies use GPU 0.
    • endpoint & port: Defines where your API is hosted, allowing for easy access.
    • username & password: Secure your API with basic authentication.
    "},{"location":"blog/huggingface/qa/#use-cases-variations","title":"Use Cases & Variations","text":"

    Replacing Model for Different QA Tasks

    To adapt the API for various QA tasks, simply change the model_name in your genius.yml. For example, to switch to a model specializing in medical QA, you might use bert-large-uncased-whole-word-masking-finetuned-squad for broader coverage of medical inquiries.

    Example genius.yml for a Different Use Case:

    args:\n  model_name: \"bert-large-uncased-whole-word-masking-finetuned-squad\"\n
    "},{"location":"blog/huggingface/qa/#making-api-requests","title":"Making API Requests","text":"

    Geniusrise enables two primary ways to interact with your Question Answering API: through direct question-answering and utilizing the Hugging Face pipeline. Below, we provide examples on how to use both endpoints using curl and python-requests.

    "},{"location":"blog/huggingface/qa/#direct-question-answering-api","title":"Direct Question Answering API","text":"

    This API endpoint directly answers questions based on the provided context.

    Using curl:

    /usr/bin/curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"data\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n        \"question\": \"What is the common wisdom about RNNs?\"\n    }' | jq\n

    Using python-requests:

    import requests\ndata = {\n\"data\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n\"question\": \"What is the common wisdom about RNNs?\"\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/answer\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/qa/#hugging-face-pipeline-api","title":"Hugging Face Pipeline API","text":"

    This API endpoint leverages the Hugging Face pipeline for answering questions, offering a streamlined way to use pre-trained models for question answering.

    Using curl:

    curl -X POST http://localhost:3000/api/v1/answer_pipeline \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"question\": \"Who created Geniusrise?\", \"data\": \"Geniusrise was created by a team of dedicated developers.\"}'\n

    Using python-requests:

    import requests\ndata = {\n\"question\": \"Who created Geniusrise?\",\n\"data\": \"Geniusrise was created by a team of dedicated developers.\"\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/answer_pipeline\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/qa/#fun","title":"Fun","text":""},{"location":"blog/huggingface/qa/#long-contexts","title":"Long contexts","text":"

    A common problem that QA models face is small context sizes. This limits the model's ability to process large documents or large amounts of text in their inputs. Though language models keep getting bigger contexts, QA models tend to be much smaller and support smaller contexts.

    However there are exceptions like this one:

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: QAAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: valhalla/longformer-base-4096-finetuned-squadv1\n            model_class: AutoModelForQuestionAnswering\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    /usr/bin/curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"data\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me. This post is about sharing some of that magic with you. By the way, together with this post I am also releasing code on Github that allows you to train character-level language models based on multi-layer LSTMs. You give it a large chunk of text and it will learn to generate text like it one character at a time. You can also use it to reproduce my experiments below. But we\u2019re getting ahead of ourselves; What are RNNs anyway? Recurrent Neural Networks Sequences. Depending on your background you might be wondering: What makes Recurrent Networks so special? A glaring limitation of Vanilla Neural Networks (and also Convolutional Networks) is that their API is too constrained: they accept a fixed-sized vector as input (e.g. an image) and produce a fixed-sized vector as output (e.g. probabilities of different classes). Not only that: These models perform this mapping using a fixed amount of computational steps (e.g. the number of layers in the model). 
The core reason that recurrent nets are more exciting is that they allow us to operate over sequences of vectors: Sequences in the input, the output, or in the most general case both. A few examples may make this more concrete: Each rectangle is a vector and arrows represent functions (e.g. matrix multiply). Input vectors are in red, output vectors are in blue and green vectors hold the RNNs state (more on this soon). From left to right: (1) Vanilla mode of processing without RNN, from fixed-sized input to fixed-sized output (e.g. image classification). (2) Sequence output (e.g. image captioning takes an image and outputs a sentence of words). (3) Sequence input (e.g. sentiment analysis where a given sentence is classified as expressing positive or negative sentiment). (4) Sequence input and sequence output (e.g. Machine Translation: an RNN reads a sentence in English and then outputs a sentence in French). (5) Synced sequence input and output (e.g. video classification where we wish to label each frame of the video). Notice that in every case are no pre-specified constraints on the lengths sequences because the recurrent transformation (green) is fixed and can be applied as many times as we like. As you might expect, the sequence regime of operation is much more powerful compared to fixed networks that are doomed from the get-go by a fixed number of computational steps, and hence also much more appealing for those of us who aspire to build more intelligent systems. Moreover, as we\u2019ll see in a bit, RNNs combine the input vector with their state vector with a fixed (but learned) function to produce a new state vector. This can in programming terms be interpreted as running a fixed program with certain inputs and some internal variables. Viewed this way, RNNs essentially describe programs. In fact, it is known that RNNs are Turing-Complete in the sense that they can to simulate arbitrary programs (with proper weights). 
But similar to universal approximation theorems for neural nets you shouldn\u2019t read too much into this. In fact, forget I said anything.\",\n        \"question\": \"What do the models essentially do?\"\n    }' | jq\n\n# {\n#   \"data\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me. This post is about sharing some of that magic with you. By the way, together with this post I am also releasing code on Github that allows you to train character-level language models based on multi-layer LSTMs. You give it a large chunk of text and it will learn to generate text like it one character at a time. You can also use it to reproduce my experiments below. But we\u2019re getting ahead of ourselves; What are RNNs anyway? Recurrent Neural Networks Sequences. Depending on your background you might be wondering: What makes Recurrent Networks so special? A glaring limitation of Vanilla Neural Networks (and also Convolutional Networks) is that their API is too constrained: they accept a fixed-sized vector as input (e.g. an image) and produce a fixed-sized vector as output (e.g. probabilities of different classes). 
Not only that: These models perform this mapping using a fixed amount of computational steps (e.g. the number of layers in the model). The core reason that recurrent nets are more exciting is that they allow us to operate over sequences of vectors: Sequences in the input, the output, or in the most general case both. A few examples may make this more concrete: Each rectangle is a vector and arrows represent functions (e.g. matrix multiply). Input vectors are in red, output vectors are in blue and green vectors hold the RNNs state (more on this soon). From left to right: (1) Vanilla mode of processing without RNN, from fixed-sized input to fixed-sized output (e.g. image classification). (2) Sequence output (e.g. image captioning takes an image and outputs a sentence of words). (3) Sequence input (e.g. sentiment analysis where a given sentence is classified as expressing positive or negative sentiment). (4) Sequence input and sequence output (e.g. Machine Translation: an RNN reads a sentence in English and then outputs a sentence in French). (5) Synced sequence input and output (e.g. video classification where we wish to label each frame of the video). Notice that in every case are no pre-specified constraints on the lengths sequences because the recurrent transformation (green) is fixed and can be applied as many times as we like. As you might expect, the sequence regime of operation is much more powerful compared to fixed networks that are doomed from the get-go by a fixed number of computational steps, and hence also much more appealing for those of us who aspire to build more intelligent systems. Moreover, as we\u2019ll see in a bit, RNNs combine the input vector with their state vector with a fixed (but learned) function to produce a new state vector. This can in programming terms be interpreted as running a fixed program with certain inputs and some internal variables. Viewed this way, RNNs essentially describe programs. 
In fact, it is known that RNNs are Turing-Complete in the sense that they can to simulate arbitrary programs (with proper weights). But similar to universal approximation theorems for neural nets you shouldn\u2019t read too much into this. In fact, forget I said anything.\",\n#   \"question\": \"What do the models essentially do?\",\n#   \"answer\": {\n#     \"answers\": [\n#       \"they allow us to operate over sequences of vectors\" <---\n#     ],\n#     \"aggregation\": \"NONE\"\n#   }\n# }\n
    "},{"location":"blog/huggingface/qa/#domain-specific","title":"Domain-specific","text":"

    QA models can also be trained to be better at answering questions at chosen domains. This one is optimized for healthcare:

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: QAAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: dmis-lab/biobert-large-cased-v1.1-squad\n            model_class: AutoModelForQuestionAnswering\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    /usr/bin/curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"data\": \"The choice of medication or combination of medications depends on various factors, including your personal risk factors, your age, your health and possible drug side effects. Common choices include:  Statins. Statins block a substance your liver needs to make cholesterol. This causes your liver to remove cholesterol from your blood. Choices include atorvastatin, fluvastatin, lovastatin, pitavastatin, rosuvastatin and simvastatin. Cholesterol absorption inhibitors. The drug ezetimibe helps reduce blood cholesterol by limiting the absorption of dietary cholesterol. Ezetimibe can be used with a statin drug. Bempedoic acid. This newer drug works in much the same way as statins but is less likely to cause muscle pain. Adding bempedoic acid to a maximum statin dosage can help lower LDL significantly. A combination pill containing both bempedoic acid and ezetimibe also is available. Bile-acid-binding resins. Your liver uses cholesterol to make bile acids, a substance needed for digestion. The medications cholestyramine, colesevelam and colestipol lower cholesterol indirectly by binding to bile acids. This prompts your liver to use excess cholesterol to make more bile acids, which reduces the level of cholesterol in your blood. PCSK9 inhibitors. These drugs can help the liver absorb more LDL cholesterol, which lowers the amount of cholesterol circulating in your blood. Alirocumab and evolocumab might be used for people who have a genetic condition that causes very high levels of LDL or in people with a history of coronary disease who have intolerance to statins or other cholesterol medications. They are injected under the skin every few weeks and are expensive. Medications for high triglycerides If you also have high triglycerides, your doctor might prescribe:  Fibrates. 
The medications fenofibrate and gemfibrozil reduce your liver s production of very-low-density lipoprotein cholesterol and speed the removal of triglycerides from your blood. VLDL cholesterol contains mostly triglycerides. Using fibrates with a statin can increase the risk of statin side effects. Omega-3 fatty acid supplements. Omega-3 fatty acid supplements can help lower your triglycerides. They are available by prescription or over-the-counter.\",\n        \"question\": \"What do i take if i have high VLDL?\"\n    }' | jq\n\n# {\n#   \"data\": \"The choice of medication or combination of medications depends on various factors, including your personal risk factors, your age, your health and possible drug side effects. Common choices include:  Statins. Statins block a substance your liver needs to make cholesterol. This causes your liver to remove cholesterol from your blood. Choices include atorvastatin, fluvastatin, lovastatin, pitavastatin, rosuvastatin and simvastatin. Cholesterol absorption inhibitors. The drug ezetimibe helps reduce blood cholesterol by limiting the absorption of dietary cholesterol. Ezetimibe can be used with a statin drug. Bempedoic acid. This newer drug works in much the same way as statins but is less likely to cause muscle pain. Adding bempedoic acid to a maximum statin dosage can help lower LDL significantly. A combination pill containing both bempedoic acid and ezetimibe also is available. Bile-acid-binding resins. Your liver uses cholesterol to make bile acids, a substance needed for digestion. The medications cholestyramine, colesevelam and colestipol lower cholesterol indirectly by binding to bile acids. This prompts your liver to use excess cholesterol to make more bile acids, which reduces the level of cholesterol in your blood. PCSK9 inhibitors. These drugs can help the liver absorb more LDL cholesterol, which lowers the amount of cholesterol circulating in your blood. 
Alirocumab and evolocumab might be used for people who have a genetic condition that causes very high levels of LDL or in people with a history of coronary disease who have intolerance to statins or other cholesterol medications. They are injected under the skin every few weeks and are expensive. Medications for high triglycerides If you also have high triglycerides, your doctor might prescribe:  Fibrates. The medications fenofibrate and gemfibrozil reduce your liver s production of very-low-density lipoprotein cholesterol and speed the removal of triglycerides from your blood. VLDL cholesterol contains mostly triglycerides. Using fibrates with a statin can increase the risk of statin side effects. Omega-3 fatty acid supplements. Omega-3 fatty acid supplements can help lower your triglycerides. They are available by prescription or over-the-counter.\",\n#   \"question\": \"What do i take if i have high VLDL?\",\n#   \"answer\": {\n#     \"answers\": [\n#       \"fibrates\"  <-------\n#     ],\n#     \"aggregation\": \"NONE\"\n#   }\n# }\n

    Now there are also models like the sloshed lawyer but they are not recommended in production \ud83d\ude06

    "},{"location":"blog/huggingface/qa/#play-around","title":"Play around","text":"

    There are 9,593 QA models on HuggingFace, go explore!

    "},{"location":"blog/huggingface/segment/","title":"Host Segmentation Models Using Geniusrise","text":"

    Segmentation models are pivotal in computer vision, allowing developers to delineate and understand the context within images by classifying each pixel into a set category. This capability is crucial for tasks ranging from autonomous driving to medical imaging. Geniusrise enables easy deployment of segmentation models as APIs, facilitating the integration of advanced vision capabilities into applications. This guide will demonstrate how to set up APIs for various segmentation tasks using Geniusrise, including semantic segmentation, panoptic segmentation, and instance segmentation.

    "},{"location":"blog/huggingface/segment/#setup-and-configuration","title":"Setup and Configuration","text":"

    Installation:

    To begin, ensure that Geniusrise and its vision extension are installed:

    pip install geniusrise\npip install geniusrise-vision\n

    Configuration (genius.yml):

    Define your segmentation service in a genius.yml file. Here's an example for setting up a semantic segmentation model:

    version: \"1\"\nbolts:\nmy_bolt:\nname: VisionSegmentationAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: \"facebook/mask2former-swin-large-ade-panoptic\"\nmodel_class: \"Mask2FormerForUniversalSegmentation\"\nprocessor_class: \"AutoImageProcessor\"\ndevice_map: \"cuda:0\"\nuse_cuda: true\nprecision: \"float\"\nendpoint: \"0.0.0.0\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n

    Activate your API by running:

    genius rise\n
    "},{"location":"blog/huggingface/segment/#configuration-parameters-explained","title":"Configuration Parameters Explained","text":"
    • model_name: The pre-trained model identifier, adaptable based on the segmentation task (semantic, panoptic, instance).
    • model_class & processor_class: Specify the model and processor classes, essential for interpreting and processing images.
    • device_map & use_cuda: Configure GPU acceleration for enhanced processing speed.
    • endpoint, port, username, & password: Network settings and authentication for API access.
    "},{"location":"blog/huggingface/segment/#interacting-with-the-segmentation-api","title":"Interacting with the Segmentation API","text":"

    The interaction involves sending a base64-encoded image to the API and receiving segmented output. Here's how to execute this using curl and python-requests:

    "},{"location":"blog/huggingface/segment/#example-with-curl","title":"Example with curl:","text":"
    (base64 -w 0 your_image.jpg | awk '{print \"{\\\"image_base64\\\": \\\"\"$0\"\\\", \\\"subtask\\\": \\\"semantic\\\"}\"}' > /tmp/image_payload.json)\ncurl -X POST http://localhost:3000/api/v1/segment_image \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @/tmp/image_payload.json | jq\n
    "},{"location":"blog/huggingface/segment/#example-with-python-requests","title":"Example with python-requests:","text":"
    import requests\nimport base64\nwith open(\"your_image.jpg\", \"rb\") as image_file:\nimage_base64 = base64.b64encode(image_file.read()).decode('utf-8')\ndata = {\n\"image_base64\": image_base64,\n\"subtask\": \"semantic\"  # or \"panoptic\" for panoptic segmentation\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/segment_image\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/segment/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/segment/#different-segmentation-tasks","title":"Different Segmentation Tasks","text":"

    By modifying the subtask parameter, you can tailor the API for various segmentation models:

    • Semantic Segmentation: Classifies each pixel into a predefined category. Useful in urban scene understanding and medical image analysis.
    • Panoptic Segmentation: Combines semantic and instance segmentation, identifying and delineating each object instance. Ideal for detailed scene analysis.
    • Instance Segmentation: Identifies each instance of each object category. Used in scenarios requiring precise object boundaries.
    "},{"location":"blog/huggingface/segment/#customizing-segmentation-parameters","title":"Customizing Segmentation Parameters","text":"

    For advanced segmentation needs, additional parameters can be included in your request to customize the processing, such as specifying the output resolution or the segmentation task (semantic, panoptic, instance).

    "},{"location":"blog/huggingface/speak/","title":"Host Text to Speech Models Using Geniusrise","text":"

    Text to Speech (TTS) technology has transformed how we interact with digital devices, making information more accessible and enhancing user experiences. Geniusrise simplifies the deployment of TTS models as APIs, allowing developers to incorporate high-quality voice synthesis into their applications. This guide focuses on setting up TTS APIs with Geniusrise, showcasing various use cases and providing examples to help you get started.

    "},{"location":"blog/huggingface/speak/#quick-setup","title":"Quick Setup","text":"

    Installation:

    Begin by installing Geniusrise and its dependencies:

    pip install geniusrise\npip install geniusrise-vision\n

    Configuration File (genius.yml):

    Define your TTS API using a genius.yml file. Here's a basic example:

    version: \"1\"\nbolts:\nmy_bolt:\nname: TextToSpeechAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: \"facebook/mms-tts-eng\"\nmodel_class: \"VitsModel\"\nprocessor_class: \"VitsTokenizer\"\nuse_cuda: True\nprecision: \"float32\"\ndevice_map: \"cuda:0\"\nendpoint: \"*\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n

    This configuration sets up an API for Facebook's MMS TTS English model.

    "},{"location":"blog/huggingface/speak/#interacting-with-your-api","title":"Interacting with Your API","text":"

    Convert text to speech by making a POST request to your API. Here's how to do it using curl:

    curl -X POST localhost:3000/api/v1/synthesize \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d '{\"text\": \"Your text here.\", \"output_type\": \"mp3\"}' \\\n| jq -r '.audio_file' | base64 -d > output.mp3 && vlc output.mp3\n
    "},{"location":"blog/huggingface/speak/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/speak/#multilingual-support","title":"Multilingual Support","text":"

    Deploy models capable of synthesizing speech in multiple languages. Modify the model_name and add tgt_lang parameters to target different languages.

    "},{"location":"blog/huggingface/speak/#voice-personalization","title":"Voice Personalization","text":"

    Some models support different voice presets. Use the voice_preset parameter to select various voices, adjusting tone and style to fit your application's context.

    "},{"location":"blog/huggingface/speak/#high-quality-synthesis","title":"High-Quality Synthesis","text":"

    For applications requiring high-fidelity audio, select models optimized for quality, such as facebook/seamless-m4t-v2-large. These models often have larger sizes but produce more natural and clear voice outputs.

    "},{"location":"blog/huggingface/speak/#real-time-applications","title":"Real-Time Applications","text":"

    For real-time TTS needs, focus on models with lower latency. Configuration options like use_cuda for GPU acceleration and precision adjustments can help reduce response times.

    "},{"location":"blog/huggingface/speak/#configuration-tips","title":"Configuration Tips","text":"
    • Model Selection: Experiment with various models to find the best fit for your application's language, quality, and performance requirements.
    • Security: Use the username and password fields to secure your API endpoint.
    • Resource Management: Adjust precision, quantization, and device_map settings based on your server's capabilities and your application's needs.
    "},{"location":"blog/huggingface/speech/","title":"Host Speech to Text Models Using Geniusrise","text":"

    Speech to Text (STT) technology has become a cornerstone in creating accessible and efficient user interfaces. Geniusrise offers a streamlined approach to deploying STT models as APIs, enabling developers to integrate speech recognition capabilities into their applications with ease. This post will guide you through setting up STT APIs using Geniusrise, highlighting various use cases and providing practical examples.

    "},{"location":"blog/huggingface/speech/#quick-setup","title":"Quick Setup","text":"

    Installation:

    Before you start, make sure you have Geniusrise installed:

    pip install geniusrise\npip install geniusrise-vision\n

    Configuration File (genius.yml):

    Create a genius.yml configuration file to define your STT API's specifications. Here\u2019s an example configuration:

    version: \"1\"\nbolts:\nmy_bolt:\nname: SpeechToTextAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: \"openai/whisper-large-v3\"\nmodel_class: \"WhisperForConditionalGeneration\"\nprocessor_class: \"AutoProcessor\"\nuse_cuda: True\nprecision: \"float32\"\ndevice_map: \"cuda:0\"\nendpoint: \"*\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n

    This configuration launches an STT API using OpenAI's Whisper model.

    "},{"location":"blog/huggingface/speech/#api-interaction","title":"API Interaction","text":"

    To interact with your STT API, encode your audio file in base64 format and construct a JSON payload. Below are examples using curl:

    # Encode your audio file to base64 and create the payload\nbase64 -w 0 sample.mp3 | awk '{print \"{\\\"audio_file\\\": \\\"\"$0\"\\\", \\\"model_sampling_rate\\\": 16000}\"}' > payload.json\n\n# Send the request to your API\ncurl -X POST http://localhost:3000/api/v1/transcribe \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @payload.json | jq\n
    "},{"location":"blog/huggingface/speech/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/speech/#general-speech-recognition","title":"General Speech Recognition","text":"

    Deploy models like openai/whisper-large-v3 for broad speech recognition tasks across various languages and domains.

    "},{"location":"blog/huggingface/speech/#specialized-transcription","title":"Specialized Transcription","text":"

    For specialized domains, such as medical or legal transcription, tailor your genius.yml to utilize domain-specific models to improve accuracy.

    "},{"location":"blog/huggingface/speech/#long-audio-files","title":"Long Audio Files","text":"

    Handling long audio files efficiently requires chunking the audio into manageable pieces. Adjust chunk_size in your configuration to enable this feature.

    "},{"location":"blog/huggingface/speech/#real-time-transcription","title":"Real-time Transcription","text":"

    For real-time applications, consider models optimized for speed and responsiveness. Adjust endpoint, port, and device_map accordingly to minimize latency.

    "},{"location":"blog/huggingface/speech/#advanced-configuration-tips","title":"Advanced Configuration Tips","text":"
    • Model Selection: Experiment with different models to find the one that best suits your needs. Geniusrise supports a wide range of STT models.
    • Precision and Performance: Adjust the precision and use_cuda settings to balance between transcription accuracy and resource utilization.
    • Security: Use username and password in your configuration to secure your API endpoint.
    "},{"location":"blog/huggingface/summz/","title":"Host Summarization Models Using Geniusrise","text":"

    In today's fast-paced world, the ability to condense large texts into concise summaries is invaluable. Geniusrise provides a streamlined approach to deploying summarization models as APIs, enabling developers to integrate summarization capabilities directly into their applications. This guide will walk you through setting up, configuring, and interacting with a summarization API using Geniusrise, highlighting various use cases and how to adapt the configuration for different models.

    "},{"location":"blog/huggingface/summz/#setup-and-configuration","title":"Setup and Configuration","text":"

    Installation:

    Begin by installing Geniusrise and its text module:

    pip install geniusrise\npip install geniusrise-text\n

    Configuration (genius.yml):

    Create a genius.yml to define your summarization service:

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: SummarizationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: facebook/bart-large-cnn\n            model_class: AutoModelForSeq2SeqLM\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n

    Activate your API with:

    genius rise\n
    "},{"location":"blog/huggingface/summz/#configuration-parameters-explained","title":"Configuration Parameters Explained","text":"
    • model_name: Specifies the pre-trained model, such as facebook/bart-large-cnn for summarization.
    • use_cuda: Utilizes GPU acceleration for faster processing.
    • precision: Controls computational precision, affecting performance.
    • endpoint & port: Network address and port for API access.
    • username & password: Basic authentication for API security.
    "},{"location":"blog/huggingface/summz/#interacting-with-the-summarization-api","title":"Interacting with the Summarization API","text":""},{"location":"blog/huggingface/summz/#summarizing-text","title":"Summarizing Text","text":"

    You can summarize text by making HTTP requests to your API.

    Example with curl:

    /usr/bin/curl -X POST localhost:3000/api/v1/summarize \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"text\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n        \"decoding_strategy\": \"generate\",\n        \"bos_token_id\": 0,\n        \"decoder_start_token_id\": 2,\n        \"early_stopping\": true,\n        \"eos_token_id\": 2,\n        \"forced_bos_token_id\": 0,\n        \"forced_eos_token_id\": 2,\n        \"length_penalty\": 2.0,\n        \"max_length\": 142,\n        \"min_length\": 56,\n        \"no_repeat_ngram_size\": 3,\n        \"num_beams\": 4,\n        \"pad_token_id\": 1,\n        \"do_sample\": false\n    }' | jq\n

    Example with python-requests:

    import requests\ndata = {\n\"text\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n\"decoding_strategy\": \"generate\",\n\"bos_token_id\": 0,\n\"decoder_start_token_id\": 2,\n\"early_stopping\": true,\n\"eos_token_id\": 2,\n\"forced_bos_token_id\": 0,\n\"forced_eos_token_id\": 2,\n\"length_penalty\": 2.0,\n\"max_length\": 142,\n\"min_length\": 56,\n\"no_repeat_ngram_size\": 3,\n\"num_beams\": 4,\n\"pad_token_id\": 1,\n\"do_sample\": false\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/summarize\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/summz/#advanced-summarization-features","title":"Advanced Summarization Features","text":"

    For use cases requiring specific summarization strategies or adjustments (e.g., length penalty, no repeat ngram size), additional parameters can be included in your request to customize the summarization output.

    "},{"location":"blog/huggingface/summz/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/summz/#different-summarization-models","title":"Different Summarization Models","text":"

    To cater to various summarization needs, such as domain-specific texts or languages, simply adjust the model_name in your genius.yml. For example, for summarizing scientific papers, you might choose a model like allenai/longformer-base-4096.

    "},{"location":"blog/huggingface/summz/#customizing-summarization-parameters","title":"Customizing Summarization Parameters","text":"

    Adjust summarization parameters such as max_length, min_length, and num_beams to fine-tune the output based on the specific requirements of your application.

    "},{"location":"blog/huggingface/table_qa/","title":"Host Table Question Answering Models Using Geniusrise","text":"
    • Host Table Question Answering Models Using Geniusrise
    • Setup and Configuration
    • Understanding genius.yml Parameters
    • Use Cases \\& Variations
      • Changing the Model for Different Table QA Tasks
      • Example genius.yml for tabular fact-checking:
    • Interacting with Your API
      • Table QA
      • Utilizing the Hugging Face Pipeline
    • Fun
      • Executing SQL on data
      • Query generators
    • Play around

    Deploying table question answering (QA) models is a sophisticated task that Geniusrise simplifies for developers. This guide aims to demonstrate how you can use Geniusrise to set up and run APIs for table QA, a crucial functionality for extracting structured information from tabular data. We'll cover the setup process, explain the parameters in the genius.yml file with examples, and provide code snippets for interacting with your API using curl and python-requests.

    "},{"location":"blog/huggingface/table_qa/#setup-and-configuration","title":"Setup and Configuration","text":"

    Requirements

    • python 3.10, PPA, AUR, brew, Windows.
    • You need to have a GPU. Most of the system works with NVIDIA GPUs.
    • Install CUDA.

    Optional: Set up a virtual environment:

    virtualenv venv -p `which python3.10`\nsource venv/bin/activate\n

    Step 1: Install Geniusrise

    pip install geniusrise\npip install geniusrise-text\n

    Step 2: Configure Your API

    Create a genius.yml file to define the settings of your table QA API.

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: QAAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: google/tapas-base-finetuned-wtq\n            model_class: AutoModelForTableQuestionAnswering\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n

    Launch your API with:

    genius rise\n
    "},{"location":"blog/huggingface/table_qa/#understanding-geniusyml-parameters","title":"Understanding genius.yml Parameters","text":"
    • model_name: The identifier for the model from Hugging Face, designed for table QA tasks.
    • model_class & tokenizer_class: Specifies the classes used for the model and tokenizer, respectively, suitable for table QA.
    • use_cuda: Utilize GPU acceleration to speed up inference times.
    • precision: Determines the floating-point precision for calculations (e.g., float for single precision).
    • device_map: Designates model parts to specific GPUs, optimizing performance.
    • endpoint & port: The network address and port where the API will be accessible.
    • username & password: Basic authentication credentials to secure access to your API.
    "},{"location":"blog/huggingface/table_qa/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/table_qa/#changing-the-model-for-different-table-qa-tasks","title":"Changing the Model for Different Table QA Tasks","text":"

    To tailor your API for different table QA tasks, such as financial data analysis or sports statistics, you can modify the model_name in your genius.yml. For example, to switch to a model optimized for financial tables, you might use google/tapas-large-finetuned-finance.

    "},{"location":"blog/huggingface/table_qa/#example-geniusyml-for-tabular-fact-checking","title":"Example genius.yml for tabular fact-checking:","text":"
    args:\n  model_name: \"google/tapas-large-finetuned-tabfact\"\n
    "},{"location":"blog/huggingface/table_qa/#interacting-with-your-api","title":"Interacting with Your API","text":""},{"location":"blog/huggingface/table_qa/#table-qa","title":"Table QA","text":"

    Using curl:

    curl -X POST http://localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"question\": \"Who had the highest batting average?\", \"data\": [{\"player\": \"John Doe\", \"average\": \".312\"}, {\"player\": \"Jane Roe\", \"average\": \".328\"}]}'\n

    Using python-requests:

    import requests\ndata = {\n\"question\": \"Who had the highest batting average?\",\n\"data\": [\n{\"player\": \"John Doe\", \"average\": \".312\"},\n{\"player\": \"Jane Roe\", \"average\": \".328\"}\n]\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/answer\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/table_qa/#utilizing-the-hugging-face-pipeline","title":"Utilizing the Hugging Face Pipeline","text":"

    Although primarily for text-based QA, you might experiment with the pipeline for preprocessing or extracting text from tables before querying.

    Using curl:

    curl -X POST http://localhost:3000/api/v1/answer_pipeline \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"question\": \"What is the total revenue?\", \"data\": \"The total revenue in Q1 was $10M, and in Q2 was $15M.\"}'\n

    Using python-requests:

    import requests\ndata = {\n\"question\": \"What is the total revenue?\",\n\"data\": \"\nThe total revenue in Q1 was $10M, and in Q2 was $15M.\"\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/answer_pipeline\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/table_qa/#fun","title":"Fun","text":"

    Table QA is dominated by two families of base models: the google TAPAS and microsoft TAPEX.

    "},{"location":"blog/huggingface/table_qa/#executing-sql-on-data","title":"Executing SQL on data","text":"

    Given some data and an SQL query, this model can return the results.

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: QAAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: microsoft/tapex-large-sql-execution\n            model_class: BartForConditionalGeneration\n            tokenizer_class: TapexTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    /usr/bin/curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"data\": {\n            \"year\": [1896, 1900, 1904, 2004, 2008, 2012],\n            \"city\": [\"athens\", \"paris\", \"st. louis\", \"athens\", \"beijing\", \"london\"]\n        },\n        \"question\": \"select year where city = beijing\"\n  }\n  ' | jq\n\n# {\n#   \"data\": {\n#     \"year\": [\n#       1896,\n#       1900,\n#       1904,\n#       2004,\n#       2008,\n#       2012\n#     ],\n#     \"city\": [\n#       \"athens\",\n#       \"paris\",\n#       \"st. louis\",\n#       \"athens\",\n#       \"beijing\",\n#       \"london\"\n#     ]\n#   },\n#   \"question\": \"select year where city = beijing\",\n#   \"answer\": {\n#     \"answers\": [\n#       \"2008\"        # <----\n#     ],\n#     \"aggregation\": \"NONE\"\n#   }\n# }\n
    "},{"location":"blog/huggingface/table_qa/#query-generators","title":"Query generators","text":"

    Given some data and a natural language query, these models generate a query that can be used to compute the result. These models are what power spreadsheet automations.

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: QAAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: google/tapas-large-finetuned-wtq\n            model_class: AutoModelForTableQuestionAnswering\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    /usr/bin/curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"data\": {\n          \"population\": [\"10.6\", \"12.6\", \"12.9\", \"11.9\", \"10.3\", \"11.5\", \"12.5\", \"12.0\", \"11.5\", \"12.4\", \"11.0\", \"12.8\", \"12.5\", \"10.6\", \"11.9\", \"12.0\", \"12.6\", \"11.7\", \"12.3\", \"10.8\", \"11.2\", \"12.7\", \"10.5\", \"11.3\", \"12.2\", \"10.9\", \"11.7\", \"10.3\", \"10.9\", \"10.2\", \"10.6\", \"10.4\", \"10.5\", \"11.5\", \"11.7\", \"10.9\", \"10.4\", \"11.0\", \"12.4\", \"12.2\", \"11.3\", \"10.2\", \"11.0\", \"11.5\", \"11.0\", \"10.9\", \"11.5\", \"12.8\", \"11.3\", \"11.9\", \"12.9\", \"10.9\", \"11.4\", \"12.8\", \"10.3\", \"12.6\", \"11.1\", \"10.6\", \"12.0\", \"12.4\", \"10.2\", \"12.9\", \"11.7\", \"12.3\", \"12.4\", \"12.0\", \"10.9\", \"10.9\", \"12.3\", \"12.7\", \"10.2\", \"11.7\", \"12.4\", \"12.5\", \"12.0\", \"11.0\", \"12.9\", \"10.9\", \"10.4\", \"12.8\", \"10.3\", \"11.6\", \"12.9\", \"12.4\", \"12.4\", \"10.2\", \"11.2\", \"10.2\", \"10.1\", \"12.7\", \"11.2\", \"12.5\", \"11.7\", \"11.4\", \"10.7\", \"10.9\", \"11.5\", \"11.3\", \"10.3\", \"10.7\", \"11.2\", \"10.6\", \"11.0\", \"12.3\", \"11.7\", \"10.0\", \"10.4\", \"11.4\", \"11.5\", \"12.2\"],\n          \"city\": [\"Tokyo\", \"Delhi\", \"Shanghai\", \"Sao Paulo\", \"Mumbai\", \"Mexico City\", \"Beijing\", \"Osaka\", \"Cairo\", \"New York\", \"Dhaka\", \"Karachi\", \"Buenos Aires\", \"Kolkata\", \"Istanbul\", \"Chongqing\", \"Lagos\", \"Rio de Janeiro\", \"Tianjin\", \"Kinshasa\", \"Guangzhou\", \"Los Angeles\", \"Moscow\", \"Shenzhen\", \"Lahore\", \"Bangalore\", \"Paris\", \"Bogota\", \"Jakarta\", \"Chennai\", \"Lima\", \"Bangkok\", \"Seoul\", \"Nagoya\", \"Hyderabad\", \"London\", \"Tehran\", \"Chicago\", \"Chengdu\", \"Nanjing\", \"Wuhan\", \"Ho Chi Minh City\", \"Luanda\", \"Ahmedabad\", \"Kuala Lumpur\", \"Riyadh\", \"Baghdad\", \"Santiago\", \"Surat\", \"Madrid\", \"Suzhou\", \"Pune\", 
\"Houston\", \"Dallas\", \"Toronto\", \"Dar es Salaam\", \"Miami\", \"Belo Horizonte\", \"Singapore\", \"Philadelphia\", \"Atlanta\", \"Fukuoka\", \"Khartoum\", \"Barcelona\", \"Johannesburg\", \"Saint Petersburg\", \"Qingdao\", \"Dalian\", \"Washington, D.C.\", \"Yangon\", \"Alexandria\", \"Jinan\", \"Guadalajara\", \"Harbin\", \"San Francisco\", \"Fort Worth\", \"Boston\", \"Detroit\", \"Montreal\", \"Porto Alegre\", \"Ankara\", \"Monterrey\", \"Nairobi\", \"Doha\", \"Luoyang\", \"Kuwait City\", \"Dublin\", \"Mecca\", \"Medina\", \"Amman\", \"Algiers\", \"Kampala\", \"Maputo\", \"Addis Ababa\", \"Brasilia\", \"Havana\", \"Faisalabad\", \"Tashkent\", \"Accra\", \"Sapporo\", \"Manila\", \"Hanoi\", \"Sydney\", \"Melbourne\", \"Cape Town\", \"Auckland\", \"Oslo\", \"Stockholm\", \"Helsinki\", \"Copenhagen\"]\n        },\n        \"question\": \"what is the total population of these cities\"\n  }\n  ' | jq\n\n# {\n#   \"data\": {\n#     \"population\": [ ...\n#     ],\n#     \"city\": [\n#       \"Tokyo\", ...\n#     ]\n#   },\n#   \"question\": \"what is the total population of these cities\",\n#   \"answer\": {\n#     \"answers\": [\n#       \"10.6\",\n#       ...\n#       \"12.2\"\n#     ],\n#     \"aggregation\": \"COUNT\" # <---\n#   }\n# }\n

    The answer.aggregation field indicates the operation to be done on the answer.answers field to get the answer.

    However, when queries involve selecting one value from the data, the value of answer.aggregation remains as NONE.

    /usr/bin/curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n      \"data\": [\n        {\n          \"Name\": \"Acme Corp\",\n          \"Revenue\": \"1622908.31\",\n          \"Expenses\": \"802256.16\",\n          \"Profit\": \"820652.15\",\n          \"Assets\": \"2758871.86\",\n          \"Liabilities\": \"1786333.21\",\n          \"Equity\": \"972538.65\"\n        },\n        {\n          \"Name\": \"Globex Inc\",\n          \"Revenue\": \"1846200.97\",\n          \"Expenses\": \"1414781.1\",\n          \"Profit\": \"431419.87\",\n          \"Assets\": \"246642.65\",\n          \"Liabilities\": \"1969146.36\",\n          \"Equity\": \"-1722503.71\"\n        },\n        {\n          \"Name\": \"Soylent Corp\",\n          \"Revenue\": \"1585575.02\",\n          \"Expenses\": \"1457030.2\",\n          \"Profit\": \"128544.82\",\n          \"Assets\": \"1599655.56\",\n          \"Liabilities\": \"1260425.14\",\n          \"Equity\": \"339230.42\"\n        },\n        {\n          \"Name\": \"Initech LLC\",\n          \"Revenue\": \"179462.76\",\n          \"Expenses\": \"792898.88\",\n          \"Profit\": \"-613436.12\",\n          \"Assets\": \"780230.44\",\n          \"Liabilities\": \"990416.97\",\n          \"Equity\": \"-210186.53\"\n        },\n        {\n          \"Name\": \"Umbrella Corp\",\n          \"Revenue\": \"1882828.73\",\n          \"Expenses\": \"487215.16\",\n          \"Profit\": \"1395613.57\",\n          \"Assets\": \"2933377.54\",\n          \"Liabilities\": \"1519978.31\",\n          \"Equity\": \"1413399.23\"\n        },\n        {\n          \"Name\": \"Vandelay Ind\",\n          \"Revenue\": \"1641614.11\",\n          \"Expenses\": \"722957.57\",\n          \"Profit\": \"918656.54\",\n          \"Assets\": \"1818305.88\",\n          \"Liabilities\": \"1051099.45\",\n          \"Equity\": \"767206.43\"\n        },\n        {\n          \"Name\": \"Hooli Inc\",\n    
      \"Revenue\": \"784472.77\",\n          \"Expenses\": \"1035568.89\",\n          \"Profit\": \"-251096.12\",\n          \"Assets\": \"1011898.52\",\n          \"Liabilities\": \"757685.31\",\n          \"Equity\": \"254213.21\"\n        },\n        {\n          \"Name\": \"Stark Industries\",\n          \"Revenue\": \"1752780.24\",\n          \"Expenses\": \"954382.19\",\n          \"Profit\": \"798398.05\",\n          \"Assets\": \"1828265.8\",\n          \"Liabilities\": \"1785958.67\",\n          \"Equity\": \"42307.13\"\n        },\n        {\n          \"Name\": \"Wayne Enterprises\",\n          \"Revenue\": \"772662.41\",\n          \"Expenses\": \"724219.29\",\n          \"Profit\": \"48443.12\",\n          \"Assets\": \"2952379.67\",\n          \"Liabilities\": \"1255329.61\",\n          \"Equity\": \"1697050.06\"\n        },\n        {\n          \"Name\": \"Weyland-Yutani\",\n          \"Revenue\": \"1157644.0\",\n          \"Expenses\": \"1454230.66\",\n          \"Profit\": \"-296586.66\",\n          \"Assets\": \"776909.75\",\n          \"Liabilities\": \"759733.68\",\n          \"Equity\": \"17176.07\"\n        }\n      ],\n      \"question\": \"Given the balance sheet data, identify the company with the highest equity to assets ratio.\"\n}\n' | jq\n\n# {\n#   \"data\": [\n#     ...\n#   ],\n#   \"question\": \"Given the balance sheet data, identify the company with the highest equity to assets ratio.\",\n#   \"answer\": {\n#     \"answers\": [\n#       \"Wayne Enterprises\"\n#     ],\n#     \"aggregation\": \"NONE\"\n#   }\n# }\n

    Let's verify this:

    def calculate_highest_equity_to_assets_ratio(data):\n    ratios = {}\nfor company in data[\"data\"]:\n        name = company[\"Name\"]\nequity = float(company[\"Equity\"])\nassets = float(company[\"Assets\"])\nratio = equity / assets if assets != 0 else 0\nratios[name] = ratio\n\nhighest_ratio_company = max(ratios, key=ratios.get)\nhighest_ratio = ratios[highest_ratio_company]\nreturn highest_ratio_company, highest_ratio\n\nhighest_ratio_company, highest_ratio = calculate_highest_equity_to_assets_ratio(financial_data)\nhighest_ratio_company, highest_ratio\n

    which gives us:

    ('Wayne Enterprises', 0.574807528057528)\n

    yay \ud83e\udd73

    "},{"location":"blog/huggingface/table_qa/#play-around","title":"Play around","text":"

    Models of this kind are few, with 82 available on the Hugging Face hub.

    "},{"location":"blog/huggingface/trans/","title":"Host Translation Models Using Geniusrise","text":"

    This guide will walk you through deploying translation models using Geniusrise, covering the setup, configuration, and interaction with the translation API for various use cases.

    "},{"location":"blog/huggingface/trans/#setup-and-configuration","title":"Setup and Configuration","text":"

    Requirements

    • python 3.10, PPA, AUR, brew, Windows.
    • You need to have a GPU. Most of the system works with NVIDIA GPUs.
    • Install CUDA.

    Optional: Set up a virtual environment:

    virtualenv venv -p `which python3.10`\nsource venv/bin/activate\n

    Installation:

    Begin by installing Geniusrise and the necessary text processing extensions:

    pip install geniusrise\npip install geniusrise-text\n

    Configuration (genius.yml):

    Next, define your translation service in a genius.yml file:

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: TranslationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: facebook/mbart-large-50-many-to-many-mmt\n            model_class: AutoModelForSeq2SeqLM\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n

    To launch your API, execute:

    genius rise\n
    "},{"location":"blog/huggingface/trans/#configuration-parameters-explained","title":"Configuration Parameters Explained","text":"
    • model_name: Specifies the model to use, such as facebook/mbart-large-50-many-to-many-mmt for multilingual translation.
    • model_class & tokenizer_class: Defines the classes for the model and tokenizer, crucial for the translation process.
    • use_cuda: Indicates whether to use GPU acceleration for faster processing.
    • precision: The computational precision (e.g., float) affects performance and resource usage.
    • endpoint & port: The network address where the API is accessible.
    • username & password: Security credentials for accessing the API.
    "},{"location":"blog/huggingface/trans/#interacting-with-the-translation-api","title":"Interacting with the Translation API","text":""},{"location":"blog/huggingface/trans/#translating-text","title":"Translating Text","text":"

    Translate text from one language to another using a simple HTTP request.

    Example using curl:

    curl -X POST http://localhost:3000/api/v1/translate \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"text\": \"\u0938\u0902\u092f\u0941\u0915\u094d\u0924 \u0930\u093e\u0937\u094d\u091f\u094d\u0930 \u0915\u0947 \u092a\u094d\u0930\u092e\u0941\u0916 \u0915\u093e \u0915\u0939\u0928\u093e \u0939\u0948 \u0915\u093f \u0938\u0940\u0930\u093f\u092f\u093e \u092e\u0947\u0902 \u0915\u094b\u0908 \u0938\u0948\u0928\u094d\u092f \u0938\u092e\u093e\u0927\u093e\u0928 \u0928\u0939\u0940\u0902 \u0939\u0948\",\n        \"source_lang\": \"hi_IN\",\n        \"target_lang\": \"en_XX\",\n        \"decoding_strategy\": \"generate\",\n        \"decoder_start_token_id\": 2,\n        \"early_stopping\": true,\n        \"eos_token_id\": 2,\n        \"forced_eos_token_id\": 2,\n        \"max_length\": 200,\n        \"num_beams\": 5,\n        \"pad_token_id\": 1\n    }' | jq\n

    Example using python-requests:

    import requests\ndata = {\n\"text\": \"\u0938\u0902\u092f\u0941\u0915\u094d\u0924 \u0930\u093e\u0937\u094d\u091f\u094d\u0930 \u0915\u0947 \u092a\u094d\u0930\u092e\u0941\u0916 \u0915\u093e \u0915\u0939\u0928\u093e \u0939\u0948 \u0915\u093f \u0938\u0940\u0930\u093f\u092f\u093e \u092e\u0947\u0902 \u0915\u094b\u0908 \u0938\u0948\u0928\u094d\u092f \u0938\u092e\u093e\u0927\u093e\u0928 \u0928\u0939\u0940\u0902 \u0939\u0948\",\n\"source_lang\": \"hi_IN\",\n\"target_lang\": \"en_XX\",\n\"decoding_strategy\": \"generate\",\n\"decoder_start_token_id\": 2,\n\"early_stopping\": true,\n\"eos_token_id\": 2,\n\"forced_eos_token_id\": 2,\n\"max_length\": 200,\n\"num_beams\": 5,\n\"pad_token_id\": 1\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/translate\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/trans/#advanced-translation-features","title":"Advanced Translation Features","text":"

    For use cases requiring specific translation strategies or parameters (e.g., beam search, number of beams), you can pass additional parameters in your request to customize the translation process.

    "},{"location":"blog/huggingface/trans/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/trans/#different-language-pairs","title":"Different Language Pairs","text":"

    Adjust the source_lang and target_lang parameters to cater to various language pairs, enabling translation between numerous languages supported by the chosen model.

    "},{"location":"blog/huggingface/trans/#customizing-translation-parameters","title":"Customizing Translation Parameters","text":"

    For advanced translation needs, such as controlling the length of the output or employing beam search, modify the additional_params in your requests:

    {\n\"text\": \"Your text here\",\n\"source_lang\": \"en_XX\",\n\"target_lang\": \"es_XX\",\n\"num_beams\": 4\n}\n
    "},{"location":"blog/huggingface/trans/#fun","title":"Fun","text":"

    There are two families of models from facebook that can perform any to any language translation among a large number of languages.

    • facebook/mbart-large-50-many-to-many-mmt: 50 languages
    • facebook/nllb-200-distilled-600M: 200 languages

    Both the MBART and the NLLB families have several members, with facebook/nllb-moe-54b — a 54-billion-parameter mixture of experts — being the largest and most capable one.

    See here for the language codes for the FLORES-200 dataset.

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: TranslationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: facebook/nllb-200-3.3B\n            model_class: AutoModelForSeq2SeqLM\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n

    We can try translating from hindi to tatar:

    curl -X POST http://localhost:3000/api/v1/translate \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"text\": \"\u0938\u0902\u092f\u0941\u0915\u094d\u0924 \u0930\u093e\u0937\u094d\u091f\u094d\u0930 \u0915\u0947 \u092a\u094d\u0930\u092e\u0941\u0916 \u0915\u093e \u0915\u0939\u0928\u093e \u0939\u0948 \u0915\u093f \u0938\u0940\u0930\u093f\u092f\u093e \u092e\u0947\u0902 \u0915\u094b\u0908 \u0938\u0948\u0928\u094d\u092f \u0938\u092e\u093e\u0927\u093e\u0928 \u0928\u0939\u0940\u0902 \u0939\u0948\",\n        \"target_lang\": \"tat_Cyrl\",\n        \"decoding_strategy\": \"generate\",\n        \"bos_token_id\": 0,\n        \"decoder_start_token_id\": 2,\n        \"eos_token_id\": 2,\n        \"max_length\": 200,\n        \"pad_token_id\": 1\n    }'\n

    Now how do we even verify whether this is correct? Let's reverse-translate, followed by sentence similarity from NLI. We need to launch two containers — one for translation and another for NLI:

    version: \"1\"\nbolts:\nmy_translation_bolt:\nname: TranslationAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: facebook/nllb-200-3.3B\nmodel_class: AutoModelForSeq2SeqLM\ntokenizer_class: AutoTokenizer\nuse_cuda: true\nprecision: float\ndevice_map: cuda:0\nendpoint: \"0.0.0.0\"\nport: 3000\ncors_domain: http://localhost:3000\nusername: user\npassword: password\nmy_nli_bolt:\nname: NLIAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: facebook/bart-large-mnli\nmodel_class: AutoModelForSequenceClassification\ntokenizer_class: AutoTokenizer\nuse_cuda: true\nprecision: float\ndevice_map: cuda:0\nendpoint: \"0.0.0.0\"\nport: 3001\ncors_domain: http://localhost:3001\nusername: user\npassword: password\n

    import requests\n# First we translate this hindi sentence to tatar\ndata = {\n\"text\": \"\u0938\u0902\u092f\u0941\u0915\u094d\u0924 \u0930\u093e\u0937\u094d\u091f\u094d\u0930 \u0915\u0947 \u092a\u094d\u0930\u092e\u0941\u0916 \u0915\u093e \u0915\u0939\u0928\u093e \u0939\u0948 \u0915\u093f \u0938\u0940\u0930\u093f\u092f\u093e \u092e\u0947\u0902 \u0915\u094b\u0908 \u0938\u0948\u0928\u094d\u092f \u0938\u092e\u093e\u0927\u093e\u0928 \u0928\u0939\u0940\u0902 \u0939\u0948\",\n\"target_lang\": \"tat_Cyrl\",\n\"decoding_strategy\": \"generate\",\n\"bos_token_id\": 0,\n\"decoder_start_token_id\": 2,\n\"eos_token_id\": 2,\n\"max_length\": 200,\n\"pad_token_id\": 1\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/translate\",\njson=data,\nauth=('user', 'password'))\ntranslated = response.json()[\"translated_text\"]\n# \u0411\u041c\u041e \u0431\u0430\u0448\u043b\u044b\u0433\u044b \u0421\u04af\u0440\u0438\u044f\u0434\u04d9 \u0445\u04d9\u0440\u0431\u0438 \u0447\u0430\u0440\u0430\u043b\u0430\u0440 \u044e\u043a \u0434\u0438\u043f \u0431\u0435\u043b\u0434\u0435\u0440\u04d9\n# Then we translate the tatar back to hindi\nrev = data.copy()\nrev[\"text\"] = translated\nrev[\"target_lang\"] = \"hin_Deva\"\nresponse = requests.post(\"http://localhost:3000/api/v1/translate\",\njson=rev,\nauth=('user', 'password'))\nrev_translated = response.json()[\"translated_text\"]\n# Finally we look at similarity of the source and reverse-translated hindi sentences\ndata = {\n\"text1\": data[\"text\"],\n\"text2\": rev_translated\n}\nresponse = requests.post(\"http://localhost:3001/api/v1/textual_similarity\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n# {\n#     'text1': '\u0938\u0902\u092f\u0941\u0915\u094d\u0924 \u0930\u093e\u0937\u094d\u091f\u094d\u0930 \u0915\u0947 \u092a\u094d\u0930\u092e\u0941\u0916 \u0915\u093e \u0915\u0939\u0928\u093e \u0939\u0948 \u0915\u093f \u0938\u0940\u0930\u093f\u092f\u093e \u092e\u0947\u0902 \u0915\u094b\u0908 
\u0938\u0948\u0928\u094d\u092f \u0938\u092e\u093e\u0927\u093e\u0928 \u0928\u0939\u0940\u0902 \u0939\u0948',\n#     'text2': '\u092c\u0940\u090f\u092e\u0913 \u092a\u094d\u0930\u092e\u0941\u0916 \u0928\u0947 \u0915\u0939\u093e \u0915\u093f \u0938\u0940\u0930\u093f\u092f\u093e \u092e\u0947\u0902 \u0915\u094b\u0908 \u0938\u0948\u0928\u094d\u092f \u0909\u092a\u093e\u092f \u0928\u0939\u0940\u0902 \u0939\u0948\u0902',\n#     'similarity_score': 0.9829527983379287\n# }\n
    0.9829527983379287 looks like a great similarity score, so the translation really works! (or the mistakes are isomorphic) \ud83e\udd73\ud83d\udc4d

    "},{"location":"blog/huggingface/trans/#play-around","title":"Play around","text":"

    There is not much to do in translation except mess around with different languages \ud83e\udd37\u200d\u2642\ufe0f There are not many models either; Facebook is the undisputed leader in translation models.

    "},{"location":"blog/huggingface/txtclass/","title":"Host Text Classification Models Using Geniusrise","text":"
    • Host Text Classification Models Using Geniusrise
    • Quick Setup
    • Configuration Breakdown
    • Use Cases \\& Variations
      • Sentiment Analysis
      • Content Moderation
      • Language Detection
      • Making API Requests
      • Classify Text
      • Classification Pipeline
    • Fun
      • Political bias detection
      • Intent classification
      • Hallucination Evaluation
      • Irony Detection
    • Play around

    This post will guide you through creating inference APIs for different text classification tasks using geniusrise, explaining the genius.yml configuration and providing examples of how to interact with your API using curl and python-requests.

    "},{"location":"blog/huggingface/txtclass/#quick-setup","title":"Quick Setup","text":"

    Requirements:

    • python 3.10, PPA, AUR, brew, Windows.
    • You need to have a GPU. Most of the system works with NVIDIA GPUs.
    • Install CUDA.

    Installation:

    Optional: Set up a virtual environment:

    virtualenv venv -p `which python3.10`\nsource venv/bin/activate\n

    Install the packages:

    pip install torch\npip install geniusrise\npip install geniusrise-text\n

    Configuration File (genius.yml):

    Create a genius.yml with the necessary configuration for your text classification API:

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: TextClassificationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: tomh/toxigen_roberta\n            model_class: AutoModelForSequenceClassification\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            compile: false\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n

    Launch your API with:

    genius rise\n
    "},{"location":"blog/huggingface/txtclass/#configuration-breakdown","title":"Configuration Breakdown","text":"
    • model_name: Specify the Hugging Face model ID, e.g., bert-base-uncased for sentiment analysis.
    • use_cuda: Enable GPU acceleration with true or false for CPU.
    • precision: Set to float for single precision; consider half for faster inference on compatible GPUs. Does not work for most small models.
    • device_map: Assign model parts to specific GPUs, e.g., cuda:0.
    • endpoint & port: Define the API access point.
    • username & password: Secure your API with basic authentication.
    "},{"location":"blog/huggingface/txtclass/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/txtclass/#sentiment-analysis","title":"Sentiment Analysis","text":"

    For sentiment analysis, swap the model_name to a model trained for sentiment, like distilbert-base-uncased-finetuned-sst-2-english.

    args:\n  model_name: \"distilbert-base-uncased-finetuned-sst-2-english\"\n
    "},{"location":"blog/huggingface/txtclass/#content-moderation","title":"Content Moderation","text":"

    To filter inappropriate content, use a model like roberta-base-openai-detector.

    args:\n  model_name: \"roberta-base-openai-detector\"\n
    "},{"location":"blog/huggingface/txtclass/#language-detection","title":"Language Detection","text":"

    For detecting the language of the input text, a model like xlm-roberta-base is suitable.

    args:\n  model_name: \"xlm-roberta-base\"\n

    Try out various models from huggingface.

    "},{"location":"blog/huggingface/txtclass/#making-api-requests","title":"Making API Requests","text":""},{"location":"blog/huggingface/txtclass/#classify-text","title":"Classify Text","text":"

    cURL:

    curl -X POST http://localhost:3000/api/v1/classify \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"text\": \"Your text here.\"}'\n

    Python-Requests:

    import requests\nresponse = requests.post(\"http://localhost:3000/api/v1/classify\",\njson={\"text\": \"Your text here.\"},\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/txtclass/#classification-pipeline","title":"Classification Pipeline","text":"

    cURL:

    curl -X POST http://localhost:3000/api/v1/classification_pipeline \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"text\": \"Your text here.\"}'\n

    Python-Requests:

    import requests\nresponse = requests.post(\"http://localhost:3000/api/v1/classification_pipeline\",\njson={\"text\": \"Your text here.\"},\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/txtclass/#fun","title":"Fun","text":"

    There are quite a few fun models to try out from huggingface!

    "},{"location":"blog/huggingface/txtclass/#political-bias-detection","title":"Political bias detection","text":"

    This model tries to classify text according to the political bias they might possess.

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: TextClassificationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: bucketresearch/politicalBiasBERT\n            model_class: AutoModelForSequenceClassification\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            compile: false\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    /usr/bin/curl -X POST localhost:3000/api/v1/classify \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"text\": \"i think i agree with bjp that hindus need to be respected\"\n    }' | jq\n\n# {\n#   \"input\": \"i think i agree with bjp that hindus need to be respected\",\n#   \"label_scores\": {\n#     \"LEFT\": 0.28080788254737854,\n#     \"CENTER\": 0.18140915036201477,\n#     \"RIGHT\": 0.5377829670906067 # <--\n#   }\n# }\n
    /usr/bin/curl -X POST localhost:3000/api/v1/classify \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"text\": \"these ghettos are sprawling these days and the people who live there stink\"\n    }' | jq\n\n# {\n#   \"input\": \"these ghettos are sprawling these days and the people who live there stink\",\n#   \"label_scores\": {\n#     \"LEFT\": 0.38681042194366455, # <-- NIMBY?\n#     \"CENTER\": 0.20437702536582947,\n#     \"RIGHT\": 0.408812552690506 # <--\n#   }\n# }\n

    Works fairly well empirically for medium-sized sentences and in an american context.

    "},{"location":"blog/huggingface/txtclass/#intent-classification","title":"Intent classification","text":"

    Text classification can be used to figure out the intent of the user in a chat conversation scenario — for example, to determine whether the user has an intent to explore or to buy.

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: TextClassificationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: Falconsai/intent_classification\n            model_class: AutoModelForSequenceClassification\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            compile: false\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    /usr/bin/curl -X POST localhost:3000/api/v1/classify \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"text\": \"hey i havent got my package yet where is it\"\n    }' | jq\n\n# {\n#   \"input\": \"hey i havent got my package yet where is it\",\n#   \"label_scores\": {\n#     \"cancellation\": 6.553709398088303E-12,\n#     \"ordering\": 4.977344745534613E-15,\n#     \"shipping\": 4.109915668426903E-15,\n#     \"invoicing\": 1.3524543897996955E-13,\n#     \"billing and payment\": 2.5260177283215057E-10,\n#     \"returns and refunds\": 1.915349389508547E-12,\n#     \"complaints and feedback\": 1.0671016614826126E-13,\n#     \"speak to person\": 2.6417441435886042E-15,\n#     \"edit account\": 3.1924864227900196E-13,\n#     \"delete account\": 2.704471304022793E-13,\n#     \"delivery information\": 1.0,                 # <--\n#     \"subscription\": 1.2307567616963444E-13,\n#     \"recover password\": 1.387644556552492E-12,\n#     \"registration problems\": 2.686436142984583E-13,\n#     \"appointment\": 3.555285948454723E-13\n#   }\n# }\n
    "},{"location":"blog/huggingface/txtclass/#hallucination-evaluation","title":"Hallucination Evaluation","text":"

    Figuring out whether your chat / LLM model is hallucinating or not is a text classification task!

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: TextClassificationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: vectara/hallucination_evaluation_model\n            model_class: AutoModelForSequenceClassification\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            compile: false\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    /usr/bin/curl -X POST localhost:3000/api/v1/classify \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"text\": \"A man walks into a bar and buys a drink [SEP] A bloke swigs alcohol at a pub\"\n    }' | jq\n\n# {\n#   \"input\": \"A man walks into a bar and buys a drink [SEP] A bloke swigs alcohol at a pub\",\n#   \"label_scores\": [\n#     0.6105160713195801\n#   ]\n# }\n
    "},{"location":"blog/huggingface/txtclass/#irony-detection","title":"Irony Detection","text":"

    Yussss, NLP has advanced enough for us to easily be able to detect irony!

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: TextClassificationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: cardiffnlp/twitter-roberta-base-irony\n            model_class: AutoModelForSequenceClassification\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            compile: false\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    /usr/bin/curl -X POST localhost:3000/api/v1/classify \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"text\": \"What a wonderful day to have a flat tire!\"\n    }' | jq\n\n# {\n#   \"input\": \"What a wonderful day to have a flat tire!\",\n#   \"label_scores\": {\n#     \"non_irony\": 0.023495545610785484,\n#     \"irony\": 0.9765045046806335  <---\n#   }\n# }\n
    "},{"location":"blog/huggingface/txtclass/#play-around","title":"Play around","text":"

    There are 49,863 text classification models as of this article on huggingface. Play around with them, tweak various parameters, learn about various usecases and cool shit that can be built with \"mere\" text classification!

    "},{"location":"blog/huggingface/vqa/","title":"Host Visual QA Models Using Geniusrise","text":"

    Visual Question Answering (VQA) combines the power of visual understanding with natural language processing to answer questions about images. Geniusrise offers a streamlined process to deploy VQA models as APIs, making it accessible to developers to integrate advanced AI capabilities into their applications. This blog post demonstrates how to set up VQA APIs using Geniusrise and provides examples for various use cases.

    "},{"location":"blog/huggingface/vqa/#setting-up","title":"Setting Up","text":"

    To begin, ensure you have Geniusrise and Geniusrise-Vision installed:

    pip install geniusrise\npip install geniusrise-vision\n

    Create a genius.yml configuration file tailored to your API requirements, specifying the model, tokenizer, and additional parameters necessary for inference.

    "},{"location":"blog/huggingface/vqa/#sample-configuration","title":"Sample Configuration","text":"

    Below is an example of a configuration file for a VQA API:

    version: \"1\"\nbolts:\nmy_bolt:\nname: VisualQAAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: \"google/pix2struct-ai2d-base\"\nmodel_class: \"Pix2StructForConditionalGeneration\"\nprocessor_class: \"Pix2StructProcessor\"\nuse_cuda: true\nprecision: \"float\"\ndevice_map: \"cuda:0\"\nendpoint: \"*\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n

    This configuration sets up a VQA API using the Pix2Struct model, ready to process images and answer questions about them.

    "},{"location":"blog/huggingface/vqa/#interacting-with-your-api","title":"Interacting with Your API","text":"

    To interact with your VQA API, encode your images in base64 format and construct a JSON payload with the image and the question. Here are examples using curl:

    # Convert the image to base64 and prepare the payload\nbase64 -w 0 image.jpg | awk '{print \"{\\\"image_base64\\\": \\\"\"$0\"\\\", \\\"question\\\": \\\"What is in this image?\\\"}\"}' > payload.json\n\n# Send the request to your API\ncurl -X POST http://localhost:3000/api/v1/answer_question \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @payload.json | jq\n
    "},{"location":"blog/huggingface/vqa/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/vqa/#general-vqa","title":"General VQA","text":"

    Use models like google/pix2struct-ai2d-base for general VQA tasks, where the model predicts answers based on the image content and the posed question.

    "},{"location":"blog/huggingface/vqa/#specialized-vqa","title":"Specialized VQA","text":"

    For specialized domains, such as medical imaging or technical diagrams, tailor your genius.yml to use domain-specific models. This requires replacing the model_name, model_class, and processor_class with those suitable for your specific application.

    "},{"location":"blog/huggingface/vqa/#advanced-configuration","title":"Advanced Configuration","text":"

    Experiment with different models, precision levels, and CUDA settings to optimize performance and accuracy for your use case. Geniusrise allows for detailed configuration, including quantization and torchscript options, to fine-tune the deployment according to your requirements.

    "},{"location":"bolts/openai/base/","title":"Base Fine Tuner","text":"

    Bases: Bolt

    An abstract base class for writing bolts for fine-tuning OpenAI models.

    This base class is intended to be subclassed for fine-tuning OpenAI models. The chief objective of its subclasses is to load and preprocess the dataset, though of course, other methods, including fine-tuning, can be overridden for customization.

    This bolt uses the OpenAI API to fine-tune a pre-trained model.

    Each subclass can be invoked using the genius cli or yaml.

    "},{"location":"bolts/openai/base/#geniusrise_openai.OpenAIFineTuner--using-genius-cli","title":"Using genius cli","text":"
    genius <bolt_name> rise \\\nbatch \\\n--input_s3_bucket my-input-bucket \\\n--input_s3_folder my-input-folder \\\nbatch \\\n--output_s3_bucket my-output-bucket \\\n--output_s3_folder my-output-folder \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise \\\n--postgres_table task_state \\\nfine_tune \\\n--args\n        model=gpt-3.5-turbo \\\nn_epochs=2 \\\nbatch_size=64 \\\nlearning_rate_multiplier=0.5 \\\nprompt_loss_weight=1 \\\nwait=True\n

    This will load and preprocess data from input s3 location, and upload it to openai for fine tuning, and wait.

    "},{"location":"bolts/openai/base/#geniusrise_openai.OpenAIFineTuner--using-yaml","title":"Using YAML","text":"

    Bolts can be invoked using the genius cli on a yaml file.

    Create a yaml file with the following content (looks very similar to cli):

    version: 1\nbolts:\nmy_fine_tuner:\nname: OpenAIClassificationFineTuner\nmethod: fine_tune\nargs:\nmodel: gpt-3.5-turbo\nn_epochs: 2\nbatch_size: 64\nlearning_rate_multiplier: 0.5\nprompt_loss_weight: 1\nwait: True\ninput:\ntype: batch\nbucket: my-input-bucket\nfolder: my-input-folder\noutput:\ntype: batch\nbucket: my-output-bucket\nfolder: my-output-folder\nstate:\ntype: postgres\nhost: 127.0.0.1\nport: 5432\nuser: postgres\npassword: postgres\ndatabase: geniusrise\ntable: state\n
    genius rise\n

    Gotchas:

    1. Extra command line arguments can be passed to the load_dataset method via fine_tune method by appending data_ to the param name.

    e.g.

            args:\nmodel: gpt-3.5-turbo\n...\ndata_some_arbitrary_key: passed_to_load_dataset_method\n
    "},{"location":"bolts/openai/base/#geniusrise_openai.base.OpenAIFineTuner.__init__","title":"__init__(input, output, state)","text":"

    Initialize the bolt.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output BatchOutput

    The output data.

    required state State

    The state manager.

    required"},{"location":"bolts/openai/base/#geniusrise_openai.base.OpenAIFineTuner.delete_fine_tuned_model","title":"delete_fine_tuned_model(model_id) staticmethod","text":"

    Delete a fine-tuned model.

    "},{"location":"bolts/openai/base/#geniusrise_openai.base.OpenAIFineTuner.fine_tune","title":"fine_tune(model, n_epochs, batch_size, learning_rate_multiplier, prompt_loss_weight, suffix=None, wait=False, data_extractor_lambda=None, **kwargs)","text":"

    Fine-tune the model.

    Parameters:

    Name Type Description Default model str

    The pre-trained model name.

    required suffix str

    The suffix to append to the model name.

    None n_epochs int

    Total number of training epochs to perform.

    required batch_size int

    Batch size during training.

    required learning_rate_multiplier float

    Learning rate multiplier.

    required prompt_loss_weight int

    Prompt loss weight.

    required wait bool

    Whether to wait for the fine-tuning to complete. Defaults to False.

    False data_extractor_lambda str

    A lambda function run on each data element to extract the actual data.

    None **kwargs

    Additional keyword arguments for training and data loading.

    {}

    Raises:

    Type Description Exception

    If any step in the fine-tuning process fails.

    "},{"location":"bolts/openai/base/#geniusrise_openai.base.OpenAIFineTuner.get_fine_tuning_job","title":"get_fine_tuning_job(job_id) staticmethod","text":"

    Get the status of a fine-tuning job.

    "},{"location":"bolts/openai/base/#geniusrise_openai.base.OpenAIFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs) abstractmethod","text":"

    Load a dataset from a file.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset file.

    required **kwargs

    Additional keyword arguments to pass to the load_dataset method.

    {}

    Returns:

    Name Type Description Dataset Union[Dataset, DatasetDict, Optional[Dataset]]

    The loaded dataset.

    Raises:

    Type Description NotImplementedError

    This method should be overridden by subclasses.

    "},{"location":"bolts/openai/base/#geniusrise_openai.base.OpenAIFineTuner.prepare_fine_tuning_data","title":"prepare_fine_tuning_data(data, data_type)","text":"

    Prepare the given data for fine-tuning.

    Parameters:

    Name Type Description Default data Union[Dataset, DatasetDict, Optional[Dataset]]

    The dataset to prepare.

    required data_type str

    Either 'train' or 'eval' to specify the type of data.

    required

    Raises:

    Type Description ValueError

    If data_type is not 'train' or 'eval'.

    "},{"location":"bolts/openai/base/#geniusrise_openai.base.OpenAIFineTuner.preprocess_data","title":"preprocess_data(**kwargs)","text":"

    Load and preprocess the dataset.

    Raises:

    Type Description Exception

    If any step in the preprocessing fails.

    "},{"location":"bolts/openai/base/#geniusrise_openai.base.OpenAIFineTuner.wait_for_fine_tuning","title":"wait_for_fine_tuning(job_id, check_interval=60)","text":"

    Wait for a fine-tuning job to complete, checking the status every check_interval seconds.

    "},{"location":"bolts/openai/classification/","title":"Classification Fine Tuner","text":"

    Bases: OpenAIFineTuner

    A bolt for fine-tuning OpenAI models for text classification tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output BatchOutput

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius OpenAIClassificationFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder model \\\nfine_tune \\\n--args model_name=my_model tokenizer_name=my_tokenizer num_train_epochs=3 per_device_train_batch_size=8\n

    YAML Configuration:

        version: \"1\"\nbolts:\nmy_fine_tuner:\nname: \"OpenAIClassificationFineTuner\"\nmethod: \"fine_tune\"\nargs:\nmodel_name: \"my_model\"\ntokenizer_name: \"my_tokenizer\"\nnum_train_epochs: 3\nper_device_train_batch_size: 8\ndata_max_length: 512\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_dataset\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_model\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: my_fine_tuner\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    Supported Data Formats
    • JSONL
    • CSV
    • Parquet
    • JSON
    • XML
    • YAML
    • TSV
    • Excel (.xls, .xlsx)
    • SQLite (.db)
    • Feather
    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load a classification dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required

    Returns:

    Name Type Description Dataset Union[Dataset, DatasetDict, Optional[Dataset]]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\", \"label\": \"The label\"}\n

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'text' and 'label' columns.

    text,label\n\"The text content\",\"The label\"\n

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'text' and 'label' columns.

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'text' and 'label' keys.

    [{\"text\": \"The text content\", \"label\": \"The label\"}]\n

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' and 'label' child elements.

    <record>\n<text>The text content</text>\n<label>The label</label>\n</record>\n

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' and 'label' keys.

    - text: \"The text content\"\nlabel: \"The label\"\n

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'text' and 'label' columns separated by tabs.

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' and 'label' columns.

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' and 'label' columns.

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'text' and 'label' columns.

    "},{"location":"bolts/openai/commonsense_reasoning/","title":"Natural Language Inference Fine Tuner","text":"

    Bases: OpenAIFineTuner

    A bolt for fine-tuning OpenAI models for commonsense reasoning tasks.

    This bolt uses the OpenAI API to fine-tune a pre-trained model for commonsense reasoning.

    CLI Usage:

        genius OpenAICommonsenseReasoningFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder model \\\nfine_tune \\\n--args model_name=my_model tokenizer_name=my_tokenizer num_train_epochs=3 per_device_train_batch_size=8\n

    YAML Configuration:

        version: \"1\"\nbolts:\nmy_fine_tuner:\nname: \"OpenAICommonsenseReasoningFineTuner\"\nmethod: \"fine_tune\"\nargs:\nmodel_name: \"my_model\"\ntokenizer_name: \"my_tokenizer\"\nnum_train_epochs: 3\nper_device_train_batch_size: 8\ndata_max_length: 512\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_dataset\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_model\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: my_fine_tuner\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    Supported Data Formats
    • JSONL
    • CSV
    • Parquet
    • JSON
    • XML
    • YAML
    • TSV
    • Excel (.xls, .xlsx)
    • SQLite (.db)
    • Feather
    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load a commonsense reasoning dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required **kwargs Any

    Additional keyword arguments.

    {}

    Returns:

    Name Type Description Dataset Union[Dataset, DatasetDict, Optional[Dataset]]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--hugging-face-dataset","title":"Hugging Face Dataset","text":"

    Dataset files saved by the Hugging Face datasets library.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'premise', 'hypothesis', and 'label' keys.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'premise', 'hypothesis', and 'label' child elements.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'premise', 'hypothesis', and 'label' keys.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns separated by tabs.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'premise', 'hypothesis', and 'label' columns.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.prepare_fine_tuning_data","title":"prepare_fine_tuning_data(data, data_type)","text":"

    Prepare the given data for fine-tuning.

    Parameters:

    Name Type Description Default data Union[Dataset, DatasetDict, Optional[Dataset]]

    The dataset to prepare.

    required data_type str

    Either 'train' or 'eval' to specify the type of data.

    required

    Raises:

    Type Description ValueError

    If data_type is not 'train' or 'eval'.

    "},{"location":"bolts/openai/instruction_tuning/","title":"Instruction Tuning Fine Tuner","text":"

    Bases: OpenAIFineTuner

    A bolt for fine-tuning OpenAI models on instruction following tasks.

    This bolt uses the OpenAI API to fine-tune a pre-trained model for instruction following tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output BatchOutput

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius OpenAIInstructionFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder model \\\nfine_tune \\\n--args model_name=my_model tokenizer_name=my_tokenizer num_train_epochs=3 per_device_train_batch_size=8\n

    YAML Configuration:

        version: \"1\"\nbolts:\nmy_fine_tuner:\nname: \"OpenAIInstructionFineTuner\"\nmethod: \"fine_tune\"\nargs:\nmodel_name: \"my_model\"\ntokenizer_name: \"my_tokenizer\"\nnum_train_epochs: 3\nper_device_train_batch_size: 8\ndata_max_length: 512\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_dataset\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_model\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: my_fine_tuner\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    Supported Data Formats
    • JSONL
    • CSV
    • Parquet
    • JSON
    • XML
    • YAML
    • TSV
    • Excel (.xls, .xlsx)
    • SQLite (.db)
    • Feather
    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load an instruction following dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required **kwargs Any

    Additional keyword arguments.

    {}

    Returns:

    Name Type Description Dataset Union[Dataset, DatasetDict, Optional[Dataset]]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--hugging-face-dataset","title":"Hugging Face Dataset","text":"

    Dataset files saved by the Hugging Face datasets library.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'instruction' and 'output' columns.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'instruction' and 'output' columns.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'instruction' and 'output' keys.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'instruction' and 'output' child elements.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'instruction' and 'output' keys.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'instruction' and 'output' columns separated by tabs.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'instruction' and 'output' columns.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'instruction' and 'output' columns.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'instruction' and 'output' columns.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.prepare_fine_tuning_data","title":"prepare_fine_tuning_data(data, data_type)","text":"

    Prepare the given data for fine-tuning.

    Parameters:

    Name Type Description Default data Union[Dataset, DatasetDict, Optional[Dataset]]

    The dataset to prepare.

    required data_type str

    Either 'train' or 'eval' to specify the type of data.

    required

    Raises:

    Type Description ValueError

    If data_type is not 'train' or 'eval'.

    "},{"location":"bolts/openai/language_model/","title":"Language Model Fine Tuner","text":"

    Bases: OpenAIFineTuner

    A bolt for fine-tuning OpenAI models on language modeling tasks.

    This bolt uses the OpenAI API to fine-tune a pre-trained model for language modeling.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output BatchOutput

    The output data.

    required state State

    The state manager.

    required **kwargs

    Additional keyword arguments.

    required

    CLI Usage:

        genius OpenAILanguageModelFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder model \\\nfine_tune \\\n--args model_name=my_model tokenizer_name=my_tokenizer num_train_epochs=3 per_device_train_batch_size=8\n

    YAML Configuration:

        version: \"1\"\nbolts:\nmy_fine_tuner:\nname: \"OpenAILanguageModelFineTuner\"\nmethod: \"fine_tune\"\nargs:\nmodel_name: \"my_model\"\ntokenizer_name: \"my_tokenizer\"\nnum_train_epochs: 3\nper_device_train_batch_size: 8\ndata_max_length: 512\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_dataset\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_model\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: my_fine_tuner\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    Supported Data Formats
    • JSONL
    • CSV
    • Parquet
    • JSON
    • XML
    • YAML
    • TSV
    • Excel (.xls, .xlsx)
    • SQLite (.db)
    • Feather
    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load a language modeling dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required

    Returns:

    Name Type Description Dataset Union[Dataset, DatasetDict, Optional[Dataset]]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--dataset-files-saved-by-hugging-face-datasets-library","title":"Dataset files saved by Hugging Face datasets library","text":"

    The directory should contain 'dataset_info.json' and other related files.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\"}\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'text' column.

    text\n\"The text content\"\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'text' column.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'text' key.

    [{\"text\": \"The text content\"}]\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' child element.

    <record>\n<text>The text content</text>\n</record>\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' key.

    - text: \"The text content\"\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'text' column separated by tabs.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' column.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' column.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'text' column.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data","title":"prepare_fine_tuning_data(data, data_type)","text":"

    Prepare the given data for fine-tuning.

    Parameters:

    Name Type Description Default data Union[Dataset, DatasetDict, Optional[Dataset]]

    The dataset to prepare.

    required data_type str

    Either 'train' or 'eval' to specify the type of data.

    required

    Raises:

    Type Description ValueError

    If data_type is not 'train' or 'eval'.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--dataset-files-saved-by-hugging-face-datasets-library","title":"Dataset files saved by Hugging Face datasets library","text":"

    The directory should contain 'dataset_info.json' and other related files.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\"}\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--csv","title":"CSV","text":"

    Should contain 'text' column.

    text\n\"The text content\"\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--parquet","title":"Parquet","text":"

    Should contain 'text' column.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--json","title":"JSON","text":"

    An array of dictionaries with 'text' key.

    [{\"text\": \"The text content\"}]\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--xml","title":"XML","text":"

    Each 'record' element should contain 'text' child element.

    <record>\n<text>The text content</text>\n</record>\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' key.

    - text: \"The text content\"\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--tsv","title":"TSV","text":"

    Should contain 'text' column separated by tabs.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' column.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' column.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--feather","title":"Feather","text":"

    Should contain 'text' column.

    "},{"location":"bolts/openai/ner/","title":"Named Entity Recognition Fine Tuner","text":"

    Bases: OpenAIFineTuner

    A bolt for fine-tuning OpenAI models on named entity recognition tasks.

    This bolt extends the OpenAIFineTuner to handle the specifics of named entity recognition tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output BatchOutput

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius HuggingFaceCommonsenseReasoningFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder model \\\nfine_tune \\\n--args model_name=my_model tokenizer_name=my_tokenizer num_train_epochs=3 per_device_train_batch_size=8\n

    YAML Configuration:

        version: \"1\"\nbolts:\nmy_fine_tuner:\nname: \"HuggingFaceCommonsenseReasoningFineTuner\"\nmethod: \"fine_tune\"\nargs:\nmodel_name: \"my_model\"\ntokenizer_name: \"my_tokenizer\"\nnum_train_epochs: 3\nper_device_train_batch_size: 8\ndata_max_length: 512\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_dataset\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_model\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: my_fine_tuner\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    Supported Data Formats
    • JSONL
    • CSV
    • Parquet
    • JSON
    • XML
    • YAML
    • TSV
    • Excel (.xls, .xlsx)
    • SQLite (.db)
    • Feather
    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load a named entity recognition dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required

    Returns:

    Name Type Description DatasetDict Union[Dataset, DatasetDict, None]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--hugging-face-dataset","title":"Hugging Face Dataset","text":"

    Dataset files saved by the Hugging Face datasets library.

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"tokens\": [\"token1\", \"token2\", ...], \"ner_tags\": [0, 1, ...]}\n

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'tokens' and 'ner_tags' columns.

    tokens,ner_tags\n\"['token1', 'token2', ...]\", \"[0, 1, ...]\"\n

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'tokens' and 'ner_tags' columns.

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'tokens' and 'ner_tags' keys.

    [{\"tokens\": [\"token1\", \"token2\", ...], \"ner_tags\": [0, 1, ...]}]\n

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'tokens' and 'ner_tags' child elements.

    <record>\n<tokens>token1 token2 ...</tokens>\n<ner_tags>0 1 ...</ner_tags>\n</record>\n

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'tokens' and 'ner_tags' keys.

    - tokens: [\"token1\", \"token2\", ...]\nner_tags: [0, 1, ...]\n

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'tokens' and 'ner_tags' columns separated by tabs.

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'tokens' and 'ner_tags' columns.

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'tokens' and 'ner_tags' columns.

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'tokens' and 'ner_tags' columns.

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.prepare_fine_tuning_data","title":"prepare_fine_tuning_data(data, data_type)","text":"

    Prepare the given data for fine-tuning.

    Parameters:

    Name Type Description Default data Union[Dataset, DatasetDict, Optional[Dataset]]

    The dataset to prepare.

    required data_type str

    Either 'train' or 'eval' to specify the type of data.

    required

    Raises:

    Type Description ValueError

    If data_type is not 'train' or 'eval'.

    "},{"location":"bolts/openai/question_answering/","title":"Question Answering Fine Tuner","text":"

    Bases: OpenAIFineTuner

    A bolt for fine-tuning OpenAI models on question answering tasks.

    CLI Usage:

        genius HuggingFaceCommonsenseReasoningFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder model \\\nfine_tune \\\n--args model_name=my_model tokenizer_name=my_tokenizer num_train_epochs=3 per_device_train_batch_size=8\n

    YAML Configuration:

        version: \"1\"\nbolts:\nmy_fine_tuner:\nname: \"HuggingFaceCommonsenseReasoningFineTuner\"\nmethod: \"fine_tune\"\nargs:\nmodel_name: \"my_model\"\ntokenizer_name: \"my_tokenizer\"\nnum_train_epochs: 3\nper_device_train_batch_size: 8\ndata_max_length: 512\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_dataset\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_model\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: my_fine_tuner\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    Supported Data Formats
    • JSONL
    • CSV
    • Parquet
    • JSON
    • XML
    • YAML
    • TSV
    • Excel (.xls, .xlsx)
    • SQLite (.db)
    • Feather
    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load a dataset from a directory.

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"context\": \"The context content\", \"question\": \"The question\", \"answers\": {\"answer_start\": [int], \"context\": [str]}}\n

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'context', 'question', and 'answers' columns.

    context,question,answers\n\"The context content\",\"The question\",\"{'answer_start': [int], 'text': [str]}\"\n

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'context', 'question', and 'answers' columns.

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'context', 'question', and 'answers' keys.

    [{\"context\": \"The context content\", \"question\": \"The question\", \"answers\": {\"answer_start\": [int], \"context\": [str]}}]\n

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'context', 'question', and 'answers' child elements.

    <record>\n<context>The context content</context>\n<question>The question</question>\n<answers answer_start=\"int\" context=\"str\"></answers>\n</record>\n

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'context', 'question', and 'answers' keys.

    - context: \"The context content\"\nquestion: \"The question\"\nanswers:\nanswer_start: [int]\ncontext: [str]\n

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'context', 'question', and 'answers' columns separated by tabs.

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'context', 'question', and 'answers' columns.

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'context', 'question', and 'answers' columns.

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'context', 'question', and 'answers' columns.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required pad_on_right bool

    Whether to pad on the right.

    required max_length int

    The maximum length of the sequences.

    required doc_stride int

    The document stride.

    required evaluate_squadv2 bool

    Whether to evaluate using SQuAD v2 metrics.

    required

    Returns:

    Name Type Description Dataset Union[Dataset, DatasetDict, Optional[Dataset]]

    The loaded dataset.

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.prepare_fine_tuning_data","title":"prepare_fine_tuning_data(data, data_type)","text":"

    Prepare the given data for fine-tuning.

    Parameters:

    Name Type Description Default data Union[Dataset, DatasetDict, Optional[Dataset]]

    The dataset to prepare.

    required data_type str

    Either 'train' or 'eval' to specify the type of data.

    required

    Raises:

    Type Description ValueError

    If data_type is not 'train' or 'eval'.

    "},{"location":"bolts/openai/sentiment_analysis/","title":"Sentiment Analysis Fine Tuner","text":"

    Bases: OpenAIFineTuner

    A bolt for fine-tuning OpenAI models on sentiment analysis tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output BatchOutput

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius HuggingFaceCommonsenseReasoningFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder model \\\nfine_tune \\\n--args model_name=my_model tokenizer_name=my_tokenizer num_train_epochs=3 per_device_train_batch_size=8\n

    YAML Configuration:

        version: \"1\"\nbolts:\nmy_fine_tuner:\nname: \"HuggingFaceCommonsenseReasoningFineTuner\"\nmethod: \"fine_tune\"\nargs:\nmodel_name: \"my_model\"\ntokenizer_name: \"my_tokenizer\"\nnum_train_epochs: 3\nper_device_train_batch_size: 8\ndata_max_length: 512\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_dataset\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_model\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: my_fine_tuner\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    Supported Data Formats
    • JSONL
    • CSV
    • Parquet
    • JSON
    • XML
    • YAML
    • TSV
    • Excel (.xls, .xlsx)
    • SQLite (.db)
    • Feather
    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load a dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required **kwargs Any

    Additional keyword arguments.

    {}

    Returns:

    Type Description Union[Dataset, DatasetDict, Optional[Dataset]]

    Dataset | DatasetDict: The loaded dataset.

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\", \"label\": \"The label\"}\n

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'text' and 'label' columns.

    text,label\n\"The text content\",\"The label\"\n

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'text' and 'label' columns.

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'text' and 'label' keys.

    [{\"text\": \"The text content\", \"label\": \"The label\"}]\n

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' and 'label' child elements.

    <record>\n<text>The text content</text>\n<label>The label</label>\n</record>\n

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' and 'label' keys.

    - text: \"The text content\"\nlabel: \"The label\"\n

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'text' and 'label' columns separated by tabs.

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' and 'label' columns.

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' and 'label' columns.

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'text' and 'label' columns.

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.prepare_fine_tuning_data","title":"prepare_fine_tuning_data(data, data_type)","text":"

    Prepare the given data for fine-tuning.

    Parameters:

    Name Type Description Default data Union[Dataset, DatasetDict, Optional[Dataset]]

    The dataset to prepare.

    required data_type str

    Either 'train' or 'eval' to specify the type of data.

    required

    Raises:

    Type Description ValueError

    If data_type is not 'train' or 'eval'.

    "},{"location":"bolts/openai/summarization/","title":"Sentiment Analysis Fine Tuner","text":"

    Bases: OpenAIFineTuner

    A bolt for fine-tuning OpenAI models for summarization tasks.

    This bolt uses the OpenAI API to fine-tune a pre-trained model for summarization.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output BatchOutput

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius HuggingFaceCommonsenseReasoningFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder model \\\nfine_tune \\\n--args model_name=my_model tokenizer_name=my_tokenizer num_train_epochs=3 per_device_train_batch_size=8\n

    YAML Configuration:

        version: \"1\"\nbolts:\nmy_fine_tuner:\nname: \"HuggingFaceCommonsenseReasoningFineTuner\"\nmethod: \"fine_tune\"\nargs:\nmodel_name: \"my_model\"\ntokenizer_name: \"my_tokenizer\"\nnum_train_epochs: 3\nper_device_train_batch_size: 8\ndata_max_length: 512\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_dataset\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_model\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: my_fine_tuner\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    Supported Data Formats
    • JSONL
    • CSV
    • Parquet
    • JSON
    • XML
    • YAML
    • TSV
    • Excel (.xls, .xlsx)
    • SQLite (.db)
    • Feather
    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load a dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required **kwargs Any

    Additional keyword arguments.

    {}

    Returns:

    Type Description Optional[DatasetDict]

    Dataset | DatasetDict: The loaded dataset.

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\", \"summary\": \"The summary\"}\n

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'text' and 'summary' columns.

    text,summary\n\"The text content\",\"The summary\"\n

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'text' and 'summary' columns.

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'text' and 'summary' keys.

    [{\"text\": \"The text content\", \"summary\": \"The summary\"}]\n

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' and 'summary' child elements.

    <record>\n<text>The text content</text>\n<summary>The summary</summary>\n</record>\n

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' and 'summary' keys.

    - text: \"The text content\"\nsummary: \"The summary\"\n

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'text' and 'summary' columns separated by tabs.

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' and 'summary' columns.

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' and 'summary' columns.

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'text' and 'summary' columns.

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.prepare_fine_tuning_data","title":"prepare_fine_tuning_data(data, data_type)","text":"

    Prepare the given data for fine-tuning.

    Parameters:

    Name Type Description Default data Dataset | DatasetDict | Optional[Dataset]

    The dataset to prepare.

    required data_type str

    Either 'train' or 'eval' to specify the type of data.

    required

    Raises:

    Type Description ValueError

    If data_type is not 'train' or 'eval'.

    "},{"location":"bolts/openai/translation/","title":"Sentiment Analysis Fine Tuner","text":"

    Bases: OpenAIFineTuner

    A bolt for fine-tuning OpenAI models for translation tasks.

    This bolt uses the OpenAI API to fine-tune a pre-trained model for translation.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output BatchOutput

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius HuggingFaceCommonsenseReasoningFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder model \\\nfine_tune \\\n--args model_name=my_model tokenizer_name=my_tokenizer num_train_epochs=3 per_device_train_batch_size=8\n

    YAML Configuration:

        version: \"1\"\nbolts:\nmy_fine_tuner:\nname: \"HuggingFaceCommonsenseReasoningFineTuner\"\nmethod: \"fine_tune\"\nargs:\nmodel_name: \"my_model\"\ntokenizer_name: \"my_tokenizer\"\nnum_train_epochs: 3\nper_device_train_batch_size: 8\ndata_max_length: 512\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_dataset\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_model\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: my_fine_tuner\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    Supported Data Formats
    • JSONL
    • CSV
    • Parquet
    • JSON
    • XML
    • YAML
    • TSV
    • Excel (.xls, .xlsx)
    • SQLite (.db)
    • Feather
    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset","title":"load_dataset(dataset_path, origin='en', target='fr', **kwargs)","text":"

    Load a dataset from a directory.

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--supported-data-formats-and-structures-for-translation-tasks","title":"Supported Data Formats and Structures for Translation Tasks:","text":""},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\n\"translation\": {\n\"en\": \"English text\",\n\"fr\": \"French text\"\n}\n}\n

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'en' and 'fr' columns.

    en,fr\n\"English text\",\"French text\"\n

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'en' and 'fr' columns.

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'en' and 'fr' keys.

    [\n{\n\"en\": \"English text\",\n\"fr\": \"French text\"\n}\n]\n

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'en' and 'fr' child elements.

    <record>\n<en>English text</en>\n<fr>French text</fr>\n</record>\n

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'en' and 'fr' keys.

    - en: \"English text\"\nfr: \"French text\"\n

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'en' and 'fr' columns separated by tabs.

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'en' and 'fr' columns.

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'en' and 'fr' columns.

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'en' and 'fr' columns.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the directory containing the dataset files.

    required max_length int

    The maximum length for tokenization. Defaults to 512.

    required origin str

    The origin language. Defaults to 'en'.

    'en' target str

    The target language. Defaults to 'fr'.

    'fr' **kwargs

    Additional keyword arguments.

    {}

    Returns:

    Name Type Description DatasetDict Dataset | DatasetDict | Optional[Dataset]

    The loaded dataset.

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.prepare_fine_tuning_data","title":"prepare_fine_tuning_data(data, data_type)","text":"

    Prepare the given data for fine-tuning.

    Parameters:

    Name Type Description Default data Dataset | DatasetDict | Optional[Dataset]

    The dataset to prepare.

    required data_type str

    Either 'train' or 'eval' to specify the type of data.

    required

    Raises:

    Type Description ValueError

    If data_type is not 'train' or 'eval'.

    "},{"location":"core/airflow/","title":"Airflow Deployment","text":"

    AirflowRunner is a utility for managing and orchestrating Airflow DAGs. It is designed to provide a command-line interface (CLI) for creating, describing, showing, deleting, and getting the status of Airflow DAGs.

    This class uses the Airflow models to interact with DAGs and DockerOperator to run tasks in Docker containers. It is aimed to simplify the deployment and management of Airflow tasks, providing a straightforward way to deploy DAGs with Docker tasks from the command line.

    CLI Usage

    genius airflow sub-command

    Sub-commands
    • create: Create a new DAG with the given parameters and Docker task. genius airflow create [options]
    • describe: Describe a specific DAG by its ID. genius airflow describe --dag_id example_dag
    • show: Show all available DAGs in the Airflow environment. genius airflow show
    • delete: Delete a specific DAG by its ID. genius airflow delete --dag_id example_dag
    • status: Get the status of a specific DAG by its ID. genius airflow status --dag_id example_dag --airflow_api_base_url http://localhost:8080/api/v1

    Each sub-command supports various options to specify the details of the DAG or the Docker task, such as the schedule interval, start date, owner, image, command, and more.

    Example

    Creating a new DAG:

    genius airflow create --dag_directory ~/airflow/dags \\\n--dag_id my_dag \\\n--image python:3.10-slim \\\n--command \"echo Hello World\"\n

    Attributes:

    Name Type Description dag_directory str

    Directory where DAGs are stored. This path should be known to Airflow.

    Methods
    • create: Method to create a new DAG based on the provided parameters and template.
    • describe: Method to describe a specific DAG by its ID, showing details like tasks and schedule.
    • show: Method to list all available DAGs.
    • delete: Method to remove a specific DAG by its ID from the directory.
    • status: Method to fetch and display the status of a specific DAG using Airflow's REST API.
    Note
    • Ensure that the Airflow environment is properly configured and the specified DAG directory is correct.
    • Make sure that the Airflow REST API base URL is accessible if using the status command.
    "},{"location":"core/airflow/#runners.airflow.generate.AirflowRunner.__init__","title":"__init__()","text":"

    Initialize the AirflowRunner class for managing Airflow DAGs.

    "},{"location":"core/airflow/#runners.airflow.generate.AirflowRunner.create","title":"create(args)","text":"

    Create a new DAG with a Docker task using the provided arguments.

    Parameters:

    Name Type Description Default args Namespace

    Namespace containing all the arguments needed for creating the DAG.

    required"},{"location":"core/airflow/#runners.airflow.generate.AirflowRunner.delete","title":"delete(dag_id)","text":"

    Delete a specific DAG by removing its file from the DAG directory.

    Parameters:

    Name Type Description Default dag_id str

    The ID of the DAG to delete.

    required"},{"location":"core/airflow/#runners.airflow.generate.AirflowRunner.describe","title":"describe(dag_id)","text":"

    Describe the details of a specific DAG.

    Parameters:

    Name Type Description Default dag_id str

    The ID of the DAG to describe.

    required

    Returns:

    Type Description None

    The DAG object if found, None otherwise.

    "},{"location":"core/airflow/#runners.airflow.generate.AirflowRunner.run","title":"run(args)","text":"

    Execute the command based on the parsed arguments.

    "},{"location":"core/airflow/#runners.airflow.generate.AirflowRunner.show","title":"show()","text":"

    Show all available DAGs by listing their IDs.

    "},{"location":"core/airflow/#runners.airflow.generate.AirflowRunner.status","title":"status(dag_id, airflow_api_base_url)","text":"

    Get the status of a specific DAG using Airflow's REST API.

    Parameters:

    Name Type Description Default dag_id str

    The ID of the DAG to get the status of.

    required airflow_api_base_url str

    Base URL of the Airflow instance, used for calling its REST APIs.

    required"},{"location":"core/cli_boltctl/","title":"Boltctl","text":"

    The main bolt controller

    "},{"location":"core/cli_boltctl/#cli.boltctl.BoltCtl","title":"BoltCtl","text":"

    Class for managing bolts end-to-end from the command line.

    "},{"location":"core/cli_boltctl/#cli.boltctl.BoltCtl.__init__","title":"__init__(discovered_bolt)","text":"

    Initialize BoltCtl with a DiscoveredBolt object.

    Parameters:

    Name Type Description Default discovered_bolt DiscoveredBolt

    DiscoveredBolt object used to create and manage bolts.

    required"},{"location":"core/cli_boltctl/#cli.boltctl.BoltCtl.create_bolt","title":"create_bolt(input_type, output_type, state_type, id, **kwargs)","text":"

    Create a bolt of a specific type.

    Parameters:

    Name Type Description Default input_type str

    The type of input (\"batch\" or \"streaming\").

    required output_type str

    The type of output (\"batch\" or \"streaming\").

    required state_type str

    The type of state manager (\"none\", \"redis\", \"postgres\", or \"dynamodb\").

    required **kwargs

    Additional keyword arguments for initializing the bolt.

    Keyword Arguments:\n    Batch input:\n    - input_folder (str): The input folder argument.\n    - input_s3_bucket (str): The input bucket argument.\n    - input_s3_folder (str): The input S3 folder argument.\n    Batch output:\n    - output_folder (str): The output folder argument.\n    - output_s3_bucket (str): The output bucket argument.\n    - output_s3_folder (str): The output S3 folder argument.\n    Streaming input:\n    - input_kafka_cluster_connection_string (str): The input Kafka servers argument.\n    - input_kafka_topic (str): The input kafka topic argument.\n    - input_kafka_consumer_group_id (str): The Kafka consumer group id.\n    Streaming output:\n    - output_kafka_cluster_connection_string (str): The output Kafka servers argument.\n    - output_kafka_topic (str): The output kafka topic argument.\n    Redis state manager config:\n    - redis_host (str): The Redis host argument.\n    - redis_port (str): The Redis port argument.\n    - redis_db (str): The Redis database argument.\n    Postgres state manager config:\n    - postgres_host (str): The PostgreSQL host argument.\n    - postgres_port (str): The PostgreSQL port argument.\n    - postgres_user (str): The PostgreSQL user argument.\n    - postgres_password (str): The PostgreSQL password argument.\n    - postgres_database (str): The PostgreSQL database argument.\n    - postgres_table (str): The PostgreSQL table argument.\n    DynamoDB state manager config:\n    - dynamodb_table_name (str): The DynamoDB table name argument.\n    - dynamodb_region_name (str): The DynamoDB region name argument.\n

    {}

    Returns:

    Name Type Description Bolt Bolt

    The created bolt.

    "},{"location":"core/cli_boltctl/#cli.boltctl.BoltCtl.create_parser","title":"create_parser(parser)","text":"

    Add arguments to the command-line parser for managing the bolt.

    Parameters:

    Name Type Description Default parser argparse.ArgumentParser

    Command-line parser.

    required"},{"location":"core/cli_boltctl/#cli.boltctl.BoltCtl.deploy_bolt","title":"deploy_bolt(args)","text":"

    Deploy a bolt of a specific type.

    Parameters:

    Name Type Description Default **kwargs

    Additional keyword arguments for initializing the bolt.

    Keyword Arguments:\n    Batch input:\n    - input_folder (str): The input folder argument.\n    - input_s3_bucket (str): The input bucket argument.\n    - input_s3_folder (str): The input S3 folder argument.\n    Batch outupt:\n    - output_folder (str): The output folder argument.\n    - output_s3_bucket (str): The output bucket argument.\n    - output_s3_folder (str): The output S3 folder argument.\n    Streaming input:\n    - input_kafka_cluster_connection_string (str): The input Kafka servers argument.\n    - input_kafka_topic (str): The input kafka topic argument.\n    - input_kafka_consumer_group_id (str): The Kafka consumer group id.\n    Streaming output:\n    - output_kafka_cluster_connection_string (str): The output Kafka servers argument.\n    - output_kafka_topic (str): The output kafka topic argument.\n    Redis state manager config:\n    - redis_host (str): The host address for the Redis server.\n    - redis_port (int): The port number for the Redis server.\n    - redis_db (int): The Redis database to be used.\n    Postgres state manager config:\n    - postgres_host (str): The host address for the PostgreSQL server.\n    - postgres_port (int): The port number for the PostgreSQL server.\n    - postgres_user (str): The username for the PostgreSQL server.\n    - postgres_password (str): The password for the PostgreSQL server.\n    - postgres_database (str): The PostgreSQL database to be used.\n    - postgres_table (str): The PostgreSQL table to be used.\n    DynamoDB state manager config:\n    - dynamodb_table_name (str): The name of the DynamoDB table.\n    - dynamodb_region_name (str): The AWS region for DynamoDB.\n    Deployment\n    - k8s_kind (str): Kind opf kubernetes resource to be deployed as, choices are \"deployment\", \"service\", \"job\", \"cron_job\"\n    - k8s_name (str): Name of the Kubernetes resource.\n    - k8s_image (str): Docker image for the Kubernetes resource.\n    - k8s_replicas (int): Number of replicas.\n    - k8s_env_vars 
(json): Environment variables as a JSON string.\n    - k8s_cpu (str): CPU requirements.\n    - k8s_memory (str): Memory requirements.\n    - k8s_storage (str): Storage requirements.\n    - k8s_gpu (str): GPU requirements.\n    - k8s_kube_config_path (str): Name of the Kubernetes cluster local config.\n    - k8s_api_key (str): GPU requirements.\n    - k8s_api_host (str): GPU requirements.\n    - k8s_verify_ssl (str): GPU requirements.\n    - k8s_ssl_ca_cert (str): GPU requirements.\n    - k8s_cluster_name (str): Name of the Kubernetes cluster.\n    - k8s_context_name (str): Name of the kubeconfig context.\n    - k8s_namespace (str): Kubernetes namespace.\", default=\"default\n    - k8s_labels (json): Labels for Kubernetes resources, as a JSON string.\n    - k8s_annotations (json): Annotations for Kubernetes resources, as a JSON string.\n    - k8s_port (int): Port to run the spout on as a service.\n    - k8s_target_port (int): Port to expose the spout on as a service.\n    - k8s_schedule (str): Schedule to run the spout on as a cron job.\n

    required"},{"location":"core/cli_boltctl/#cli.boltctl.BoltCtl.execute_bolt","title":"execute_bolt(bolt, method_name, *args, **kwargs)","text":"

    Execute a method of a bolt.

    Parameters:

    Name Type Description Default bolt Bolt

    The bolt to execute.

    required method_name str

    The name of the method to execute.

    required *args

    Positional arguments to pass to the method.

    () **kwargs

    Keyword arguments to pass to the method.

    {}

    Returns:

    Name Type Description Any

    The result of the method.

    "},{"location":"core/cli_boltctl/#cli.boltctl.BoltCtl.run","title":"run(args)","text":"

    Run the command-line interface.

    Parameters:

    Name Type Description Default args argparse.Namespace

    Parsed command-line arguments.

    required"},{"location":"core/cli_discover/","title":"Discover","text":"

    Module discovery

    "},{"location":"core/cli_discover/#cli.discover.Discover","title":"Discover","text":""},{"location":"core/cli_discover/#cli.discover.Discover.__init__","title":"__init__(directory=None)","text":"

    Initialize the Discover class.

    "},{"location":"core/cli_discover/#cli.discover.Discover.discover_geniusrise_installed_modules","title":"discover_geniusrise_installed_modules()","text":"

    Discover installed geniusrise modules from Python path directories.

    "},{"location":"core/cli_discover/#cli.discover.Discover.find_classes","title":"find_classes(module)","text":"

    Discover spout/bolt classes in a module.

    Parameters:

    Name Type Description Default module Any

    Module to scan for spout/bolt classes.

    required"},{"location":"core/cli_discover/#cli.discover.Discover.get_geniusignore_patterns","title":"get_geniusignore_patterns(directory) staticmethod","text":"

    Read the .geniusignore file and return a list of patterns to ignore.

    Parameters:

    Name Type Description Default directory str

    Directory containing the .geniusignore file.

    required

    Returns:

    Type Description List[str]

    List[str]: List of patterns to ignore.

    "},{"location":"core/cli_discover/#cli.discover.Discover.get_init_args","title":"get_init_args(cls)","text":"

    Extract initialization arguments of a class.

    Parameters:

    Name Type Description Default cls type

    Class to extract initialization arguments from.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: Initialization arguments.

    "},{"location":"core/cli_discover/#cli.discover.Discover.import_module","title":"import_module(path)","text":"

    Import a module given its path.

    Parameters:

    Name Type Description Default path str

    Path to the module.

    required

    Returns:

    Name Type Description Any

    Imported module.

    "},{"location":"core/cli_discover/#cli.discover.Discover.scan_directory","title":"scan_directory(directory=None)","text":"

    Scan for spouts/bolts in installed extensions and user's codebase.

    Parameters:

    Name Type Description Default directory Optional[str]

    Directory to scan for user-defined spouts/bolts.

    None

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: Discovered spouts/bolts.

    "},{"location":"core/cli_dockerctl/","title":"Dockerctl","text":""},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl","title":"DockerCtl","text":"

    This class manages the creation and uploading of Docker containers.

    Attributes:

    Name Type Description base_image str

    The base image to use for the Docker container.

    workdir str

    The working directory in the Docker container.

    local_dir str

    The local directory to copy into the Docker container.

    packages List[str]

    List of packages to install in the Docker container.

    os_packages List[str]

    List of OS packages to install in the Docker container.

    env_vars Dict[str, str]

    Environment variables to set in the Docker container.

    Command-Line Interface

    genius docker package [options]

    Parameters:

    Name Type Description Default - <image_name>

    The name of the Docker image to build and upload.

    required - <repository>

    The container repository to upload to (e.g., \"ECR\", \"DockerHub\", \"Quay\", \"ACR\", \"GCR\").

    required Options
    • --auth: Authentication credentials as a JSON string. Default is an empty JSON object.
    • --base_image: The base image to use for the Docker container. Default is \"nvidia/cuda:12.2.0-runtime-ubuntu20.04\".
    • --workdir: The working directory in the Docker container. Default is \"/app\".
    • --local_dir: The local directory to copy into the Docker container. Default is \".\".
    • --packages: List of Python packages to install in the Docker container. Default is an empty list.
    • --os_packages: List of OS packages to install in the Docker container. Default is an empty list.
    • --env_vars: Environment variables to set in the Docker container. Default is an empty dictionary.
    Authentication Details
    • ECR: {\"aws_region\": \"ap-south-1\", \"aws_secret_access_key\": \"aws_key\", \"aws_access_key_id\": \"aws_secret\"}
    • DockerHub: {\"dockerhub_username\": \"username\", \"dockerhub_password\": \"password\"}
    • ACR: {\"acr_username\": \"username\", \"acr_password\": \"password\", \"acr_login_server\": \"login_server\"}
    • GCR: {\"gcr_key_file_path\": \"/path/to/keyfile.json\", \"gcr_repository\": \"repository\"}
    • Quay: {\"quay_username\": \"username\", \"quay_password\": \"password\"}
    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl--examples","title":"Examples","text":""},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl--uploading-to-ecr-amazon-elastic-container-registry","title":"Uploading to ECR (Amazon Elastic Container Registry)","text":"
    genius docker package geniusrise ecr --auth '{\"aws_region\": \"ap-south-1\"}'\n
    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl--uploading-to-dockerhub","title":"Uploading to DockerHub","text":"
    genius docker package geniusrise dockerhub --auth '{\"dockerhub_username\": \"username\", \"dockerhub_password\": \"password\"}'\n

    This is how we upload to dockerhub:

    export DOCKERHUB_USERNAME=\nexport DOCKERHUB_PASSWORD=\ngenius docker package geniusrise dockerhub \\\n--packages geniusrise-listeners geniusrise-databases geniusrise-huggingface geniusrise-openai \\\n--os_packages libmysqlclient-dev libldap2-dev libsasl2-dev libssl-dev\n
    genius docker package geniusrise-core dockerhub\n
    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl--uploading-to-acr-azure-container-registry","title":"Uploading to ACR (Azure Container Registry)","text":"
    genius docker package geniusrise acr --auth '{\"acr_username\": \"username\", \"acr_password\": \"password\", \"acr_login_server\": \"login_server\"}'\n
    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl--uploading-to-gcr-google-container-registry","title":"Uploading to GCR (Google Container Registry)","text":"
    genius docker package geniusrise gcr --auth '{\"gcr_key_file_path\": \"/path/to/keyfile.json\", \"gcr_repository\": \"repository\"}'\n
    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.__init__","title":"__init__()","text":"

    Initialize the DockerContainerManager with logging.

    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.build_image","title":"build_image(image_name, dockerfile_path)","text":"

    Build a Docker image based on the provided Dockerfile.

    Parameters:

    Name Type Description Default image_name str

    The name to give to the built Docker image.

    required dockerfile_path str

    The path to the Dockerfile to use for building the image.

    required

    Returns:

    Name Type Description bool

    True if the build was successful, False otherwise.

    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.create_dockerfile","title":"create_dockerfile()","text":"

    Create a Dockerfile based on the class attributes.

    Returns:

    Name Type Description str str

    The path to the created Dockerfile.

    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.create_parser","title":"create_parser(parser)","text":"

    Add arguments to the command-line parser for managing Docker containers.

    Parameters:

    Name Type Description Default parser argparse.ArgumentParser

    Command-line parser.

    required

    Returns:

    Type Description argparse.ArgumentParser

    argparse.ArgumentParser: The updated parser.

    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.run","title":"run(args)","text":"

    Run the command-line interface.

    Parameters:

    Name Type Description Default args argparse.Namespace

    Parsed command-line arguments.

    required"},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.upload_to_acr","title":"upload_to_acr(image_name, auth)","text":"

    Upload the Docker image to Azure Container Registry (ACR).

    Parameters:

    Name Type Description Default image_name str

    The name of the Docker image to upload.

    required auth dict

    Authentication credentials for ACR.

    required

    Returns:

    Name Type Description bool bool

    True if the upload was successful, False otherwise.

    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.upload_to_dockerhub","title":"upload_to_dockerhub(image_name, auth)","text":"

    Upload the Docker image to DockerHub.

    Parameters:

    Name Type Description Default image_name str

    The name of the Docker image to upload.

    required auth dict

    Authentication credentials for DockerHub.

    required

    Returns:

    Name Type Description bool bool

    True if the upload was successful, False otherwise.

    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.upload_to_ecr","title":"upload_to_ecr(image_name, auth, ecr_repo=None)","text":"

    Upload the Docker image to Amazon Elastic Container Registry (ECR).

    Parameters:

    Name Type Description Default image_name str

    The name of the Docker image to upload.

    required auth dict

    Authentication credentials for ECR.

    required ecr_repo Optional[str]

    The ECR repository to upload to. If not provided, it will be generated.

    None

    Returns:

    Name Type Description bool bool

    True if the upload was successful, False otherwise.

    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.upload_to_gcr","title":"upload_to_gcr(image_name, auth)","text":"

    Upload the Docker image to Google Container Registry (GCR).

    Parameters:

    Name Type Description Default image_name str

    The name of the Docker image to upload.

    required auth dict

    Authentication credentials for GCR.

    required

    Returns:

    Name Type Description bool bool

    True if the upload was successful, False otherwise.

    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.upload_to_quay","title":"upload_to_quay(image_name, auth)","text":"

    Upload the Docker image to Quay.io.

    Parameters:

    Name Type Description Default image_name str

    The name of the Docker image to upload.

    required auth dict

    Authentication credentials for Quay.io.

    required

    Returns:

    Name Type Description bool bool

    True if the upload was successful, False otherwise.

    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.upload_to_repository","title":"upload_to_repository(image_name, repository, auth={})","text":"

    Upload the Docker image to a specified container repository.

    Parameters:

    Name Type Description Default image_name str

    The name of the Docker image to upload.

    required repository str

    The container repository to upload to (e.g., \"ECR\", \"DockerHub\", \"Quay\").

    required auth dict

    Authentication credentials for the container repository. Defaults to an empty dictionary.

    {}

    Returns:

    Name Type Description bool bool

    True if the upload was successful, False otherwise.

    "},{"location":"core/cli_geniusctl/","title":"Geniusctl","text":"

    The main command line application

    "},{"location":"core/cli_geniusctl/#cli.geniusctl.GeniusCtl","title":"GeniusCtl","text":"

    Main class for managing the geniusrise CLI application.

    "},{"location":"core/cli_geniusctl/#cli.geniusctl.GeniusCtl.__init__","title":"__init__()","text":"

    Initialize GeniusCtl.

    Parameters:

    Name Type Description Default directory str

    The directory to scan for spouts and bolts.

    required"},{"location":"core/cli_geniusctl/#cli.geniusctl.GeniusCtl.cli","title":"cli()","text":"

    Main function to be called when geniusrise is run from the command line.

    "},{"location":"core/cli_geniusctl/#cli.geniusctl.GeniusCtl.create_parser","title":"create_parser()","text":"

    Create a command-line parser with arguments for managing the application.

    Returns:

    Type Description

    argparse.ArgumentParser: Command-line parser.

    "},{"location":"core/cli_geniusctl/#cli.geniusctl.GeniusCtl.list_spouts_and_bolts","title":"list_spouts_and_bolts(verbose=False)","text":"

    List all discovered spouts and bolts in a table.

    "},{"location":"core/cli_geniusctl/#cli.geniusctl.GeniusCtl.run","title":"run(args)","text":"

    Run the command-line interface.

    Parameters:

    Name Type Description Default args argparse.Namespace

    Parsed command-line arguments.

    required"},{"location":"core/cli_schema/","title":"YAML schema","text":"

    YAML schema definition as pydantic

    "},{"location":"core/cli_schema/#cli.schema.Bolt","title":"Bolt","text":"

    Bases: BaseModel

    This class defines a bolt. A bolt has a name, method, optional arguments, input, output, state, and deployment.

    "},{"location":"core/cli_schema/#cli.schema.Deploy","title":"Deploy","text":"

    Bases: BaseModel

    This class defines the deployment of the spout or bolt. The deployment can be of type k8s or ecs.

    "},{"location":"core/cli_schema/#cli.schema.DeployArgs","title":"DeployArgs","text":"

    Bases: BaseModel

    This class defines the arguments for the deployment. Depending on the type of deployment (k8s, ecs), different arguments are required.

    "},{"location":"core/cli_schema/#cli.schema.ExtraKwargs","title":"ExtraKwargs","text":"

    Bases: BaseModel

    This class is used to handle any extra arguments that are not explicitly defined in the schema.

    "},{"location":"core/cli_schema/#cli.schema.Geniusfile","title":"Geniusfile","text":"

    Bases: BaseModel

    This class defines the overall structure of the YAML file. It includes a version, spouts, and bolts.

    "},{"location":"core/cli_schema/#cli.schema.Input","title":"Input","text":"

    Bases: BaseModel

    This class defines the input of the bolt. The input can be of type batch, streaming, spout, or bolt.

    "},{"location":"core/cli_schema/#cli.schema.InputArgs","title":"InputArgs","text":"

    Bases: BaseModel

    This class defines the arguments for the input. Depending on the type of input (batch, streaming, spout, bolt), different arguments are required.

    "},{"location":"core/cli_schema/#cli.schema.Output","title":"Output","text":"

    Bases: BaseModel

    This class defines the output of the spout or bolt. The output can be of type batch or streaming.

    "},{"location":"core/cli_schema/#cli.schema.OutputArgs","title":"OutputArgs","text":"

    Bases: BaseModel

    This class defines the arguments for the output. Depending on the type of output (batch, streaming), different arguments are required.

    "},{"location":"core/cli_schema/#cli.schema.Spout","title":"Spout","text":"

    Bases: BaseModel

    This class defines a spout. A spout has a name, method, optional arguments, output, state, and deployment.

    "},{"location":"core/cli_schema/#cli.schema.State","title":"State","text":"

    Bases: BaseModel

    This class defines the state of the spout or bolt. The state can be of type none, redis, postgres, or dynamodb.

    "},{"location":"core/cli_schema/#cli.schema.StateArgs","title":"StateArgs","text":"

    Bases: BaseModel

    This class defines the arguments for the state. Depending on the type of state (none, redis, postgres, dynamodb), different arguments are required.

    "},{"location":"core/cli_spoutctl/","title":"Spoutctl","text":"

    The main spout controller

    "},{"location":"core/cli_spoutctl/#cli.spoutctl.SpoutCtl","title":"SpoutCtl","text":"

    Class for managing spouts end-to-end from the command line.

    "},{"location":"core/cli_spoutctl/#cli.spoutctl.SpoutCtl.__init__","title":"__init__(discovered_spout)","text":"

    Initialize SpoutCtl with a DiscoveredSpout object.

    Parameters:

    Name Type Description Default discovered_spout DiscoveredSpout

    DiscoveredSpout object used to create and manage spouts.

    required"},{"location":"core/cli_spoutctl/#cli.spoutctl.SpoutCtl.create_parser","title":"create_parser(parser)","text":"

    Add arguments to the command-line parser for managing the spout.

    Parameters:

    Name Type Description Default parser argparse.ArgumentParser

    Command-line parser.

    required"},{"location":"core/cli_spoutctl/#cli.spoutctl.SpoutCtl.create_spout","title":"create_spout(output_type, state_type, id, **kwargs)","text":"

    Create a spout of a specific type.

    Parameters:

    Name Type Description Default output_type str

    The type of output (\"batch\" or \"streaming\").

    required state_type str

    The type of state manager (\"none\", \"redis\", \"postgres\", or \"dynamodb\").

    required **kwargs

    Additional keyword arguments for initializing the spout.

    Keyword Arguments:\n    Batch output:\n    - output_folder (str): The directory where output files should be stored temporarily.\n    - output_s3_bucket (str): The name of the S3 bucket for output storage.\n    - output_s3_folder (str): The S3 folder for output storage.\n    Streaming output:\n    - output_kafka_topic (str): Kafka output topic for streaming spouts.\n    - output_kafka_cluster_connection_string (str): Kafka connection string for streaming spouts.\n    Stream to Batch output:\n    - output_folder (str): The directory where output files should be stored temporarily.\n    - output_s3_bucket (str): The name of the S3 bucket for output storage.\n    - output_s3_folder (str): The S3 folder for output storage.\n    - buffer_size (int): Number of messages to buffer.\n    Redis state manager config:\n    - redis_host (str): The host address for the Redis server.\n    - redis_port (int): The port number for the Redis server.\n    - redis_db (int): The Redis database to be used.\n    Postgres state manager config:\n    - postgres_host (str): The host address for the PostgreSQL server.\n    - postgres_port (int): The port number for the PostgreSQL server.\n    - postgres_user (str): The username for the PostgreSQL server.\n    - postgres_password (str): The password for the PostgreSQL server.\n    - postgres_database (str): The PostgreSQL database to be used.\n    - postgres_table (str): The PostgreSQL table to be used.\n    DynamoDB state manager config:\n    - dynamodb_table_name (str): The name of the DynamoDB table.\n    - dynamodb_region_name (str): The AWS region for DynamoDB.\n

    {}

    Returns:

    Name Type Description Spout Spout

    The created spout.

    "},{"location":"core/cli_spoutctl/#cli.spoutctl.SpoutCtl.deploy_spout","title":"deploy_spout(args)","text":"

    Deploy a spout of a specific type.

    Parameters:

    Name Type Description Default **kwargs

    Additional keyword arguments for initializing the spout.

    Keyword Arguments:\n    Batch output:\n    - output_folder (str): The directory where output files should be stored temporarily.\n    - output_s3_bucket (str): The name of the S3 bucket for output storage.\n    - output_s3_folder (str): The S3 folder for output storage.\n    Streaming output:\n    - output_kafka_topic (str): Kafka output topic for streaming spouts.\n    - output_kafka_cluster_connection_string (str): Kafka connection string for streaming spouts.\n    Stream to Batch output:\n    - output_folder (str): The directory where output files should be stored temporarily.\n    - output_s3_bucket (str): The name of the S3 bucket for output storage.\n    - output_s3_folder (str): The S3 folder for output storage.\n    - buffer_size (int): Number of messages to buffer.\n    Redis state manager config:\n    - redis_host (str): The host address for the Redis server.\n    - redis_port (int): The port number for the Redis server.\n    - redis_db (int): The Redis database to be used.\n    Postgres state manager config:\n    - postgres_host (str): The host address for the PostgreSQL server.\n    - postgres_port (int): The port number for the PostgreSQL server.\n    - postgres_user (str): The username for the PostgreSQL server.\n    - postgres_password (str): The password for the PostgreSQL server.\n    - postgres_database (str): The PostgreSQL database to be used.\n    - postgres_table (str): The PostgreSQL table to be used.\n    DynamoDB state manager config:\n    - dynamodb_table_name (str): The name of the DynamoDB table.\n    - dynamodb_region_name (str): The AWS region for DynamoDB.\n    Deployment\n    - k8s_kind (str): Kind opf kubernetes resource to be deployed as, choices are \"deployment\", \"service\", \"job\", \"cron_job\"\n    - k8s_name (str): Name of the Kubernetes resource.\n    - k8s_image (str): Docker image for the Kubernetes resource.\n    - k8s_replicas (int): Number of replicas.\n    - k8s_env_vars (json): Environment variables as a JSON 
string.\n    - k8s_cpu (str): CPU requirements.\n    - k8s_memory (str): Memory requirements.\n    - k8s_storage (str): Storage requirements.\n    - k8s_gpu (str): GPU requirements.\n    - k8s_kube_config_path (str): Name of the Kubernetes cluster local config.\n    - k8s_api_key (str): GPU requirements.\n    - k8s_api_host (str): GPU requirements.\n    - k8s_verify_ssl (str): GPU requirements.\n    - k8s_ssl_ca_cert (str): GPU requirements.\n    - k8s_cluster_name (str): Name of the Kubernetes cluster.\n    - k8s_context_name (str): Name of the kubeconfig context.\n    - k8s_namespace (str): Kubernetes namespace.\", default=\"default\n    - k8s_labels (json): Labels for Kubernetes resources, as a JSON string.\n    - k8s_annotations (json): Annotations for Kubernetes resources, as a JSON string.\n    - k8s_port (int): Port to run the spout on as a service.\n    - k8s_target_port (int): Port to expose the spout on as a service.\n    - k8s_schedule (str): Schedule to run the spout on as a cron job.\n

    required"},{"location":"core/cli_spoutctl/#cli.spoutctl.SpoutCtl.execute_spout","title":"execute_spout(spout, method_name, *args, **kwargs)","text":"

    Execute a method of a spout.

    Parameters:

    Name Type Description Default spout Spout

    The spout to execute.

    required method_name str

    The name of the method to execute.

    required *args

    Positional arguments to pass to the method.

    () **kwargs

    Keyword arguments to pass to the method.

    {}

    Returns:

    Name Type Description Any

    The result of the method.

    "},{"location":"core/cli_spoutctl/#cli.spoutctl.SpoutCtl.run","title":"run(args)","text":"

    Run the command-line interface.

    Parameters:

    Name Type Description Default args argparse.Namespace

    Parsed command-line arguments.

    required"},{"location":"core/cli_yamlctl/","title":"YamlCtl","text":"

    Control spouts and bolts defined in a YAML file

    "},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl","title":"YamlCtl","text":"

    Command-line interface for managing spouts and bolts based on a YAML configuration.

    The YamlCtl class provides methods to run specific or all spouts and bolts defined in a YAML file. The YAML file's structure is defined by the Geniusfile schema.

    Example YAML structures:

    version: 1\nspouts:\nhttp_listener:\nname: WebhookListener\nmethod: listen\nargs:\nport: 8081\nstate:\ntype: redis\nargs:\nredis_host: \"127.0.0.1\"\nredis_port: 6379\nredis_db: 0\noutput:\ntype: batch\nargs:\nbucket: geniusrise-test\nfolder: train\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: webhook-listener\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\nbolts:\ntext_classifier:\nname: TextClassifier\nmethod: classify\nargs:\nmodel_name: bert-base-uncased\nstate:\ntype: none\ninput:\ntype: batch\nargs:\nbucket: geniusrise-test\nfolder: train\noutput:\ntype: batch\nargs:\nbucket: geniusrise-test\nfolder: model\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: text-classifier\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    version: 1\nspouts:\ntwitter_stream:\nname: TwitterStream\nmethod: stream\nargs:\napi_key: \"your_twitter_api_key\"\nhashtags: [\"#AI\", \"#ML\"]\nstate:\ntype: postgres\nargs:\npostgres_host: \"127.0.0.1\"\npostgres_port: 5432\npostgres_user: \"postgres\"\npostgres_password: \"postgres\"\npostgres_database: \"geniusrise\"\npostgres_table: \"twitter_data\"\noutput:\ntype: streaming\nargs:\noutput_topic: twitter_topic\nkafka_servers: \"localhost:9092\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: twitter-stream\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\nbolts:\nsentiment_analyzer:\nname: SentimentAnalyzer\nmethod: analyze\nargs:\nmodel_name: \"sentiment-model\"\nstate:\ntype: dynamodb\nargs:\ndynamodb_table_name: \"SentimentAnalysis\"\ndynamodb_region_name: \"us-east-1\"\ninput:\ntype: streaming\nargs:\ninput_topic: twitter_topic\nkafka_servers: \"localhost:9092\"\ngroup_id: \"sentiment-group\"\noutput:\ntype: batch\nargs:\nbucket: geniusrise-test\nfolder: sentiment_results\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: sentiment-analyzer\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n

    Attributes:

    Name Type Description geniusfile Geniusfile

    Parsed YAML configuration.

    spout_ctls Dict[str, SpoutCtl]

    Dictionary of SpoutCtl instances.

    bolt_ctls Dict[str, BoltCtl]

    Dictionary of BoltCtl instances.

    "},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.__init__","title":"__init__(spout_ctls, bolt_ctls)","text":"

    Initialize YamlCtl with the path to the YAML file and control instances for spouts and bolts.

    Parameters:

    Name Type Description Default spout_ctls Dict[str, SpoutCtl]

    Dictionary of SpoutCtl instances.

    required bolt_ctls Dict[str, BoltCtl]

    Dictionary of BoltCtl instances.

    required"},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.create_parser","title":"create_parser(parser)","text":"

    Create and return the command-line parser for managing spouts and bolts.

    "},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.deploy_bolt","title":"deploy_bolt(bolt_name)","text":"

    Deploy a specific bolt based on its name.

    Parameters:

    Name Type Description Default bolt_name str

    Name of the bolt to deploy.

    required"},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.deploy_bolts","title":"deploy_bolts()","text":"

    Deploy all bolts defined in the YAML configuration.

    "},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.deploy_spout","title":"deploy_spout(spout_name)","text":"

    Deploy a specific spout based on its name.

    Parameters:

    Name Type Description Default spout_name str

    Name of the spout to deploy.

    required"},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.deploy_spouts","title":"deploy_spouts()","text":"

    Deploy all spouts defined in the YAML configuration.

    "},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.resolve_reference","title":"resolve_reference(input_type, ref_name)","text":"

    Resolve the reference of a bolt's input based on the input type (spout or bolt).

    Parameters:

    Name Type Description Default input_type str

    Type of the input (\"spout\" or \"bolt\").

    required ref_name str

    Name of the spout or bolt to refer to.

    required

    Returns:

    Name Type Description Output

    The output data of the referred spout or bolt.

    "},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.run","title":"run(args)","text":"

    Run the command-line interface for managing spouts and bolts based on provided arguments. Please note that there is no ordering of the spouts and bolts in the YAML configuration. Each spout and bolt is an independent entity even when connected together.

    Parameters:

    Name Type Description Default args argparse.Namespace

    Parsed command-line arguments.

    required"},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.run_bolt","title":"run_bolt(bolt_name)","text":"

    Run a specific bolt based on its name.

    Parameters:

    Name Type Description Default bolt_name str

    Name of the bolt to run.

    required"},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.run_bolts","title":"run_bolts(executor)","text":"

    Run all bolts defined in the YAML configuration.

    "},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.run_spout","title":"run_spout(spout_name)","text":"

    Run a specific spout based on its name.

    Parameters:

    Name Type Description Default spout_name str

    Name of the spout to run.

    required"},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.run_spouts","title":"run_spouts(executor)","text":"

    Run all spouts defined in the YAML configuration.

    "},{"location":"core/core_bolt/","title":"Bolt","text":"

    Core Bolt class

    "},{"location":"core/core_bolt/#core.bolt.Bolt","title":"Bolt","text":"

    Bases: Task

    Base class for all bolts.

    A bolt is a component that consumes streams of data, processes them, and possibly emits new data streams.

    "},{"location":"core/core_bolt/#core.bolt.Bolt.__call__","title":"__call__(method_name, *args, **kwargs)","text":"

    Execute a method locally and manage the state.

    Parameters:

    Name Type Description Default method_name str

    The name of the method to execute.

    required *args

    Positional arguments to pass to the method.

    () **kwargs

    Keyword arguments to pass to the method. Keyword Arguments: - Additional keyword arguments specific to the method.

    {}

    Returns:

    Name Type Description Any Any

    The result of the method.

    "},{"location":"core/core_bolt/#core.bolt.Bolt.__init__","title":"__init__(input, output, state, id=None, **kwargs)","text":"

    The Bolt class is a base class for all bolts in the given context. It inherits from the Task class and provides methods for executing tasks both locally and remotely, as well as managing their state, with state management options including in-memory, Redis, PostgreSQL, and DynamoDB, and input and output data for batch, streaming, stream-to-batch, and batch-to-streaming.

    The Bolt class uses the Input, Output and State classes, which are abstract base classes for managing input data, output data and states, respectively. The Input and Output classes each have two subclasses: StreamingInput, BatchInput, StreamingOutput and BatchOutput, which manage streaming and batch input and output data, respectively. The State class is used to get and set state, and it has several subclasses for different types of state managers.

    The Bolt class also uses the ECSManager and K8sManager classes in the execute_remote method, which are used to manage tasks on Amazon ECS and Kubernetes, respectively.

    Usage
    • Create an instance of the Bolt class by providing an Input object, an Output object and a State object.
    • The Input object specifies the input data for the bolt.
    • The Output object specifies the output data for the bolt.
    • The State object handles the management of the bolt's state.
    Example

    input = Input(...) output = Output(...) state = State(...) bolt = Bolt(input, output, state)

    Parameters:

    Name Type Description Default input Input

    The input data.

    required output Output

    The output data.

    required state State

    The state manager.

    required"},{"location":"core/core_bolt/#core.bolt.Bolt.create","title":"create(klass, input_type, output_type, state_type, id=None, **kwargs) staticmethod","text":"

    Create a bolt of a specific type.

    This static method is used to create a bolt of a specific type. It takes in an input type, an output type, a state type, and additional keyword arguments for initializing the bolt.

    The method creates the input, output, and state manager based on the provided types, and then creates and returns a bolt using these configurations.

    Parameters:

    Name Type Description Default klass type

    The Bolt class to create.

    required input_type str

    The type of input (\"batch\" or \"streaming\").

    required output_type str

    The type of output (\"batch\" or \"streaming\").

    required state_type str

    The type of state manager (\"none\", \"redis\", \"postgres\", or \"dynamodb\").

    required **kwargs

    Additional keyword arguments for initializing the bolt.

    Keyword Arguments:\n    Batch input:\n    - input_folder (str): The input folder argument.\n    - input_s3_bucket (str): The input bucket argument.\n    - input_s3_folder (str): The input S3 folder argument.\n    Batch output config:\n    - output_folder (str): The output folder argument.\n    - output_s3_bucket (str): The output bucket argument.\n    - output_s3_folder (str): The output S3 folder argument.\n    Streaming input:\n    - input_kafka_cluster_connection_string (str): The input Kafka servers argument.\n    - input_kafka_topic (str): The input kafka topic argument.\n    - input_kafka_consumer_group_id (str): The Kafka consumer group id.\n    Streaming output:\n    - output_kafka_cluster_connection_string (str): The output Kafka servers argument.\n    - output_kafka_topic (str): The output kafka topic argument.\n    Stream-to-Batch input:\n    - buffer_size (int): Number of messages to buffer.\n    - input_kafka_cluster_connection_string (str): The input Kafka servers argument.\n    - input_kafka_topic (str): The input kafka topic argument.\n    - input_kafka_consumer_group_id (str): The Kafka consumer group id.\n    Batch-to-Streaming input:\n    - buffer_size (int): Number of messages to buffer.\n    - input_folder (str): The input folder argument.\n    - input_s3_bucket (str): The input bucket argument.\n    - input_s3_folder (str): The input S3 folder argument.\n    Stream-to-Batch output:\n    - buffer_size (int): Number of messages to buffer.\n    - output_folder (str): The output folder argument.\n    - output_s3_bucket (str): The output bucket argument.\n    - output_s3_folder (str): The output S3 folder argument.\n    Redis state manager config:\n    - redis_host (str): The Redis host argument.\n    - redis_port (str): The Redis port argument.\n    - redis_db (str): The Redis database argument.\n    Postgres state manager config:\n    - postgres_host (str): The PostgreSQL host argument.\n    - postgres_port (str): The PostgreSQL port 
argument.\n    - postgres_user (str): The PostgreSQL user argument.\n    - postgres_password (str): The PostgreSQL password argument.\n    - postgres_database (str): The PostgreSQL database argument.\n    - postgres_table (str): The PostgreSQL table argument.\n    DynamoDB state manager config:\n    - dynamodb_table_name (str): The DynamoDB table name argument.\n    - dynamodb_region_name (str): The DynamoDB region name argument.\n

    {}

    Returns:

    Name Type Description Bolt Bolt

    The created bolt.

    Raises:

    Type Description ValueError

    If an invalid input type, output type, or state type is provided.

    "},{"location":"core/core_data_batch_input/","title":"Batch data input","text":"

    Batch input manager

    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput","title":"BatchInput","text":"

    Bases: Input

    \ud83d\udcc1 BatchInput: Manages batch input data.

    Attributes:

    Name Type Description input_folder str

    Folder to read input files.

    bucket str

    S3 bucket name.

    s3_folder str

    Folder within the S3 bucket.

    partition_scheme Optional[str]

    Partitioning scheme for S3, e.g., \"year/month/day\".

    Raises:

    Type Description FileNotExistError

    If the file does not exist.

    Parameters:

    Name Type Description Default input_folder str

    Folder to read input files from.

    required bucket str

    S3 bucket name.

    required s3_folder str

    Folder within the S3 bucket.

    required partition_scheme Optional[str]

    Partitioning scheme for S3, e.g., \"year/month/day\".

    None Usage"},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput--initialize-batchinput","title":"Initialize BatchInput","text":"
    input = BatchInput(\"/path/to/input\", \"my_bucket\", \"s3/folder\")\n
    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput--get-the-input-folder","title":"Get the input folder","text":"
    folder = input.get()\n
    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput--save-a-spark-dataframe-to-the-input-folder","title":"Save a Spark DataFrame to the input folder","text":"
    input.from_spark(my_dataframe)\n
    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput--compose-multiple-batchinput-instances","title":"Compose multiple BatchInput instances","text":"
    composed = input.compose(input1, input2)\n
    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput--copy-files-from-s3-to-the-input-folder","title":"Copy files from S3 to the input folder","text":"
    input.from_s3()\n
    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput--collect-metrics","title":"Collect metrics","text":"
    metrics = input.collect_metrics()\n
    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput.__init__","title":"__init__(input_folder, bucket, s3_folder, partition_scheme=None)","text":"

    Initialize a new BatchInput instance.

    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput.collect_metrics","title":"collect_metrics()","text":"

    Collect and return metrics, then clear them for future collection.

    Returns:

    Type Description Dict[str, float]

    Dict[str, float]: Dictionary containing metrics.

    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput.compose","title":"compose(*inputs)","text":"

    Compose multiple BatchInput instances by merging their input folders.

    Parameters:

    Name Type Description Default inputs Input

    Variable number of BatchInput instances.

    ()

    Returns:

    Type Description Union[bool, str]

    Union[bool, str]: True if successful, error message otherwise.

    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput.from_kafka","title":"from_kafka(input_topic, kafka_cluster_connection_string, nr_messages=1000, group_id='geniusrise', partition_scheme=None)","text":"

    Consume messages from a Kafka topic and save them as JSON files in the input folder. Stops consuming after reaching the latest message or the specified number of messages.

    Parameters:

    Name Type Description Default input_topic str

    Kafka topic to consume data from.

    required kafka_cluster_connection_string str

    Connection string for the Kafka cluster.

    required nr_messages int

    Number of messages to consume. Defaults to 1000.

    1000 group_id str

    Kafka consumer group ID. Defaults to \"geniusrise\".

    'geniusrise' partition_scheme Optional[str]

    Optional partitioning scheme for Kafka, e.g., \"year/month/day\".

    None

    Returns:

    Name Type Description str str

    The path to the folder where the consumed messages are saved as JSON files.

    Raises:

    Type Description KafkaConnectionError

    If unable to connect to Kafka.

    Exception

    If any other error occurs during processing.

    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput.from_s3","title":"from_s3(bucket=None, s3_folder=None)","text":"

    Copy contents from a given S3 bucket and location to the input folder.

    Raises:

    Type Description Exception

    If the input folder is not specified.

    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput.from_spark","title":"from_spark(df)","text":"

    Save the contents of a Spark DataFrame to the input folder with optional partitioning.

    Parameters:

    Name Type Description Default df DataFrame

    The Spark DataFrame to save.

    required

    Raises:

    Type Description FileNotExistError

    If the input folder does not exist.

    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput.get","title":"get()","text":"

    Get the input folder path.

    Returns:

    Name Type Description str str

    The path to the input folder.

    "},{"location":"core/core_data_batch_input/#core.data.batch_input.FileNotExistError","title":"FileNotExistError","text":"

    Bases: Exception

    \u274c Custom exception for file not existing.

    "},{"location":"core/core_data_batch_input/#core.data.batch_input.KafkaConnectionError","title":"KafkaConnectionError","text":"

    Bases: Exception

    \u274c Custom exception for kafka connection problems.

    "},{"location":"core/core_data_batch_output/","title":"Batch data output","text":"

    Batch output manager

    "},{"location":"core/core_data_batch_output/#core.data.batch_output.BatchOutput","title":"BatchOutput","text":"

    Bases: Output

    \ud83d\udcc1 BatchOutput: Manages batch output data.

    Attributes:

    Name Type Description output_folder str

    Folder to save output files.

    bucket str

    S3 bucket name.

    s3_folder str

    Folder within the S3 bucket.

    partition_scheme Optional[str]

    Partitioning scheme for S3, e.g., \"year/month/day\".

    Raises:

    Type Description FileNotExistError

    If the output folder does not exist.

    Parameters:

    Name Type Description Default output_folder str

    Folder to save output files.

    required bucket str

    S3 bucket name.

    required s3_folder str

    Folder within the S3 bucket.

    required partition_scheme Optional[str]

    Partitioning scheme for S3, e.g., \"year/month/day\".

    None Usage
    # Initialize the BatchOutput instance\nconfig = BatchOutput(\"/path/to/output\", \"my_bucket\", \"s3/folder\", partition_scheme=\"%Y/%m/%d\")\n# Save data to a file\nconfig.save({\"key\": \"value\"}, \"example.json\")\n# Compose multiple BatchOutput instances\nresult = config1.compose(config2, config3)\n# Convert output to a Spark DataFrame\nspark_df = config.to_spark(spark_session)\n# Copy files to a remote S3 bucket\nconfig.to_s3()\n# Flush the output to S3\nconfig.flush()\n# Collect metrics\nmetrics = config.collect_metrics()\n
    "},{"location":"core/core_data_batch_output/#core.data.batch_output.BatchOutput.__init__","title":"__init__(output_folder, bucket, s3_folder, partition_scheme=None)","text":"

    Initialize a new BatchOutput instance.

    Parameters:

    Name Type Description Default output_folder str

    Folder to save output files.

    required bucket str

    S3 bucket name.

    required s3_folder str

    Folder within the S3 bucket.

    required"},{"location":"core/core_data_batch_output/#core.data.batch_output.BatchOutput.collect_metrics","title":"collect_metrics()","text":"

    Collect and return metrics, then clear them for future collection.

    Returns:

    Type Description Dict[str, float]

    Dict[str, float]: Dictionary containing metrics.

    "},{"location":"core/core_data_batch_output/#core.data.batch_output.BatchOutput.compose","title":"compose(*outputs)","text":"

    Compose multiple BatchOutput instances by merging their output folders.

    Parameters:

    Name Type Description Default outputs Output

    Variable number of BatchOutput instances.

    ()

    Returns:

    Type Description Union[bool, str]

    Union[bool, str]: True if successful, error message otherwise.

    "},{"location":"core/core_data_batch_output/#core.data.batch_output.BatchOutput.flush","title":"flush()","text":"

    \ud83d\udd04 Flush the output by copying all files and directories from the output folder to a given S3 bucket and folder.

    "},{"location":"core/core_data_batch_output/#core.data.batch_output.BatchOutput.save","title":"save(data, filename=None, **kwargs)","text":"

    \ud83d\udcbe Save data to a file in the output folder.

    Parameters:

    Name Type Description Default data Any

    The data to save.

    required filename Optional[str]

    The filename to use when saving the data to a file.

    None"},{"location":"core/core_data_batch_output/#core.data.batch_output.BatchOutput.to_kafka","title":"to_kafka(output_topic, kafka_cluster_connection_string)","text":"

    Produce messages to a Kafka topic from the files in the output folder.

    Parameters:

    Name Type Description Default output_topic str

    Kafka topic to produce data to.

    required kafka_cluster_connection_string str

    Connection string for the Kafka cluster.

    required key_serializer Optional[str]

    Serializer for message keys. Defaults to None.

    required

    Raises:

    Type Description KafkaConnectionError

    If unable to connect to Kafka.

    Exception

    If any other error occurs during processing.

    "},{"location":"core/core_data_batch_output/#core.data.batch_output.BatchOutput.to_s3","title":"to_s3()","text":"

    \u2601\ufe0f Recursively copy all files and directories from the output folder to a given S3 bucket and folder.

    "},{"location":"core/core_data_batch_output/#core.data.batch_output.BatchOutput.to_spark","title":"to_spark(spark)","text":"

    Get a Spark DataFrame from the output folder.

    Returns:

    Type Description pyspark.sql.DataFrame

    pyspark.sql.DataFrame: A Spark DataFrame where each row corresponds to a file in the output folder.

    Raises:

    Type Description FileNotExistError

    If the output folder does not exist.

    "},{"location":"core/core_data_batch_output/#core.data.batch_output.FileNotExistError","title":"FileNotExistError","text":"

    Bases: Exception

    \u274c Custom exception for file not existing.

    "},{"location":"core/core_data_batch_output/#core.data.batch_output.KafkaConnectionError","title":"KafkaConnectionError","text":"

    Bases: Exception

    \u274c Custom exception for Kafka connection problems.

    "},{"location":"core/core_data_input/","title":"Data input","text":"

    Input manager base class

    "},{"location":"core/core_data_input/#core.data.input.Input","title":"Input","text":"

    Bases: ABC

    Abstract class for managing input data.

    Attributes:

    Name Type Description log logging.Logger

    Logger instance.

    "},{"location":"core/core_data_input/#core.data.input.Input.__add__","title":"__add__(*inputs)","text":"

    Compose multiple inputs.

    Parameters:

    Name Type Description Default inputs Input

    Variable number of Input instances.

    ()

    Returns:

    Type Description Union[bool, str]

    Union[bool, str]: True if successful, error message otherwise.

    "},{"location":"core/core_data_input/#core.data.input.Input.collect_metrics","title":"collect_metrics() abstractmethod","text":"

    Collect metrics like latency.

    Returns:

    Type Description Dict[str, float]

    Dict[str, float]: A dictionary containing metrics.

    "},{"location":"core/core_data_input/#core.data.input.Input.compose","title":"compose(*inputs) abstractmethod","text":"

    Compose multiple inputs.

    Parameters:

    Name Type Description Default inputs Input

    Variable number of Input instances.

    ()

    Returns:

    Type Description Union[bool, str]

    Union[bool, str]: True if successful, error message otherwise.

    "},{"location":"core/core_data_input/#core.data.input.Input.get","title":"get() abstractmethod","text":"

    Abstract method to get data from the input source.

    Returns:

    Name Type Description Any Any

    The data from the input source.

    "},{"location":"core/core_data_input/#core.data.input.Input.retryable_get","title":"retryable_get()","text":"

    Retryable get method.

    Returns:

    Name Type Description Any Any

    The data from the input source.

    "},{"location":"core/core_data_output/","title":"Data output","text":"

    Output manager base class

    "},{"location":"core/core_data_output/#core.data.output.Output","title":"Output","text":"

    Bases: ABC

    Abstract base class for managing output data.

    "},{"location":"core/core_data_output/#core.data.output.Output.flush","title":"flush() abstractmethod","text":"

    Flush the output. This method should be implemented by subclasses.

    "},{"location":"core/core_data_output/#core.data.output.Output.save","title":"save(data, **kwargs) abstractmethod","text":"

    Save data to a file or ingest it into a Kafka topic.

    Parameters:

    Name Type Description Default data Any

    The data to save or ingest.

    required filename str

    The filename to use when saving the data to a file.

    required"},{"location":"core/core_data_streaming_input/","title":"Streaming data input","text":"

    Streaming input manager

    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.KafkaConnectionError","title":"KafkaConnectionError","text":"

    Bases: Exception

    \u274c Custom exception for kafka connection problems.

    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput","title":"StreamingInput","text":"

    Bases: Input

    \ud83d\udce1 StreamingInput: Manages streaming input data from Kafka and other streaming sources.

    Attributes:

    Name Type Description input_topic str

    Kafka topic to consume data from.

    kafka_cluster_connection_string str

    Connection string for the Kafka cluster.

    group_id str

    Kafka consumer group ID.

    consumer KafkaConsumer

    Kafka consumer instance.

    Usage

    input = StreamingInput(\"my_topic\", \"localhost:9094\") for message in input.get(): print(message.value)

    Parameters:

    Name Type Description Default input_topic str

    Kafka topic to consume data from.

    required kafka_cluster_connection_string str

    Connection string for the Kafka cluster.

    required group_id str

    Kafka consumer group ID. Defaults to \"geniusrise\".

    'geniusrise' **kwargs

    Additional keyword arguments for KafkaConsumer.

    {}

    Raises:

    Type Description KafkaConnectionError

    If unable to connect to Kafka.

    Usage"},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput--using-get-method-to-consume-from-kafka","title":"Using get method to consume from Kafka","text":"
    input = StreamingInput(\"my_topic\", \"localhost:9094\")\nconsumer = input.get()\nfor message in consumer:\nprint(message.value)\n
    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput--using-from_streamz-method-to-process-streamz-dataframe","title":"Using from_streamz method to process streamz DataFrame","text":"
    input = StreamingInput(\"my_topic\", \"localhost:9094\")\nstreamz_df = ...  # Assume this is a streamz DataFrame\nfor row in input.from_streamz(streamz_df):\nprint(row)\n
    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput--using-from_spark-method-to-process-spark-dataframe","title":"Using from_spark method to process Spark DataFrame","text":"
    input = StreamingInput(\"my_topic\", \"localhost:9094\")\nspark_df = ...  # Assume this is a Spark DataFrame\nmap_func = lambda row: {\"key\": row.key, \"value\": row.value}\nquery_or_rdd = input.from_spark(spark_df, map_func)\n
    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput--using-compose-method-to-merge-multiple-streaminginput-instances","title":"Using compose method to merge multiple StreamingInput instances","text":"
    input1 = StreamingInput(\"topic1\", \"localhost:9094\")\ninput2 = StreamingInput(\"topic2\", \"localhost:9094\")\nresult = input1.compose(input2)\n
    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput--using-close-method-to-close-the-kafka-consumer","title":"Using close method to close the Kafka consumer","text":"
    input = StreamingInput(\"my_topic\", \"localhost:9094\")\ninput.close()\n
    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput--using-seek-method-to-seek-to-a-specific-offset","title":"Using seek method to seek to a specific offset","text":"
    input = StreamingInput(\"my_topic\", \"localhost:9094\")\ninput.seek(42)\n
    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput--using-commit-method-to-manually-commit-offsets","title":"Using commit method to manually commit offsets","text":"
    input = StreamingInput(\"my_topic\", \"localhost:9094\")\ninput.commit()\n
    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput--using-collect_metrics-method-to-collect-kafka-metrics","title":"Using collect_metrics method to collect Kafka metrics","text":"
    input = StreamingInput(\"my_topic\", \"localhost:9094\")\nmetrics = input.collect_metrics()\nprint(metrics)\n
    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput.__init__","title":"__init__(input_topic, kafka_cluster_connection_string, group_id='geniusrise', **kwargs)","text":"

    \ud83d\udca5 Initialize a new StreamingInput instance.

    Parameters:

    Name Type Description Default input_topic str

    Kafka topic to consume data from.

    required kafka_cluster_connection_string str

    Kafka cluster connection string.

    required group_id str

    Kafka consumer group id. Defaults to \"geniusrise\".

    'geniusrise'"},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput.close","title":"close()","text":"

    \ud83d\udeaa Close the Kafka consumer.

    Raises:

    Type Description Exception

    If an error occurs while closing the consumer.

    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput.collect_metrics","title":"collect_metrics()","text":"

    \ud83d\udcca Collect metrics related to the Kafka consumer.

    Returns:

    Type Description Dict[str, Union[int, float]]

    Dict[str, Union[int, float]]: A dictionary containing metrics like latency.

    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput.commit","title":"commit()","text":"

    \u2705 Manually commit offsets.

    Raises:

    Type Description Exception

    If an error occurs while committing offsets.

    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput.compose","title":"compose(*inputs)","text":"

    Compose multiple StreamingInput instances by merging their iterators.

    Parameters:

    Name Type Description Default inputs StreamingInput

    Variable number of StreamingInput instances.

    ()

    Returns:

    Type Description Union[bool, str]

    Union[bool, str]: True if successful, error message otherwise.

    Caveat

    On merging different topics, other operations such as seek, commit, and collect_metrics may not behave as expected.

    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput.from_spark","title":"from_spark(spark_df, map_func)","text":"

    Process a Spark DataFrame as a stream, similar to Kafka processing.

    Parameters:

    Name Type Description Default spark_df DataFrame

    The Spark DataFrame to process.

    required map_func Callable[[Row], Any]

    Function to map each row of the DataFrame.

    required

    Returns:

    Type Description Union[StreamingQuery, RDD[Any]]

    Union[StreamingQuery, RDD[Any]]: Returns a StreamingQuery for streaming DataFrames, and an RDD for batch DataFrames.

    Raises:

    Type Description Exception

    If an error occurs during processing.

    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput.from_streamz","title":"from_streamz(streamz_df, sentinel=None, timeout=5)","text":"

    Process a streamz DataFrame as a stream, similar to Kafka processing.

    Parameters:

    Name Type Description Default streamz_df ZDataFrame

    The streamz DataFrame to process.

    required sentinel Any

    The value that, when received, will stop the generator.

    None timeout int

    The time to wait for an item from the queue before raising an exception.

    5

    Yields:

    Name Type Description Any Any

    Yields each row as a dictionary.

    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput.get","title":"get()","text":"

    \ud83d\udce5 Get data from the input topic.

    Returns:

    Name Type Description KafkaConsumer KafkaConsumer

    The Kafka consumer.

    Raises:

    Type Description Exception

    If no input source or consumer is specified.

    "},{"location":"core/core_data_streaming_output/","title":"Streaming data output","text":"

    Streaming output manager

    "},{"location":"core/core_data_streaming_output/#core.data.streaming_output.StreamingOutput","title":"StreamingOutput","text":"

    Bases: Output

    \ud83d\udce1 StreamingOutput: Manages streaming output data.

    Attributes:

    Name Type Description output_topic str

    Kafka topic to ingest data.

    producer KafkaProducer

    Kafka producer for ingesting data.

    Usage:

    config = StreamingOutput(\"my_topic\", \"localhost:9094\")\nconfig.save({\"key\": \"value\"}, \"ignored_filename\")\nconfig.flush()\n

    Note: - Ensure the Kafka cluster is running and accessible.

    "},{"location":"core/core_data_streaming_output/#core.data.streaming_output.StreamingOutput.__init__","title":"__init__(output_topic, kafka_servers)","text":"

    Initialize a new streaming output data.

    Parameters:

    Name Type Description Default output_topic str

    Kafka topic to ingest data.

    required kafka_servers str

    Kafka bootstrap servers.

    required"},{"location":"core/core_data_streaming_output/#core.data.streaming_output.StreamingOutput.close","title":"close()","text":"

    \ud83d\udeaa Close the Kafka producer.

    Raises:

    Type Description Exception

    If no Kafka producer is available.

    "},{"location":"core/core_data_streaming_output/#core.data.streaming_output.StreamingOutput.flush","title":"flush()","text":"

    \ud83d\udd04 Flush the output by flushing the Kafka producer.

    Raises:

    Type Description Exception

    If no Kafka producer is available.

    "},{"location":"core/core_data_streaming_output/#core.data.streaming_output.StreamingOutput.partition_available","title":"partition_available(partition)","text":"

    \ud83e\uddd0 Check if a partition is available in the Kafka topic.

    Parameters:

    Name Type Description Default partition int

    The partition to check.

    required

    Returns:

    Name Type Description bool bool

    True if the partition is available, False otherwise.

    Raises:

    Type Description Exception

    If no Kafka producer is available.

    "},{"location":"core/core_data_streaming_output/#core.data.streaming_output.StreamingOutput.save","title":"save(data, **kwargs)","text":"

    \ud83d\udce4 Ingest data into the Kafka topic.

    Parameters:

    Name Type Description Default data Any

    The data to ingest.

    required filename str

    This argument is ignored for streaming outputs.

    required

    Raises:

    Type Description Exception

    If no Kafka producer is available or an error occurs.

    "},{"location":"core/core_data_streaming_output/#core.data.streaming_output.StreamingOutput.save_to_partition","title":"save_to_partition(value, partition)","text":"

    \ud83c\udfaf Send a message to a specific partition in the Kafka topic.

    Parameters:

    Name Type Description Default value Any

    The value of the message.

    required partition int

    The partition to send the message to.

    required

    Raises:

    Type Description Exception

    If no Kafka producer is available or an error occurs.

    "},{"location":"core/core_spout/","title":"Spout","text":"

    Core Spout class

    "},{"location":"core/core_spout/#core.spout.Spout","title":"Spout","text":"

    Bases: Task

    Base class for all spouts.

    "},{"location":"core/core_spout/#core.spout.Spout.__call__","title":"__call__(method_name, *args, **kwargs)","text":"

    Execute a method locally and manage the state.

    Parameters:

    Name Type Description Default method_name str

    The name of the method to execute.

    required *args

    Positional arguments to pass to the method.

    () **kwargs

    Keyword arguments to pass to the method. Keyword Arguments: - Additional keyword arguments specific to the method.

    {}

    Returns:

    Name Type Description Any Any

    The result of the method.

    "},{"location":"core/core_spout/#core.spout.Spout.__init__","title":"__init__(output, state, id=None, **kwargs)","text":"

    The Spout class is a base class for all spouts in the given context. It inherits from the Task class and provides methods for executing tasks both locally and remotely, as well as managing their state, with state management options including in-memory, Redis, PostgreSQL, and DynamoDB, and output data for batch or streaming data.

    The Spout class uses the Output and State classes, which are abstract base classes for managing output data and states, respectively. The Output class has two subclasses: StreamingOutput and BatchOutput, which manage streaming and batch output data, respectively. The State class is used to get and set state, and it has several subclasses for different types of state managers.

    The Spout class also uses the ECSManager and K8sManager classes in the execute_remote method, which are used to manage tasks on Amazon ECS and Kubernetes, respectively.

    Usage
    • Create an instance of the Spout class by providing an Output object and a State object.
    • The Output object specifies the output data for the spout.
    • The State object handles the management of the spout's state.
    Example

    output = Output(...) state = State(...) spout = Spout(output, state)

    Parameters:

    Name Type Description Default output Output

    The output data.

    required state State

    The state manager.

    required"},{"location":"core/core_spout/#core.spout.Spout.create","title":"create(klass, output_type, state_type, id=None, **kwargs) staticmethod","text":"

    Create a spout of a specific type.

    Parameters:

    Name Type Description Default klass type

    The Spout class to create.

    required output_type str

    The type of output (\"batch\" or \"streaming\").

    required state_type str

    The type of state manager (\"none\", \"redis\", \"postgres\", or \"dynamodb\").

    required **kwargs

    Additional keyword arguments for initializing the spout.

    Keyword Arguments:\n    Batch output:\n    - output_folder (str): The directory where output files should be stored temporarily.\n    - output_s3_bucket (str): The name of the S3 bucket for output storage.\n    - output_s3_folder (str): The S3 folder for output storage.\n    Streaming output:\n    - output_kafka_topic (str): Kafka output topic for streaming spouts.\n    - output_kafka_cluster_connection_string (str): Kafka connection string for streaming spouts.\n    Stream to Batch output:\n    - output_folder (str): The directory where output files should be stored temporarily.\n    - output_s3_bucket (str): The name of the S3 bucket for output storage.\n    - output_s3_folder (str): The S3 folder for output storage.\n    - buffer_size (int): Number of messages to buffer.\n    Redis state manager config:\n    - redis_host (str): The host address for the Redis server.\n    - redis_port (int): The port number for the Redis server.\n    - redis_db (int): The Redis database to be used.\n    Postgres state manager config:\n    - postgres_host (str): The host address for the PostgreSQL server.\n    - postgres_port (int): The port number for the PostgreSQL server.\n    - postgres_user (str): The username for the PostgreSQL server.\n    - postgres_password (str): The password for the PostgreSQL server.\n    - postgres_database (str): The PostgreSQL database to be used.\n    - postgres_table (str): The PostgreSQL table to be used.\n    DynamoDB state manager config:\n    - dynamodb_table_name (str): The name of the DynamoDB table.\n    - dynamodb_region_name (str): The AWS region for DynamoDB\n

    {}

    Returns:

    Name Type Description Spout Spout

    The created spout.

    Raises:

    Type Description ValueError

    If an invalid output type or state type is provided.

    "},{"location":"core/core_state_base/","title":"State","text":"

    Base class for task state manager

    "},{"location":"core/core_state_base/#core.state.base.State","title":"State","text":"

    Bases: ABC

    Abstract base class for a state manager.

    This class is responsible for managing task states. It provides an interface for state management and captures task-related metrics.

    Attributes:

    Name Type Description buffer Dict[str, Any]

    Buffer for state data.

    log logging.Logger

    Logger for capturing logs.

    task_id str

    Identifier for the task.

    "},{"location":"core/core_state_base/#core.state.base.State.__del__","title":"__del__()","text":"

    Destructor to flush the buffer before object deletion.

    This ensures that any buffered state data is not lost when the object is deleted.

    "},{"location":"core/core_state_base/#core.state.base.State.flush","title":"flush()","text":"

    Flush the buffer to the state storage.

    This method is responsible for writing the buffered state data to the underlying storage mechanism.

    "},{"location":"core/core_state_base/#core.state.base.State.get","title":"get(task_id, key) abstractmethod","text":"

    Abstract method to get the state associated with a task and key.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required key str

    The key to get the state for.

    required

    Returns:

    Type Description Optional[Dict[str, Any]]

    Optional[Dict[str, Any]]: The state associated with the task and key, if it exists.

    "},{"location":"core/core_state_base/#core.state.base.State.get_state","title":"get_state(key)","text":"

    Get the state associated with a key from the buffer or underlying storage.

    Parameters:

    Name Type Description Default key str

    The key to get the state for.

    required

    Returns:

    Type Description Optional[Dict[str, Any]]

    Optional[Dict[str, Any]]: The state associated with the key.

    "},{"location":"core/core_state_base/#core.state.base.State.set","title":"set(task_id, key, value) abstractmethod","text":"

    Abstract method to set the state associated with a task and key.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required key str

    The key to set the state for.

    required value Dict[str, Any]

    The state to set.

    required"},{"location":"core/core_state_base/#core.state.base.State.set_state","title":"set_state(key, value)","text":"

    Set the state associated with a key in the buffer.

    Parameters:

    Name Type Description Default key str

    The key to set the state for.

    required value Dict[str, Any]

    The state to set.

    required"},{"location":"core/core_state_dynamo/","title":"DynamoDB State","text":"

    State manager using dynamoDB

    "},{"location":"core/core_state_dynamo/#core.state.dynamo.DynamoDBState","title":"DynamoDBState","text":"

    Bases: State

    DynamoDBState: A state manager that stores state in DynamoDB.

    Attributes:

    Name Type Description dynamodb boto3.resources.factory.dynamodb.ServiceResource

    The DynamoDB service resource.

    table boto3.resources.factory.dynamodb.Table

    The DynamoDB table.

    "},{"location":"core/core_state_dynamo/#core.state.dynamo.DynamoDBState.__init__","title":"__init__(task_id, table_name, region_name)","text":"

    Initialize a new DynamoDB state manager.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required table_name str

    The name of the DynamoDB table.

    required region_name str

    The name of the AWS region.

    required"},{"location":"core/core_state_dynamo/#core.state.dynamo.DynamoDBState.get","title":"get(task_id, key)","text":"

    Get the state associated with a task and key.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required key str

    The key to get the state for.

    required

    Returns:

    Type Description Optional[Dict[str, Any]]

    Optional[Dict[str, Any]]: The state associated with the task and key, if it exists.

    "},{"location":"core/core_state_dynamo/#core.state.dynamo.DynamoDBState.set","title":"set(task_id, key, value)","text":"

    Set the state associated with a task and key.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required key str

    The key to set the state for.

    required value Dict[str, Any]

    The state to set.

    required"},{"location":"core/core_state_memory/","title":"In-memory State","text":"

    State manager using local memory

    "},{"location":"core/core_state_memory/#core.state.memory.InMemoryState","title":"InMemoryState","text":"

    Bases: State

    \ud83e\udde0 InMemoryState: A state manager that stores state in memory.

    This manager is useful for temporary storage or testing purposes. Since it's in-memory, the data will be lost once the application stops.

    "},{"location":"core/core_state_memory/#core.state.memory.InMemoryState--attributes","title":"Attributes:","text":"
    • store (Dict[str, Dict]): The in-memory store for states.
    "},{"location":"core/core_state_memory/#core.state.memory.InMemoryState--usage","title":"Usage:","text":"
    manager = InMemoryState()\nmanager.set_state(\"user123\", {\"status\": \"active\"})\nstate = manager.get_state(\"user123\")\nprint(state)  # Outputs: {\"status\": \"active\"}\n

    Remember, this is an in-memory store. Do not use it for persistent storage!

    "},{"location":"core/core_state_memory/#core.state.memory.InMemoryState.__init__","title":"__init__(task_id)","text":"

    \ud83d\udca5 Initialize a new in-memory state manager.

    "},{"location":"core/core_state_memory/#core.state.memory.InMemoryState.get","title":"get(task_id, key)","text":"

    \ud83d\udcd6 Get the state associated with a key.

    Parameters:

    Name Type Description Default key str

    The key to get the state for.

    required

    Returns:

    Name Type Description Dict Optional[Dict]

    The state associated with the key, or None if not found.

    "},{"location":"core/core_state_memory/#core.state.memory.InMemoryState.set","title":"set(task_id, key, value)","text":"

    \ud83d\udcdd Set the state associated with a key.

    Parameters:

    Name Type Description Default key str

    The key to set the state for.

    required value Dict

    The state to set.

    required

    Example:

    manager.set_state(\"user123\", {\"status\": \"active\"})\n

    "},{"location":"core/core_state_postgres/","title":"Postgres State","text":"

    State manager using postgres database

    "},{"location":"core/core_state_postgres/#core.state.postgres.PostgresState","title":"PostgresState","text":"

    Bases: State

    \ud83d\uddc4\ufe0f PostgresState: A state manager that stores state in a PostgreSQL database.

    This manager provides a persistent storage solution using a PostgreSQL database.

    Attributes:

    Name Type Description conn psycopg2.extensions.connection

    The PostgreSQL connection.

    table str

    The table to use for storing state data.

    "},{"location":"core/core_state_postgres/#core.state.postgres.PostgresState.__init__","title":"__init__(task_id, host, port, user, password, database, table='geniusrise_state')","text":"

    Initialize a new PostgreSQL state manager.

    Parameters:

    Name Type Description Default task_id str

    The identifier for the task.

    required host str

    The host of the PostgreSQL server.

    required port int

    The port of the PostgreSQL server.

    required user str

    The user to connect as.

    required password str

    The user's password.

    required database str

    The database to connect to.

    required table str

    The table to use. Defaults to \"geniusrise_state\".

    'geniusrise_state'"},{"location":"core/core_state_postgres/#core.state.postgres.PostgresState.get","title":"get(task_id, key)","text":"

    Get the state associated with a task and key.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required key str

    The key to get the state for.

    required

    Returns:

    Type Description Optional[Dict]

    Optional[Dict]: The state associated with the task and key, or None if not found.

    "},{"location":"core/core_state_postgres/#core.state.postgres.PostgresState.set","title":"set(task_id, key, value)","text":"

    Set the state associated with a task and key.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required key str

    The key to set the state for.

    required value Dict

    The state to set.

    required"},{"location":"core/core_state_redis/","title":"Redis State","text":"

    State manager using redis

    "},{"location":"core/core_state_redis/#core.state.redis.RedisState","title":"RedisState","text":"

    Bases: State

    RedisState: A state manager that stores state in Redis.

    This manager provides a fast, in-memory storage solution using Redis.

    Attributes:

    Name Type Description redis redis.Redis

    The Redis connection.

    "},{"location":"core/core_state_redis/#core.state.redis.RedisState.__init__","title":"__init__(task_id, host, port, db)","text":"

    Initialize a new Redis state manager.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required host str

    The host of the Redis server.

    required port int

    The port of the Redis server.

    required db int

    The database number to connect to.

    required"},{"location":"core/core_state_redis/#core.state.redis.RedisState.get","title":"get(task_id, key)","text":"

    Get the state associated with a task and key.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required key str

    The key to get the state for.

    required

    Returns:

    Type Description Optional[Dict[str, Any]]

    Optional[Dict[str, Any]]: The state associated with the task and key, if it exists.

    "},{"location":"core/core_state_redis/#core.state.redis.RedisState.set","title":"set(task_id, key, value)","text":"

    Set the state associated with a task and key.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required key str

    The key to set the state for.

    required value Dict[str, Any]

    The state to set.

    required"},{"location":"core/core_task_base/","title":"Task","text":"

    Base class for Task

    "},{"location":"core/core_task_base/#core.task.base.Task","title":"Task","text":"

    Bases: ABC

    \ud83d\udee0\ufe0f Task: Class for managing tasks.

    This class provides a foundation for creating and managing tasks. Each task has a unique identifier and can be associated with specific input and output data.

    "},{"location":"core/core_task_base/#core.task.base.Task--attributes","title":"Attributes:","text":"
    • id (uuid.UUID): Unique identifier for the task.
    • input (Input): Configuration for input data.
    • output (Output): Configuration for output data.
    "},{"location":"core/core_task_base/#core.task.base.Task--usage","title":"Usage:","text":"
    task = Task()\ntask.execute(\"fetch_data\")\n

    !!! note Extend this class to implement specific task functionalities.

    "},{"location":"core/core_task_base/#core.task.base.Task.__init__","title":"__init__(id=None)","text":"

    Initialize a new task.

    Parameters:

    Name Type Description Default input Input

    Configuration for input data.

    required output Output

    Configuration for output data.

    required"},{"location":"core/core_task_base/#core.task.base.Task.__repr__","title":"__repr__()","text":"

    Return a string representation of the task.

    Returns:

    Name Type Description str str

    A string representation of the task.

    "},{"location":"core/core_task_base/#core.task.base.Task.execute","title":"execute(method_name, *args, **kwargs)","text":"

    \ud83d\ude80 Execute a given fetch_* method if it exists.

    Parameters:

    Name Type Description Default method_name str

    The name of the fetch_* method to execute.

    required *args

    Positional arguments to pass to the method.

    () **kwargs

    Keyword arguments to pass to the method.

    {}

    Returns:

    Name Type Description Any Any

    The result of the fetch_* method, or None if the method does not exist.

    Raises:

    Type Description AttributeError

    If the specified method doesn't exist.

    "},{"location":"core/core_task_base/#core.task.base.Task.get_methods","title":"get_methods() staticmethod","text":"

    \ud83d\udcdc Get all the fetch_* methods and their parameters along with their default values and docstrings.

    Returns:

    Type Description List[Tuple[str, List[str], Optional[str]]]

    List[Tuple[str, List[str], str]]: A list of tuples, where each tuple contains the name of a fetch_* method,

    List[Tuple[str, List[str], Optional[str]]]

    a list of its parameters along with their default values, and its docstring.

    "},{"location":"core/core_task_base/#core.task.base.Task.print_help","title":"print_help() staticmethod","text":"

    \ud83d\udda8\ufe0f Pretty print the fetch_* methods and their parameters along with their default values and docstrings. Also prints the class's docstring and init parameters.

    "},{"location":"core/docker/","title":"Docker Deployment","text":"

    DockerResourceManager is a utility for managing Docker resources, including containers and images. It provides a command-line interface (CLI) for various Docker operations, such as listing, inspecting, creating, starting, and stopping containers, as well as managing images.

    This class uses the Docker SDK for Python to interact with the Docker daemon, offering a convenient way to manage Docker containers and images from the command line.

    CLI Usage

    genius docker sub-command

    Sub-commands
    • list_containers: List all containers, with an option to include stopped containers. genius docker list_containers [--all]
    • inspect_container: Inspect a specific container by its ID. genius docker inspect_container <container_id>
    • create_container: Create a new container with specified image, command, and other parameters. genius docker create_container <image> [options]
    • start_container: Start a container by its ID. genius docker start_container <container_id>
    • stop_container: Stop a container by its ID. genius docker stop_container <container_id>
    • list_images: List all Docker images available on the local system. genius docker list_images
    • inspect_image: Inspect a specific image by its ID. genius docker inspect_image <image_id>
    • pull_image: Pull an image from a Docker registry. genius docker pull_image <image>
    • push_image: Push an image to a Docker registry. genius docker push_image <image>

    Each sub-command supports various options to specify the details of the container or image operation, such as environment variables, port mappings, volume mappings, and more.

    Attributes:

    Name Type Description client

    The Docker client connection to interact with the Docker daemon.

    log

    Logger for the class to log information, warnings, and errors.

    console

    Rich console object to print formatted and styled outputs.

    Methods
    • connect: Method to establish a connection to the Docker daemon.
    • list_containers: Method to list all containers, with an option to include stopped ones.
    • inspect_container: Method to inspect details of a specific container.
    • create_container: Method to create a new container with given parameters.
    • start_container: Method to start a specific container.
    • stop_container: Method to stop a specific container.
    • list_images: Method to list all Docker images.
    • inspect_image: Method to inspect a specific image.
    • pull_image: Method to pull an image from a Docker registry.
    • push_image: Method to push an image to a Docker registry.
    Note
    • Ensure that the Docker daemon is running and accessible at the specified URL.
    • Make sure to have the necessary permissions to interact with the Docker daemon and manage containers and images.
    "},{"location":"core/docker/#runners.docker.base.DockerResourceManager.__init__","title":"__init__()","text":"

    Initialize the Docker Resource Manager.

    "},{"location":"core/docker/#runners.docker.base.DockerResourceManager.connect","title":"connect(base_url='unix://var/run/docker.sock')","text":"

    Connect to the Docker daemon.

    Parameters:

    Name Type Description Default base_url str

    URL to the Docker daemon.

    'unix://var/run/docker.sock'"},{"location":"core/docker/#runners.docker.base.DockerResourceManager.create_container","title":"create_container(image, command=None, name=None, env_vars=None, ports=None, volumes=None, **kwargs)","text":"

    Create a new container.

    Parameters:

    Name Type Description Default image str

    Name of the image to create the container from.

    required command Optional[str]

    Command to run in the container.

    None name Optional[str]

    Name of the container.

    None env_vars Optional[Dict[str, str]]

    Environment variables.

    None ports Optional[Dict[str, str]]

    Port mappings.

    None volumes Optional[Dict[str, Dict[str, str]]]

    Volume mappings.

    None

    Returns:

    Name Type Description str str

    ID of the created container.

    "},{"location":"core/docker/#runners.docker.base.DockerResourceManager.create_parser","title":"create_parser(parser)","text":"

    Create a parser for CLI commands.

    Returns:

    Name Type Description ArgumentParser ArgumentParser

    The parser for Docker operations.

    "},{"location":"core/docker/#runners.docker.base.DockerResourceManager.inspect_container","title":"inspect_container(container_id)","text":"

    Inspect a specific container.

    Parameters:

    Name Type Description Default container_id str

    ID of the container to inspect.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: Container details.

    "},{"location":"core/docker/#runners.docker.base.DockerResourceManager.inspect_image","title":"inspect_image(image_id)","text":"

    Inspect a specific image.

    Parameters:

    Name Type Description Default image_id str

    ID of the image to inspect.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: Image details.

    "},{"location":"core/docker/#runners.docker.base.DockerResourceManager.list_containers","title":"list_containers(all_containers=False)","text":"

    List all containers.

    Parameters:

    Name Type Description Default all_containers bool

    Flag to list all containers, including stopped ones.

    False

    Returns:

    Type Description List[Any]

    List[Any]: List of containers.

    "},{"location":"core/docker/#runners.docker.base.DockerResourceManager.list_images","title":"list_images()","text":"

    List all Docker images.

    Returns:

    Type Description List[Any]

    List[Any]: List of images.

    "},{"location":"core/docker/#runners.docker.base.DockerResourceManager.pull_image","title":"pull_image(image)","text":"

    Pull an image from a Docker registry.

    Parameters:

    Name Type Description Default image str

    Name of the image to pull.

    required"},{"location":"core/docker/#runners.docker.base.DockerResourceManager.push_image","title":"push_image(image)","text":"

    Push an image to a Docker registry.

    Parameters:

    Name Type Description Default image str

    Name of the image to push.

    required"},{"location":"core/docker/#runners.docker.base.DockerResourceManager.run","title":"run(args)","text":"

    Run the Docker Resource Manager based on the parsed CLI arguments.

    Parameters:

    Name Type Description Default args Namespace

    The parsed CLI arguments.

    required"},{"location":"core/docker/#runners.docker.base.DockerResourceManager.start_container","title":"start_container(container_id)","text":"

    Start a container.

    Parameters:

    Name Type Description Default container_id str

    ID of the container to start.

    required"},{"location":"core/docker/#runners.docker.base.DockerResourceManager.stop_container","title":"stop_container(container_id)","text":"

    Stop a container.

    Parameters:

    Name Type Description Default container_id str

    ID of the container to stop.

    required"},{"location":"core/docker_swarm/","title":"Docker Swarm Deployment","text":"

    Bases: DockerResourceManager

    DockerSwarmManager is a utility for managing Docker Swarm services, including creating, inspecting, updating, and removing services. It extends DockerResourceManager to provide swarm-specific functionalities and commands via a command-line interface (CLI).

    The manager interacts with the Docker Swarm API, offering a convenient way to manage Swarm services, nodes, and other swarm-related tasks from the command line.

    CLI Usage

    genius docker swarm sub-command

    Sub-commands
    • list_nodes: List all nodes in the Docker Swarm. genius docker swarm list_nodes
    • inspect_node: Inspect a specific Swarm node by its ID. genius docker swarm inspect_node <node_id>
    • create_service: Create a new service in the Docker Swarm with comprehensive specifications. genius docker swarm create_service [options]
    • list_services: List all services in the Docker Swarm. genius docker swarm list_services
    • inspect_service: Inspect a specific service by its ID. genius docker swarm inspect_service <service_id>
    • update_service: Update an existing service with new parameters. genius docker swarm update_service <service_id> [options]
    • remove_service: Remove a service from the Docker Swarm. genius docker swarm remove_service <service_id>
    • service_logs: Retrieve logs of a Docker Swarm service. genius docker swarm service_logs <service_id> [--tail] [--follow]
    • scale_service: Scale a service to a specified number of replicas. genius docker swarm scale_service <service_id> <replicas>

    Each sub-command supports various options to specify the details of the swarm node or service operation. These options include node and service IDs, image and command specifications for services, environment variables, resource limits, and much more.

    Attributes:

    Name Type Description swarm_client

    The Docker Swarm client connection to interact with the Docker Swarm API.

    log

    Logger for the class to log information, warnings, and errors.

    console

    Rich console object to print formatted and styled outputs.

    Methods
    • connect_to_swarm: Method to establish a connection to the Docker Swarm.
    • list_nodes: Method to list all nodes in the Docker Swarm.
    • inspect_node: Method to inspect details of a specific Swarm node.
    • create_service: Method to create a new service with given specifications.
    • list_services: Method to list all services in the Docker Swarm.
    • inspect_service: Method to inspect a specific service.
    • update_service: Method to update an existing service with new parameters.
    • remove_service: Method to remove a service from the Docker Swarm.
    • get_service_logs: Method to retrieve logs of a Docker Swarm service.
    • scale_service: Method to scale a service to a specified number of replicas.
    Note
    • Ensure that the Docker Swarm is initialized and running.
    • Make sure to have the necessary permissions to interact with the Docker Swarm and manage services and nodes.
    "},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.__init__","title":"__init__()","text":"

    Initialize the Docker Swarm Manager.

    "},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.connect_to_swarm","title":"connect_to_swarm(base_url='unix://var/run/docker.sock')","text":"

    Connect to the Docker Swarm.

    Parameters:

    Name Type Description Default base_url str

    URL to the Docker daemon.

    'unix://var/run/docker.sock'"},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.create_parser","title":"create_parser(parser)","text":"

    Extend the parser for CLI commands to include Docker Swarm operations.

    Parameters:

    Name Type Description Default parser ArgumentParser

    The existing parser.

    required

    Returns:

    Name Type Description ArgumentParser ArgumentParser

    The extended parser with Docker Swarm operations.

    "},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.create_service","title":"create_service(image, command, args)","text":"

    Create a new service in the Docker Swarm with comprehensive specifications.

    Parameters:

    Name Type Description Default image str

    Docker image to use for the service.

    required command Union[str, List[str]]

    Command to run in the service.

    required args Namespace

    Arguments from the CLI for service creation.

    required

    Returns:

    Name Type Description str str

    ID of the created service.

    "},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.get_service_logs","title":"get_service_logs(service_id, tail=100, follow=False)","text":"

    Retrieve logs of a Docker Swarm service.

    Parameters:

    Name Type Description Default service_id str

    ID of the service.

    required tail int

    Number of lines to tail from the end of the logs. Defaults to 100.

    100 follow bool

    Follow log output. Defaults to False.

    False"},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.inspect_node","title":"inspect_node(node_id)","text":"

    Inspect a specific Swarm node.

    Parameters:

    Name Type Description Default node_id str

    ID of the node to inspect.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: Node details.

    "},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.inspect_service","title":"inspect_service(service_id)","text":"

    Inspect a specific service in the Docker Swarm.

    Parameters:

    Name Type Description Default service_id str

    ID of the service to inspect.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: Service details.

    "},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.list_nodes","title":"list_nodes()","text":"

    List all nodes in the Docker Swarm.

    Returns:

    Type Description List[Any]

    List[Any]: List of Swarm nodes.

    "},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.list_services","title":"list_services()","text":"

    List all services in the Docker Swarm.

    Returns:

    Type Description List[Any]

    List[Any]: List of services.

    "},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.remove_service","title":"remove_service(service_id)","text":"

    Remove a service from the Docker Swarm.

    Parameters:

    Name Type Description Default service_id str

    ID of the service to remove.

    required"},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.run","title":"run(args)","text":"

    Run the Docker Swarm Manager based on the parsed CLI arguments.

    Parameters:

    Name Type Description Default args Namespace

    The parsed CLI arguments.

    required"},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.scale_service","title":"scale_service(service_id, replicas)","text":"

    Scale a Docker Swarm service to a specified number of replicas.

    Parameters:

    Name Type Description Default service_id str

    ID of the service to scale.

    required replicas int

    Desired number of replicas.

    required"},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.update_service","title":"update_service(service_id, image, command, args)","text":"

    Update an existing service in the Docker Swarm.

    Parameters:

    Name Type Description Default service_id str

    ID of the service to update.

    required args Namespace

    Arguments from the CLI for service update.

    required"},{"location":"core/k8s_base/","title":"Kubernetes","text":""},{"location":"core/k8s_base/#runners.k8s.base.K8sResourceManager.__create_image_pull_secret","title":"__create_image_pull_secret(name, registry, username, password)","text":"

    \ud83d\udd11 Create an image pull secret for a Docker registry.

    Parameters:

    Name Type Description Default name str

    Name of the secret.

    required registry str

    Docker registry URL.

    required username str

    Username for the registry.

    required password str

    Password for the registry.

    required"},{"location":"core/k8s_base/#runners.k8s.base.K8sResourceManager.__init__","title":"__init__()","text":"

    \ud83d\ude80 Initialize the Kubernetes Resource Manager.

    Attributes:

    Name Type Description api_instance

    Core API instance for Kubernetes

    apps_api_instance

    Apps API instance for Kubernetes

    cluster_name

    Name of the Kubernetes cluster

    context_name

    Name of the kubeconfig context

    namespace

    Kubernetes namespace

    labels

    Labels for Kubernetes resources

    annotations

    Annotations for Kubernetes resources

    "},{"location":"core/k8s_base/#runners.k8s.base.K8sResourceManager.__wait_for_pod_completion","title":"__wait_for_pod_completion(pod_name, timeout=600, poll_interval=5)","text":"

    \u23f3 Wait for a Pod to complete its execution.

    Parameters:

    Name Type Description Default pod_name str

    Name of the Pod.

    required timeout int

    Maximum time to wait in seconds.

    600 poll_interval int

    Time between status checks in seconds.

    5

    Returns:

    Name Type Description bool bool

    True if the Pod succeeded, False otherwise.

    Raises:

    Type Description TimeoutError

    If waiting for the Pod times out.

    "},{"location":"core/k8s_base/#runners.k8s.base.K8sResourceManager.connect","title":"connect(kube_config_path, cluster_name, context_name, namespace='default', labels={}, annotations={}, api_key=None, api_host=None, verify_ssl=True, ssl_ca_cert=None)","text":"

    \ud83c\udf10 Connect to a Kubernetes cluster.

    Parameters:

    Name Type Description Default kube_config_path str

    Path to the kubeconfig file.

    required cluster_name str

    Name of the Kubernetes cluster.

    required context_name str

    Name of the kubeconfig context.

    required namespace str

    Kubernetes namespace.

    'default' labels dict

    Labels for Kubernetes resources.

    {} annotations dict

    Annotations for Kubernetes resources.

    {} api_key str

    API key for Kubernetes cluster.

    None api_host str

    API host for Kubernetes cluster.

    None verify_ssl bool

    Whether to verify SSL certificates.

    True ssl_ca_cert str

    Path to the SSL CA certificate.

    None

    Raises:

    Type Description ValueError

    If neither kube_config_path and context_name nor api_key and api_host are provided.

    "},{"location":"core/k8s_base/#runners.k8s.base.K8sResourceManager.describe","title":"describe(pod_name)","text":"

    \ud83d\udcdd Describe a Kubernetes pod.

    Parameters:

    Name Type Description Default pod_name str

    Name of the pod.

    required

    Returns:

    Name Type Description dict V1Pod

    Description of the pod.

    "},{"location":"core/k8s_base/#runners.k8s.base.K8sResourceManager.logs","title":"logs(name, tail=10, follow=True)","text":"

    \ud83d\udcdc Get logs of a Kubernetes pod.

    Parameters:

    Name Type Description Default name str

    Name of the pod.

    required tail int

    Number of lines to tail.

    10

    Returns:

    Name Type Description str str

    Logs of the pod.

    "},{"location":"core/k8s_base/#runners.k8s.base.K8sResourceManager.run","title":"run(args)","text":"

    \ud83d\ude80 Run the Kubernetes resource manager.

    Parameters:

    Name Type Description Default args Namespace

    The parsed command line arguments.

    required"},{"location":"core/k8s_base/#runners.k8s.base.K8sResourceManager.show","title":"show()","text":"

    \ud83d\udccb Show all pods in the namespace.

    Returns:

    Name Type Description list List[V1Pod]

    List of pods.

    "},{"location":"core/k8s_base/#runners.k8s.base.K8sResourceManager.status","title":"status(pod_name)","text":"

    \ud83d\udcdc Get the status of a Pod.

    Parameters:

    Name Type Description Default pod_name str

    Name of the Pod.

    required

    Returns:

    Name Type Description str V1Pod

    The status of the Pod.

    "},{"location":"core/k8s_cron_job/","title":"Kubernetes CronJob","text":"

    Bases: Job

    \ud83d\ude80 The CronJob class is responsible for managing Kubernetes CronJobs. It extends the Job class and provides additional functionalities specific to Kubernetes CronJobs.

    CLI Usage

    genius cronjob sub-command Examples:

    genius cronjob create_cronjob --name example-cronjob --image example-image --command \"echo hello\" --schedule \"*/5 * * * *\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n

    ```bash\ngenius cronjob delete_cronjob --name example-cronjob --namespace geniusrise \\\n    --context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n```\n\n```bash\ngenius cronjob get_cronjob_status --name example-cronjob --namespace geniusrise \\\n    --context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n```\n

    YAML Configuration:

        version: \"1.0\"\ncronjobs:\n- name: \"example-cronjob\"\nimage: \"example-image\"\ncommand: \"example-command\"\nschedule: \"*/5 * * * *\"\nenv_vars:\nKEY: \"value\"\ncpu: \"100m\"\nmemory: \"256Mi\"\nstorage: \"1Gi\"\ngpu: \"1\"\n

    Extended CLI Examples
    genius cronjob create_cronjob \\\n--k8s_kind cronjob \\\n--k8s_namespace geniusrise \\\n--k8s_context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev \\\n--k8s_name example-cronjob \\\n--k8s_image \"genius-dev.dkr.ecr.ap-south-1.amazonaws.com/geniusrise\" \\\n--k8s_schedule \"*/5 * * * *\" \\\n--k8s_env_vars '{\"AWS_DEFAULT_REGION\": \"ap-south-1\", \"AWS_SECRET_ACCESS_KEY\": \"\", \"AWS_ACCESS_KEY_ID\": \"\"}' \\\n--k8s_cpu \"100m\" \\\n--k8s_memory \"256Mi\"\n
    genius cronjob delete_cronjob \\\nexample-cronjob \\\n--namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    genius cronjob get_cronjob_status \\\nexample-cronjob \\\n--namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"core/k8s_cron_job/#runners.k8s.cron_job.CronJob.__create_cronjob_spec","title":"__create_cronjob_spec(image, command, schedule, env_vars={}, cpu=None, memory=None, storage=None, gpu=None, image_pull_secret_name=None)","text":"

    \ud83d\udce6 Create a Kubernetes CronJob specification.

    Parameters:

    Name Type Description Default image str

    Docker image for the CronJob.

    required command str

    Command to run in the container.

    required env_vars dict

    Environment variables for the CronJob.

    {} cpu Optional[str]

    CPU requirements.

    None memory Optional[str]

    Memory requirements.

    None storage Optional[str]

    Storage requirements.

    None gpu Optional[str]

    GPU requirements.

    None image_pull_secret_name Optional[str]

    Name of the image pull secret.

    None

    Returns:

    Type Description client.V1CronJobSpec

    client.V1CronJobSpec: The CronJob specification.

    "},{"location":"core/k8s_cron_job/#runners.k8s.cron_job.CronJob.__init__","title":"__init__()","text":"

    \ud83d\ude80 Initialize the CronJob class for managing Kubernetes Cron Jobs.

    "},{"location":"core/k8s_cron_job/#runners.k8s.cron_job.CronJob.create","title":"create(name, image, schedule, command, env_vars={}, cpu=None, memory=None, storage=None, gpu=None, image_pull_secret_name=None, **kwargs)","text":"

    \ud83d\udee0 Create a Kubernetes CronJob.

    Parameters:

    Name Type Description Default name str

    Name of the CronJob.

    required image str

    Docker image for the CronJob.

    required command str

    Command to run in the container.

    required schedule str

    Cron schedule.

    required env_vars dict

    Environment variables for the CronJob.

    {}"},{"location":"core/k8s_cron_job/#runners.k8s.cron_job.CronJob.create_parser","title":"create_parser(parser)","text":"

    \ud83c\udf9b Create a parser for CLI commands related to Cron Job functionalities.

    Parameters:

    Name Type Description Default parser ArgumentParser

    The main parser.

    required

    Returns:

    Name Type Description ArgumentParser ArgumentParser

    The parser with subparsers for each command.

    "},{"location":"core/k8s_cron_job/#runners.k8s.cron_job.CronJob.delete","title":"delete(name)","text":"

    \ud83d\uddd1 Delete a Kubernetes CronJob.

    Parameters:

    Name Type Description Default name str

    Name of the CronJob to delete.

    required"},{"location":"core/k8s_cron_job/#runners.k8s.cron_job.CronJob.run","title":"run(args)","text":"

    \ud83d\ude80 Run the Cron Job manager.

    Parameters:

    Name Type Description Default args Namespace

    The parsed command line arguments.

    required"},{"location":"core/k8s_cron_job/#runners.k8s.cron_job.CronJob.status","title":"status(name)","text":"

    \ud83d\udcca Get the status of a Kubernetes CronJob.

    Parameters:

    Name Type Description Default name str

    Name of the CronJob.

    required

    Returns:

    Name Type Description dict V1CronJob

    Status of the CronJob.

    "},{"location":"core/k8s_deployment/","title":"Kubernetes Deployment","text":"

    Bases: K8sResourceManager

    \ud83d\ude80 Initialize the Deployment class for managing Kubernetes Deployments.

    CLI Usage

    genius deployment sub-command Examples:

    genius deployment create --name example-deployment --image example-image --command \"echo hello\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    genius deployment scale --name example-deployment --replicas 3 --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    genius deployment describe --name example-deployment --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    genius deployment delete --name example-deployment --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    genius deployment status --name example-deployment --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n

    YAML Configuration:

    version: \"1.0\"\ndeployments:\n- name: \"example-deployment\"\nimage: \"example-image\"\ncommand: \"example-command\"\nreplicas: 3\nenv_vars:\nKEY: \"value\"\ncpu: \"100m\"\nmemory: \"256Mi\"\nstorage: \"1Gi\"\ngpu: \"1\"\n
    "},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.__create_deployment_spec","title":"__create_deployment_spec(image, command, replicas, image_pull_secret_name, env_vars, cpu=None, memory=None, storage=None, gpu=None)","text":"

    \ud83d\udce6 Create a Kubernetes Deployment specification.

    Parameters:

    Name Type Description Default image str

    Docker image for the Deployment.

    required command str

    Command to run in the container.

    required replicas int

    Number of replicas.

    required image_pull_secret_name str

    Name of the image pull secret.

    required env_vars dict

    Environment variables for the Deployment.

    required cpu str

    CPU requirements.

    None memory str

    Memory requirements.

    None storage str

    Storage requirements.

    None gpu str

    GPU requirements.

    None

    Returns:

    Type Description client.V1DeploymentSpec

    client.V1DeploymentSpec: The Deployment specification.

    "},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.__init__","title":"__init__()","text":"

    \ud83d\ude80 Initialize the Deployment class for managing Kubernetes Deployments.

    "},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.create","title":"create(name, image, command, registry_creds=None, replicas=1, env_vars={}, cpu=None, memory=None, storage=None, gpu=None, **kwargs)","text":"

    \ud83d\udee0 Create a Kubernetes resource Deployment.

    Parameters:

    Name Type Description Default name str

    Name of the resource.

    required image str

    Docker image for the resource.

    required command str

    Command to run in the container.

    required registry_creds dict

    Credentials for Docker registry.

    None replicas int

    Number of replicas for Deployment.

    1 env_vars dict

    Environment variables for the resource.

    {} cpu str

    CPU requirements.

    None memory str

    Memory requirements.

    None storage str

    Storage requirements.

    None gpu str

    GPU requirements.

    None"},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.create_parser","title":"create_parser(parser)","text":"

    \ud83c\udf9b Create a parser for CLI commands related to Deployment functionalities.

    Parameters:

    Name Type Description Default parser ArgumentParser

    The main parser.

    required

    Returns:

    Name Type Description ArgumentParser ArgumentParser

    The parser with subparsers for each command.

    "},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.delete","title":"delete(name)","text":"

    \ud83d\uddd1 Delete a Kubernetes resource (Pod/Deployment/Service).

    Parameters:

    Name Type Description Default name str

    Name of the resource to delete.

    required"},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.describe","title":"describe(deployment_name)","text":"

    \ud83d\uddc2 Describe a Kubernetes deployment.

    Parameters:

    Name Type Description Default deployment_name str

    Name of the deployment.

    required

    Returns:

    Name Type Description dict V1Deployment

    Description of the deployment.

    "},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.run","title":"run(args)","text":"

    \ud83d\ude80 Run the Deployment manager.

    Parameters:

    Name Type Description Default args Namespace

    The parsed command line arguments.

    required"},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.scale","title":"scale(name, replicas)","text":"

    \ud83d\udcc8 Scale a Kubernetes deployment.

    Parameters:

    Name Type Description Default name str

    Name of the deployment.

    required replicas int

    Number of replicas.

    required"},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.show","title":"show()","text":"

    \ud83d\uddc2 List all deployments in the namespace.

    Returns:

    Name Type Description list List[V1Deployment]

    List of deployments.

    "},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.status","title":"status(name)","text":"

    \ud83d\udcca Get the status of a Kubernetes deployment.

    Parameters:

    Name Type Description Default name str

    Name of the deployment.

    required

    Returns:

    Name Type Description dict V1Deployment

    Status of the deployment.

    "},{"location":"core/k8s_job/","title":"Kubernetes Job","text":"

    Bases: Deployment

    \ud83d\ude80 The Job class is responsible for managing Kubernetes Jobs. It extends the Deployment class and provides additional functionalities specific to Kubernetes Jobs.

    CLI Usage

    genius job sub-command Examples:

    genius job create --name example-job --image example-image --command \"echo hello\" --cpu \"100m\" --memory \"256Mi\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n

    ```bash\ngenius job delete --name example-job --namespace geniusrise \\\n    --context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n```\n\n```bash\ngenius job status --name example-job --namespace geniusrise \\\n    --context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n```\n

    YAML Configuration:

        version: \"1.0\"\njobs:\n- name: \"example-job\"\nimage: \"example-image\"\ncommand: \"example-command\"\nenv_vars:\nKEY: \"value\"\ncpu: \"100m\"\nmemory: \"256Mi\"\nstorage: \"1Gi\"\ngpu: \"1\"\n

    Extended CLI Examples:

        genius job create \\\n--k8s_kind job \\\n--k8s_namespace geniusrise \\\n--k8s_context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev \\\n--k8s_name example-job \\\n--k8s_image \"genius-dev.dkr.ecr.ap-south-1.amazonaws.com/geniusrise\" \\\n--k8s_env_vars '{\"AWS_DEFAULT_REGION\": \"ap-south-1\", \"AWS_SECRET_ACCESS_KEY\": \"\", \"AWS_ACCESS_KEY_ID\": \"\"}' \\\n--k8s_cpu \"100m\" \\\n--k8s_memory \"256Mi\"\n
        genius job delete \\\nexample-job \\\n--namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
        genius job status \\\nexample-job \\\n--namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"core/k8s_job/#runners.k8s.job.Job.__init__","title":"__init__()","text":"

    \ud83d\ude80 Initialize the Job class for managing Kubernetes Jobs.

    "},{"location":"core/k8s_job/#runners.k8s.job.Job.create","title":"create(name, image, command, env_vars={}, cpu=None, memory=None, storage=None, gpu=None, image_pull_secret_name=None, **kwargs)","text":"

    \ud83d\udee0 Create a Kubernetes Job.

    Parameters:

    Name Type Description Default name str

    Name of the Job.

    required image str

    Docker image for the Job.

    required command str

    Command to run in the container.

    required env_vars dict

    Environment variables for the Job.

    {}"},{"location":"core/k8s_job/#runners.k8s.job.Job.create_parser","title":"create_parser(parser)","text":"

    \ud83c\udf9b Create a parser for CLI commands related to Job functionalities.

    Parameters:

    Name Type Description Default parser ArgumentParser

    The main parser.

    required

    Returns:

    Name Type Description ArgumentParser ArgumentParser

    The parser with subparsers for each command.

    "},{"location":"core/k8s_job/#runners.k8s.job.Job.delete","title":"delete(name)","text":"

    \ud83d\uddd1 Delete a Kubernetes Job.

    Parameters:

    Name Type Description Default name str

    Name of the Job to delete.

    required"},{"location":"core/k8s_job/#runners.k8s.job.Job.run","title":"run(args)","text":"

    \ud83d\ude80 Run the Job manager.

    Parameters:

    Name Type Description Default args Namespace

    The parsed command line arguments.

    required"},{"location":"core/k8s_job/#runners.k8s.job.Job.status","title":"status(name)","text":"

    \ud83d\udcca Get the status of a Kubernetes Job.

    Parameters:

    Name Type Description Default name str

    Name of the Job.

    required

    Returns:

    Name Type Description dict V1Job

    Status of the Job.

    "},{"location":"core/k8s_service/","title":"Kubernetes Service","text":"

    Bases: Deployment

    \ud83d\ude80 Initialize the Service class for managing Kubernetes Services.

    CLI Usage

    genius service sub-command Examples:

    genius service create --name example-service --image example-image --command \"echo hello\" --port 8080 --target_port 8080 --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    genius service delete --name example-service --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    genius service describe --name example-service --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    genius service show --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n

    YAML Configuration:

    version: \"1.0\"\nservices:\n- name: \"example-service\"\nimage: \"example-image\"\ncommand: \"example-command\"\nreplicas: 3\nport: 8080\ntarget_port: 8080\nenv_vars:\nKEY: \"value\"\ncpu: \"100m\"\nmemory: \"256Mi\"\nstorage: \"1Gi\"\ngpu: \"1\"\n
    Extended CLI Examples
        genius service deploy \\\n--k8s_kind service \\\n--k8s_namespace geniusrise \\\n--k8s_context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev \\\n--k8s_name webhook \\\n--k8s_image \"genius-dev.dkr.ecr.ap-south-1.amazonaws.com/geniusrise\" \\\n--k8s_env_vars '{\"AWS_DEFAULT_REGION\": \"ap-south-1\", \"AWS_SECRET_ACCESS_KEY\": \"\", \"AWS_ACCESS_KEY_ID\": \"\"}' \\\n--k8s_port 8080 \\\n--k8s_target_port 8080\n
        genius service delete \\\nwebhook \\\n--namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"core/k8s_service/#runners.k8s.service.Service.__create_service_spec","title":"__create_service_spec(node_port, port, target_port)","text":"

    \ud83d\udce6 Create a Kubernetes Service specification.

    Parameters:

    Name Type Description Default port int

    Service port.

    required target_port int

    Container target port.

    required

    Returns:

    Type Description client.V1ServiceSpec

    client.V1ServiceSpec: The Service specification.

    "},{"location":"core/k8s_service/#runners.k8s.service.Service.__init__","title":"__init__()","text":"

    \ud83d\ude80 Initialize the Service class for managing Kubernetes Services.

    "},{"location":"core/k8s_service/#runners.k8s.service.Service.create","title":"create(name, image, command, registry_creds=None, replicas=1, node_port=80, port=80, target_port=8080, env_vars={}, cpu=None, memory=None, storage=None, gpu=None, **kwargs)","text":"

    \ud83d\udee0 Create a Kubernetes resource Service.

    Parameters:

    Name Type Description Default name str

    Name of the resource.

    required image str

    Docker image for the resource.

    required command str

    Command to run in the container.

    required registry_creds dict

    Credentials for Docker registry.

    None replicas int

    Number of replicas for Deployment.

    1 node_port int

    Service port that is exposed.

    80 port int

    Service port.

    80 target_port int

    Container target port.

    8080 env_vars dict

    Environment variables for the resource.

    {} cpu str

    CPU requirements.

    None memory str

    Memory requirements.

    None storage str

    Storage requirements.

    None gpu str

    GPU requirements.

    None"},{"location":"core/k8s_service/#runners.k8s.service.Service.create_parser","title":"create_parser(parser)","text":"

    \ud83c\udf9b Create a parser for CLI commands related to Service functionalities.

    Parameters:

    Name Type Description Default parser ArgumentParser

    The main parser.

    required

    Returns:

    Name Type Description ArgumentParser ArgumentParser

    The parser with subparsers for each command.

    "},{"location":"core/k8s_service/#runners.k8s.service.Service.delete","title":"delete(name)","text":"

    \ud83d\uddd1 Delete a Kubernetes resource (Pod/Deployment/Service).

    Parameters:

    Name Type Description Default name str

    Name of the resource to delete.

    required"},{"location":"core/k8s_service/#runners.k8s.service.Service.describe","title":"describe(service_name)","text":"

    \ud83c\udf10 Describe a Kubernetes service.

    Parameters:

    Name Type Description Default service_name str

    Name of the service.

    required

    Returns:

    Name Type Description dict V1Service

    Description of the service.

    "},{"location":"core/k8s_service/#runners.k8s.service.Service.run","title":"run(args)","text":"

    \ud83d\ude80 Run the Service manager.

    Parameters:

    Name Type Description Default args Namespace

    The parsed command line arguments.

    required"},{"location":"core/k8s_service/#runners.k8s.service.Service.show","title":"show()","text":"

    \ud83c\udf10 Show all services in the namespace.

    Returns:

    Name Type Description list List[V1Service]

    List of services.

    "},{"location":"core/k8s_service/#runners.k8s.service.Service.status","title":"status(name)","text":"

    \ud83d\udcca Get the status of a Kubernetes service.

    Parameters:

    Name Type Description Default name str

    Name of the service.

    required

    Returns:

    Name Type Description dict V1Service

    Status of the service.

    "},{"location":"databases/arangodb/","title":"ArangoDB","text":"

    Bases: Spout

    "},{"location":"databases/arangodb/#geniusrise_databases.arangodb.ArangoDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the ArangoDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/arangodb/#geniusrise_databases.arangodb.ArangoDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Arango rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=myarangodb.example.com username=myusername password=mypassword database=mydb collection=mycollection\n
    "},{"location":"databases/arangodb/#geniusrise_databases.arangodb.ArangoDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_arangodb_spout:\nname: \"Arango\"\nmethod: \"fetch\"\nargs:\nhost: \"myarangodb.example.com\"\nusername: \"myusername\"\npassword: \"mypassword\"\ndatabase: \"mydb\"\ncollection: \"mycollection\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/arangodb/#geniusrise_databases.arangodb.ArangoDB.fetch","title":"fetch(host, username, password, database, collection)","text":"

    \ud83d\udcd6 Fetch data from an ArangoDB collection and save it in batch.

    Parameters:

    Name Type Description Default host str

    The ArangoDB host.

    required username str

    The ArangoDB username.

    required password str

    The ArangoDB password.

    required database str

    The ArangoDB database name.

    required collection str

    The name of the ArangoDB collection.

    required

    Raises:

    Type Description Exception

    If unable to connect to the ArangoDB server or execute the command.

    "},{"location":"databases/athena/","title":"Athena","text":"

    Bases: Spout

    "},{"location":"databases/athena/#geniusrise_databases.athena.Athena.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Athena class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/athena/#geniusrise_databases.athena.Athena.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Athena rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args region_name=us-east-1 output_location=s3://mybucket/output query=\"SELECT * FROM mytable\"\n
    "},{"location":"databases/athena/#geniusrise_databases.athena.Athena.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_athena_spout:\nname: \"Athena\"\nmethod: \"fetch\"\nargs:\nregion_name: \"us-east-1\"\noutput_location: \"s3://mybucket/output\"\nquery: \"SELECT * FROM mytable\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/athena/#geniusrise_databases.athena.Athena.fetch","title":"fetch(region_name, output_location, query)","text":"

    \ud83d\udcd6 Fetch data from an AWS Athena table and save it in batch.

    Parameters:

    Name Type Description Default region_name str

    The AWS region name.

    required output_location str

    The S3 output location for the query results.

    required query str

    The SQL query to execute.

    required

    Raises:

    Type Description Exception

    If unable to connect to the AWS Athena service or execute the query.

    "},{"location":"databases/azure_table/","title":"Athena","text":"

    Bases: Spout

    "},{"location":"databases/azure_table/#geniusrise_databases.azure_table.AzureTableStorage.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the AzureTableStorage class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/azure_table/#geniusrise_databases.azure_table.AzureTableStorage.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius AzureTableStorage rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args account_name=my_account account_key=my_key table_name=my_table\n
    "},{"location":"databases/azure_table/#geniusrise_databases.azure_table.AzureTableStorage.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_azure_table_spout:\nname: \"AzureTableStorage\"\nmethod: \"fetch\"\nargs:\naccount_name: \"my_account\"\naccount_key: \"my_key\"\ntable_name: \"my_table\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/azure_table/#geniusrise_databases.azure_table.AzureTableStorage.fetch","title":"fetch(account_name, account_key, table_name)","text":"

    \ud83d\udcd6 Fetch data from Azure Table Storage and save it in batch.

    Parameters:

    Name Type Description Default account_name str

    The Azure Storage account name.

    required account_key str

    The Azure Storage account key.

    required table_name str

    The Azure Table Storage table name.

    required

    Raises:

    Type Description Exception

    If unable to connect to Azure Table Storage or fetch the data.

    "},{"location":"databases/bigquery/","title":"Bigquery","text":"

    Bases: Spout

    "},{"location":"databases/bigquery/#geniusrise_databases.bigquery.BigQuery.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the BigQuery class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/bigquery/#geniusrise_databases.bigquery.BigQuery.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius BigQuery rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args project_id=my_project_id dataset_id=my_dataset table_id=my_table\n
    "},{"location":"databases/bigquery/#geniusrise_databases.bigquery.BigQuery.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_bigquery_spout:\nname: \"BigQuery\"\nmethod: \"fetch\"\nargs:\nproject_id: \"my_project_id\"\ndataset_id: \"my_dataset\"\ntable_id: \"my_table\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/bigquery/#geniusrise_databases.bigquery.BigQuery.fetch","title":"fetch(project_id, dataset_id, table_id)","text":"

    \ud83d\udcd6 Fetch data from a BigQuery table and save it in batch.

    Parameters:

    Name Type Description Default project_id str

    The Google Cloud project ID.

    required dataset_id str

    The BigQuery dataset ID.

    required table_id str

    The BigQuery table ID.

    required

    Raises:

    Type Description Exception

    If unable to connect to the BigQuery server or execute the query.

    "},{"location":"databases/bigtable/","title":"BigTable","text":"

    Bases: Spout

    "},{"location":"databases/bigtable/#geniusrise_databases.bigtable.Bigtable.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Bigtable class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/bigtable/#geniusrise_databases.bigtable.Bigtable.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Bigtable rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args project_id=my_project instance_id=my_instance table_id=my_table\n
    "},{"location":"databases/bigtable/#geniusrise_databases.bigtable.Bigtable.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_bigtable_spout:\nname: \"Bigtable\"\nmethod: \"fetch\"\nargs:\nproject_id: \"my_project\"\ninstance_id: \"my_instance\"\ntable_id: \"my_table\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/bigtable/#geniusrise_databases.bigtable.Bigtable.fetch","title":"fetch(project_id, instance_id, table_id)","text":"

    \ud83d\udcd6 Fetch data from a Google Cloud Bigtable and save it in batch.

    Parameters:

    Name Type Description Default project_id str

    The Google Cloud Project ID.

    required instance_id str

    The Bigtable instance ID.

    required table_id str

    The Bigtable table ID.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Bigtable server or fetch the data.

    "},{"location":"databases/cassandra/","title":"Cassandra","text":"

    Bases: Spout

    "},{"location":"databases/cassandra/#geniusrise_databases.cassandra.Cassandra.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Cassandra class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"databases/cassandra/#geniusrise_databases.cassandra.Cassandra.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Cassandra rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args hosts=localhost keyspace=my_keyspace query=\"SELECT * FROM my_table\" page_size=100\n
    "},{"location":"databases/cassandra/#geniusrise_databases.cassandra.Cassandra.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_cassandra_spout:\nname: \"Cassandra\"\nmethod: \"fetch\"\nargs:\nhosts: \"localhost\"\nkeyspace: \"my_keyspace\"\nquery: \"SELECT * FROM my_table\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/cassandra/#geniusrise_databases.cassandra.Cassandra.fetch","title":"fetch(hosts, keyspace, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a Cassandra database and save it in batch.

    Parameters:

    Name Type Description Default hosts str

    Comma-separated list of Cassandra hosts.

    required keyspace str

    The Cassandra keyspace to use.

    required query str

    The CQL query to execute.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the Cassandra cluster or execute the query.

    "},{"location":"databases/cloud_sql/","title":"Google Cloud SQL","text":"

    Bases: Spout

    "},{"location":"databases/cloud_sql/#geniusrise_databases.cloud_sql.GoogleCloudSQL.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the GoogleCloudSQL class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"databases/cloud_sql/#geniusrise_databases.cloud_sql.GoogleCloudSQL.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius GoogleCloudSQL rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=127.0.0.1 port=3306 user=root password=root database=mydb query=\"SELECT * FROM table\" page_size=100\n
    "},{"location":"databases/cloud_sql/#geniusrise_databases.cloud_sql.GoogleCloudSQL.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_google_cloud_sql_spout:\nname: \"GoogleCloudSQL\"\nmethod: \"fetch\"\nargs:\nhost: \"127.0.0.1\"\nport: 3306\nuser: \"root\"\npassword: \"root\"\ndatabase: \"mydb\"\nquery: \"SELECT * FROM table\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/cloud_sql/#geniusrise_databases.cloud_sql.GoogleCloudSQL.fetch","title":"fetch(host, port, user, password, database, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a Google Cloud SQL database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The Google Cloud SQL host.

    required port int

    The Google Cloud SQL port.

    required user str

    The Google Cloud SQL user.

    required password str

    The Google Cloud SQL password.

    required database str

    The Google Cloud SQL database name.

    required query str

    The SQL query to execute.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the Google Cloud SQL or fetch the data.

    "},{"location":"databases/cockroach/","title":"CockroachDB","text":"

    Bases: Spout

    "},{"location":"databases/cockroach/#geniusrise_databases.cockroach.CockroachDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the CockroachDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"databases/cockroach/#geniusrise_databases.cockroach.CockroachDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius CockroachDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=26257 user=root password=root database=mydb query=\"SELECT * FROM table\" page_size=100\n
    "},{"location":"databases/cockroach/#geniusrise_databases.cockroach.CockroachDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_cockroachdb_spout:\nname: \"CockroachDB\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 26257\nuser: \"root\"\npassword: \"root\"\ndatabase: \"mydb\"\nquery: \"SELECT * FROM table\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/cockroach/#geniusrise_databases.cockroach.CockroachDB.fetch","title":"fetch(host, port, user, password, database, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a CockroachDB database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The CockroachDB host.

    required port int

    The CockroachDB port.

    required user str

    The CockroachDB user.

    required password str

    The CockroachDB password.

    required database str

    The CockroachDB database name.

    required query str

    The SQL query to execute.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the CockroachDB server or execute the query.

    "},{"location":"databases/cosmosdb/","title":"CosmosDB","text":"

    Bases: Spout

    "},{"location":"databases/cosmosdb/#geniusrise_databases.cosmosdb.CosmosDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Cosmos DB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/cosmosdb/#geniusrise_databases.cosmosdb.CosmosDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius CosmosDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args endpoint=https://mycosmosdb.documents.azure.com:443/ database=my_database collection=my_collection\n
    "},{"location":"databases/cosmosdb/#geniusrise_databases.cosmosdb.CosmosDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_cosmosdb_spout:\nname: \"CosmosDB\"\nmethod: \"fetch\"\nargs:\nendpoint: \"https://mycosmosdb.documents.azure.com:443/\"\ndatabase: \"my_database\"\ncollection: \"my_collection\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/cosmosdb/#geniusrise_databases.cosmosdb.CosmosDB.fetch","title":"fetch(endpoint, database, collection)","text":"

    \ud83d\udcd6 Fetch data from a Cosmos DB collection and save it in batch.

    Parameters:

    Name Type Description Default endpoint str

    The Cosmos DB endpoint URL.

    required database str

    The Cosmos DB database name.

    required collection str

    The Cosmos DB collection name.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Cosmos DB server or execute the query.

    "},{"location":"databases/couchbase/","title":"Couchbase","text":"

    Bases: Spout

    "},{"location":"databases/couchbase/#geniusrise_databases.couchbase.Couchbase.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the CouchbaseSpout class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"databases/couchbase/#geniusrise_databases.couchbase.Couchbase.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius CouchbaseSpout rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost username=admin password=password bucket_name=my_bucket query=\"SELECT * FROM my_bucket\" page_size=100\n
    "},{"location":"databases/couchbase/#geniusrise_databases.couchbase.Couchbase.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_couchbase_spout:\nname: \"CouchbaseSpout\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nusername: \"admin\"\npassword: \"password\"\nbucket_name: \"my_bucket\"\nquery: \"SELECT * FROM my_bucket\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/couchbase/#geniusrise_databases.couchbase.Couchbase.fetch","title":"fetch(host, username, password, bucket_name, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a Couchbase bucket and save it in batch.

    Parameters:

    Name Type Description Default host str

    The Couchbase host.

    required username str

    The Couchbase username.

    required password str

    The Couchbase password.

    required bucket_name str

    The Couchbase bucket name.

    required query str

    The N1QL query to execute.

    required page_size int

    The number of documents to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the Couchbase cluster or execute the query.

    "},{"location":"databases/db2/","title":"IBM DB2","text":"

    Bases: Spout

    "},{"location":"databases/db2/#geniusrise_databases.db2.DB2.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the DB2 class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/db2/#geniusrise_databases.db2.DB2.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius DB2 rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args hostname=mydb2.example.com port=50000 username=myusername password=mypassword database=mydb\n
    "},{"location":"databases/db2/#geniusrise_databases.db2.DB2.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_db2_spout:\nname: \"DB2\"\nmethod: \"fetch\"\nargs:\nhostname: \"mydb2.example.com\"\nport: 50000\nusername: \"myusername\"\npassword: \"mypassword\"\ndatabase: \"mydb\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/db2/#geniusrise_databases.db2.DB2.fetch","title":"fetch(hostname, port, username, password, database)","text":"

    \ud83d\udcd6 Fetch data from a DB2 database and save it in batch.

    Parameters:

    Name Type Description Default hostname str

    The DB2 hostname.

    required port int

    The DB2 port.

    required username str

    The DB2 username.

    required password str

    The DB2 password.

    required database str

    The DB2 database name.

    required

    Raises:

    Type Description Exception

    If unable to connect to the DB2 server or execute the command.

    "},{"location":"databases/documentdb/","title":"AWS DocumentDB","text":"

    Bases: Spout

    "},{"location":"databases/documentdb/#geniusrise_databases.documentdb.DocumentDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the DocumentDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/documentdb/#geniusrise_databases.documentdb.DocumentDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius DocumentDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=27017 user=myuser password=mypassword database=mydb collection=mycollection query=\"{}\" page_size=100\n
    "},{"location":"databases/documentdb/#geniusrise_databases.documentdb.DocumentDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_documentdb_spout:\nname: \"DocumentDB\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 27017\nuser: \"myuser\"\npassword: \"mypassword\"\ndatabase: \"mydb\"\ncollection: \"mycollection\"\nquery: \"{}\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/documentdb/#geniusrise_databases.documentdb.DocumentDB.fetch","title":"fetch(host, port, user, password, database, collection, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a DocumentDB database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The DocumentDB host.

    required port int

    The DocumentDB port.

    required user str

    The DocumentDB user.

    required password str

    The DocumentDB password.

    required database str

    The DocumentDB database name.

    required collection str

    The DocumentDB collection name.

    required query str

    The query to execute.

    required page_size int

    The number of documents to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the DocumentDB server or execute the query.

    "},{"location":"databases/dynamodb/","title":"AWS DynamoDB","text":"

    Bases: Spout

    "},{"location":"databases/dynamodb/#geniusrise_databases.dynamodb.DynamoDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the DynamoDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"databases/dynamodb/#geniusrise_databases.dynamodb.DynamoDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius DynamoDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args table_name=my_table page_size=100\n
    "},{"location":"databases/dynamodb/#geniusrise_databases.dynamodb.DynamoDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_dynamodb_spout:\nname: \"DynamoDB\"\nmethod: \"fetch\"\nargs:\ntable_name: \"my_table\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/dynamodb/#geniusrise_databases.dynamodb.DynamoDB.fetch","title":"fetch(table_name, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a DynamoDB table and save it in batch.

    Parameters:

    Name Type Description Default table_name str

    The DynamoDB table name.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the DynamoDB or fetch the data.

    "},{"location":"databases/elasticsearch/","title":"Elasticsearch","text":"

    Bases: Spout

    "},{"location":"databases/elasticsearch/#geniusrise_databases.elasticsearch.Elasticsearch.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Elasticsearch class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"databases/elasticsearch/#geniusrise_databases.elasticsearch.Elasticsearch.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Elasticsearch rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args hosts=localhost:9200 index=my_index query='{\"query\": {\"match_all\": {}}}' page_size=100\n
    "},{"location":"databases/elasticsearch/#geniusrise_databases.elasticsearch.Elasticsearch.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_elasticsearch_spout:\nname: \"Elasticsearch\"\nmethod: \"fetch\"\nargs:\nhosts: \"localhost:9200\"\nindex: \"my_index\"\nquery: '{\"query\": {\"match_all\": {}}}'\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/elasticsearch/#geniusrise_databases.elasticsearch.Elasticsearch.fetch","title":"fetch(hosts, index, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from an Elasticsearch index and save it in batch.

    Parameters:

    Name Type Description Default hosts str

    Comma-separated list of Elasticsearch hosts.

    required index str

    The Elasticsearch index to query.

    required query str

    The Elasticsearch query in JSON format.

    required page_size int

    The number of documents to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the Elasticsearch cluster or execute the query.

    "},{"location":"databases/firestore/","title":"Firestore","text":"

    Bases: Spout

    "},{"location":"databases/firestore/#geniusrise_databases.firestore.Firestore.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Firestore class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/firestore/#geniusrise_databases.firestore.Firestore.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Firestore rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args project_id=my-project collection_id=my-collection\n
    "},{"location":"databases/firestore/#geniusrise_databases.firestore.Firestore.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_firestore_spout:\nname: \"Firestore\"\nmethod: \"fetch\"\nargs:\nproject_id: \"my-project\"\ncollection_id: \"my-collection\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/firestore/#geniusrise_databases.firestore.Firestore.fetch","title":"fetch(project_id, collection_id)","text":"

    \ud83d\udcd6 Fetch data from a Firestore collection and save it in batch.

    Parameters:

    Name Type Description Default project_id str

    The Google Cloud project ID.

    required collection_id str

    The Firestore collection ID.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Firestore server or execute the query.

    "},{"location":"databases/graphite/","title":"Graphite","text":"

    Bases: Spout

    "},{"location":"databases/graphite/#geniusrise_databases.graphite.Graphite.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Graphite class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/graphite/#geniusrise_databases.graphite.Graphite.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Graphite rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args url=http://localhost:8080 target=stats_counts.myapp output_format=json from=-1h until=now\n
    "},{"location":"databases/graphite/#geniusrise_databases.graphite.Graphite.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_graphite_spout:\nname: \"Graphite\"\nmethod: \"fetch\"\nargs:\nurl: \"http://localhost:8080\"\ntarget: \"stats_counts.myapp\"\noutput_format: \"json\"\nfrom: \"-1h\"\nuntil: \"now\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/graphite/#geniusrise_databases.graphite.Graphite.fetch","title":"fetch(url, target, output_format='json', from_time='-1h', until='now')","text":"

    \ud83d\udcd6 Fetch data from a Graphite database and save it in batch.

    Parameters:

    Name Type Description Default url str

    The Graphite API URL.

    required target str

    The target metric to fetch.

    required output_format str

    The output format. Defaults to \"json\".

    'json' from_time str

    The start time for fetching data. Defaults to \"-1h\".

    '-1h' until str

    The end time for fetching data. Defaults to \"now\".

    'now'

    Raises:

    Type Description Exception

    If unable to connect to the Graphite server or fetch the data.

    "},{"location":"databases/hbase/","title":"HBase","text":"

    Bases: Spout

    "},{"location":"databases/hbase/#geniusrise_databases.hbase.HBase.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the HBase class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/hbase/#geniusrise_databases.hbase.HBase.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius HBase rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost table=my_table row_start=start row_stop=stop batch_size=100\n
    "},{"location":"databases/hbase/#geniusrise_databases.hbase.HBase.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_hbase_spout:\nname: \"HBase\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\ntable: \"my_table\"\nrow_start: \"start\"\nrow_stop: \"stop\"\nbatch_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/hbase/#geniusrise_databases.hbase.HBase.fetch","title":"fetch(host, table, row_start, row_stop, batch_size=100)","text":"

    \ud83d\udcd6 Fetch data from an HBase table and save it in batch.

    Parameters:

    Name Type Description Default host str

    The HBase host.

    required table str

    The HBase table name.

    required row_start str

    The row key to start scanning from.

    required row_stop str

    The row key to stop scanning at.

    required batch_size int

    The number of rows to fetch per batch. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the HBase server or execute the scan.

    "},{"location":"databases/influxdb/","title":"InfluxDB","text":"

    Bases: Spout

    "},{"location":"databases/influxdb/#geniusrise_databases.influxdb.InfluxDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the InfluxDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/influxdb/#geniusrise_databases.influxdb.InfluxDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius InfluxDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=8086 username=myusername password=mypassword database=mydatabase\n
    "},{"location":"databases/influxdb/#geniusrise_databases.influxdb.InfluxDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_influxdb_spout:\nname: \"InfluxDB\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 8086\nusername: \"myusername\"\npassword: \"mypassword\"\ndatabase: \"mydatabase\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/influxdb/#geniusrise_databases.influxdb.InfluxDB.fetch","title":"fetch(host, port, username, password, database)","text":"

    \ud83d\udcd6 Fetch data from an InfluxDB database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The InfluxDB host.

    required port int

    The InfluxDB port.

    required username str

    The InfluxDB username.

    required password str

    The InfluxDB password.

    required database str

    The InfluxDB database name.

    required

    Raises:

    Type Description Exception

    If unable to connect to the InfluxDB server or execute the query.

    "},{"location":"databases/kairosdb/","title":"KairosDB","text":"

    Bases: Spout

    "},{"location":"databases/kairosdb/#geniusrise_databases.kairosdb.KairosDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the KairosDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/kairosdb/#geniusrise_databases.kairosdb.KairosDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius KairosDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args url=http://mykairosdbhost:8080/api/v1/datapoints query=\"SELECT * FROM mymetric\"\n
    "},{"location":"databases/kairosdb/#geniusrise_databases.kairosdb.KairosDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_kairosdb_spout:\nname: \"KairosDB\"\nmethod: \"fetch\"\nargs:\nurl: \"http://mykairosdbhost:8080/api/v1/datapoints\"\nquery: \"SELECT * FROM mymetric\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/kairosdb/#geniusrise_databases.kairosdb.KairosDB.fetch","title":"fetch(url, query)","text":"

    \ud83d\udcd6 Fetch data from a KairosDB metric and save it in batch.

    Parameters:

    Name Type Description Default url str

    The URL of the KairosDB API endpoint.

    required query str

    The SQL query to execute.

    required

    Raises:

    Type Description Exception

    If unable to connect to the KairosDB server or execute the query.

    "},{"location":"databases/keyspaces/","title":"AWSKeyspaces","text":"

    Bases: Spout

    "},{"location":"databases/keyspaces/#geniusrise_databases.keyspaces.AWSKeyspaces.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the AWS Keyspaces class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/keyspaces/#geniusrise_databases.keyspaces.AWSKeyspaces.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius AWSKeyspaces rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args region_name=us-east-1 cluster_name=mycluster table_name=mytable\n
    "},{"location":"databases/keyspaces/#geniusrise_databases.keyspaces.AWSKeyspaces.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_aws_keyspaces_spout:\nname: \"AWSKeyspaces\"\nmethod: \"fetch\"\nargs:\nregion_name: \"us-east-1\"\ncluster_name: \"mycluster\"\ntable_name: \"mytable\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/keyspaces/#geniusrise_databases.keyspaces.AWSKeyspaces.fetch","title":"fetch(region_name, cluster_name, table_name)","text":"

    \ud83d\udcd6 Fetch data from an AWS Keyspaces table and save it in batch.

    Parameters:

    Name Type Description Default region_name str

    The AWS region name.

    required cluster_name str

    The AWS Keyspaces cluster name.

    required table_name str

    The name of the AWS Keyspaces table.

    required

    Raises:

    Type Description Exception

    If unable to connect to the AWS Keyspaces cluster or execute the query.

    "},{"location":"databases/ldap/","title":"LDAP","text":"

    Bases: Spout

    "},{"location":"databases/ldap/#geniusrise_databases.ldap.LDAP.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the LDAP class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/ldap/#geniusrise_databases.ldap.LDAP.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius LDAP rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args url=ldap://myldap.example.com:389 bind_dn=\"cn=admin,dc=example,dc=com\" bind_password=\"password\" search_base=\"dc=example,dc=com\" search_filter=\"(objectClass=person)\" attributes=[\"cn\", \"givenName\", \"sn\"]\n
    "},{"location":"databases/ldap/#geniusrise_databases.ldap.LDAP.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_ldap_spout:\nname: \"LDAP\"\nmethod: \"fetch\"\nargs:\nurl: \"ldap://myldap.example.com:389\"\nbind_dn: \"cn=admin,dc=example,dc=com\"\nbind_password: \"password\"\nsearch_base: \"dc=example,dc=com\"\nsearch_filter: \"(objectClass=person)\"\nattributes: [\"cn\", \"givenName\", \"sn\"]\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/ldap/#geniusrise_databases.ldap.LDAP.fetch","title":"fetch(url, bind_dn, bind_password, search_base, search_filter, attributes)","text":"

    \ud83d\udcd6 Fetch data from an LDAP server and save it in batch.

    Parameters:

    Name Type Description Default url str

    The LDAP URL.

    required bind_dn str

    The DN to bind as.

    required bind_password str

    The password for the DN.

    required search_base str

    The search base.

    required search_filter str

    The search filter.

    required attributes list

    The list of attributes to retrieve.

    required

    Raises:

    Type Description Exception

    If unable to connect to the LDAP server or execute the search.

    "},{"location":"databases/memsql/","title":"MemSQL","text":"

    Bases: Spout

    "},{"location":"databases/memsql/#geniusrise_databases.memsql.MemSQL.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the MemSQL class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/memsql/#geniusrise_databases.memsql.MemSQL.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius MemSQL rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=mymemsqlhost user=myuser password=<PASSWORD> database=mydatabase query=\"SELECT * FROM mytable\"\n
    "},{"location":"databases/memsql/#geniusrise_databases.memsql.MemSQL.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_memsql_spout:\nname: \"MemSQL\"\nmethod: \"fetch\"\nargs:\nhost: \"mymemsqlhost\"\nuser: \"myuser\"\npassword: \"<PASSWORD>\"\ndatabase: \"mydatabase\"\nquery: \"SELECT * FROM mytable\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/memsql/#geniusrise_databases.memsql.MemSQL.fetch","title":"fetch(host, user, password, database, query)","text":"

    \ud83d\udcd6 Fetch data from a MemSQL database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The MemSQL host.

    required user str

    The MemSQL user.

    required password str

    The MemSQL password.

    required database str

    The MemSQL database name.

    required query str

    The SQL query to execute.

    required

    Raises:

    Type Description Exception

    If unable to connect to the MemSQL server or execute the query.

    "},{"location":"databases/mongodb/","title":"MongoDB","text":"

    Bases: Spout

    "},{"location":"databases/mongodb/#geniusrise_databases.mongodb.MongoDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the MongoDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/mongodb/#geniusrise_databases.mongodb.MongoDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius MongoDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=27017 username=myusername password=mypassword database=mydatabase collection=mycollection\n
    "},{"location":"databases/mongodb/#geniusrise_databases.mongodb.MongoDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_mongodb_spout:\nname: \"MongoDB\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 27017\nusername: \"myusername\"\npassword: \"mypassword\"\ndatabase: \"mydatabase\"\ncollection: \"mycollection\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/mongodb/#geniusrise_databases.mongodb.MongoDB.fetch","title":"fetch(host, port, username, password, database, collection)","text":"

    \ud83d\udcd6 Fetch data from a MongoDB database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The MongoDB host.

    required port int

    The MongoDB port.

    required username str

    The MongoDB username.

    required password str

    The MongoDB password.

    required database str

    The MongoDB database name.

    required collection str

    The MongoDB collection name.

    required

    Raises:

    Type Description Exception

    If unable to connect to the MongoDB server or execute the query.

    "},{"location":"databases/mysql/","title":"MySQL","text":"

    Bases: Spout

    "},{"location":"databases/mysql/#geniusrise_databases.mysql.MySQL.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the MySQL class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/mysql/#geniusrise_databases.mysql.MySQL.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius MySQL rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=3306 user=root password=root database=mydb query=\"SELECT * FROM table\" page_size=100\n
    "},{"location":"databases/mysql/#geniusrise_databases.mysql.MySQL.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_mysql_spout:\nname: \"MySQL\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 3306\nuser: \"root\"\npassword: \"root\"\ndatabase: \"mydb\"\nquery: \"SELECT * FROM table\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/mysql/#geniusrise_databases.mysql.MySQL.fetch","title":"fetch(host, port, user, password, database, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a MySQL database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The MySQL host.

    required port int

    The MySQL port.

    required user str

    The MySQL user.

    required password str

    The MySQL password.

    required database str

    The MySQL database name.

    required query str

    The SQL query to execute.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the MySQL server or execute the query.

    "},{"location":"databases/neo4j/","title":"Neo4j","text":"

    Bases: Spout

    "},{"location":"databases/neo4j/#geniusrise_databases.neo4j.Neo4j.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Neo4j class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/neo4j/#geniusrise_databases.neo4j.Neo4j.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Neo4j rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=7687 username=myusername password=mypassword\n
    "},{"location":"databases/neo4j/#geniusrise_databases.neo4j.Neo4j.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_neo4j_spout:\nname: \"Neo4j\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 7687\nusername: \"myusername\"\npassword: \"mypassword\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/neo4j/#geniusrise_databases.neo4j.Neo4j.fetch","title":"fetch(host, port, username, password)","text":"

    \ud83d\udcd6 Fetch data from a Neo4j database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The Neo4j host.

    required port int

    The Neo4j port.

    required username str

    The Neo4j username.

    required password str

    The Neo4j password.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Neo4j server or execute the query.

    "},{"location":"databases/nuodb/","title":"NuoDB","text":"

    Bases: Spout

    "},{"location":"databases/nuodb/#geniusrise_databases.nuodb.NuoDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the NuoDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/nuodb/#geniusrise_databases.nuodb.NuoDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius NuoDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args url=http://mynuodbhost:8080/v1/statement query=\"SELECT * FROM mytable\"\n
    "},{"location":"databases/nuodb/#geniusrise_databases.nuodb.NuoDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_nuodb_spout:\nname: \"NuoDB\"\nmethod: \"fetch\"\nargs:\nurl: \"http://mynuodbhost:8080/v1/statement\"\nquery: \"SELECT * FROM mytable\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/nuodb/#geniusrise_databases.nuodb.NuoDB.fetch","title":"fetch(url, query)","text":"

    \ud83d\udcd6 Fetch data from a NuoDB table and save it in batch.

    Parameters:

    Name Type Description Default url str

    The URL of the NuoDB API endpoint.

    required query str

    The SQL query to execute.

    required

    Raises:

    Type Description Exception

    If unable to connect to the NuoDB server or execute the query.

    "},{"location":"databases/opentsdb/","title":"OpenTSDB","text":"

    Bases: Spout

    "},{"location":"databases/opentsdb/#geniusrise_databases.opentsdb.OpenTSDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the OpenTSDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/opentsdb/#geniusrise_databases.opentsdb.OpenTSDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius OpenTSDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=http://localhost:4242\n
    "},{"location":"databases/opentsdb/#geniusrise_databases.opentsdb.OpenTSDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_opentsdb_spout:\nname: \"OpenTSDB\"\nmethod: \"fetch\"\nargs:\nhost: \"http://localhost:4242\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/opentsdb/#geniusrise_databases.opentsdb.OpenTSDB.fetch","title":"fetch(host)","text":"

    \ud83d\udcd6 Fetch data from an OpenTSDB database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The URL of the OpenTSDB instance.

    required

    Raises:

    Type Description Exception

    If unable to connect to the OpenTSDB server or execute the query.

    "},{"location":"databases/oracle/","title":"Oracle","text":"

    Bases: Spout

    "},{"location":"databases/oracle/#geniusrise_databases.oracle.Oracle.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the OracleSQL class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/oracle/#geniusrise_databases.oracle.Oracle.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius OracleSQL rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args server=localhost port=1521 service_name=myservice user=myuser password=mypassword\n
    "},{"location":"databases/oracle/#geniusrise_databases.oracle.Oracle.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_oraclesql_spout:\nname: \"OracleSQL\"\nmethod: \"fetch\"\nargs:\nserver: \"localhost\"\nport: 1521\nservice_name: \"myservice\"\nuser: \"myuser\"\npassword: \"mypassword\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/oracle/#geniusrise_databases.oracle.Oracle.fetch","title":"fetch(server, port, service_name, user, password, query)","text":"

    \ud83d\udcd6 Fetch data from an Oracle SQL database and save it in batch.

    Parameters:

    Name Type Description Default server str

    The Oracle SQL server.

    required port int

    The Oracle SQL port.

    required service_name str

    The Oracle service name.

    required user str

    The Oracle user.

    required password str

    The Oracle password.

    required query str

    The SQL query to execute.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Oracle SQL server or execute the query.

    "},{"location":"databases/postgres/","title":"PostgreSQL","text":"

    Bases: Spout

    "},{"location":"databases/postgres/#geniusrise_databases.postgres.PostgreSQL.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the PostgreSQL class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/postgres/#geniusrise_databases.postgres.PostgreSQL.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius PostgreSQL rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=5432 user=postgres password=postgres database=mydb query=\"SELECT * FROM table\" page_size=100\n
    "},{"location":"databases/postgres/#geniusrise_databases.postgres.PostgreSQL.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_postgresql_spout:\nname: \"PostgreSQL\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 5432\nuser: \"postgres\"\npassword: \"postgres\"\ndatabase: \"mydb\"\nquery: \"SELECT * FROM table\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/postgres/#geniusrise_databases.postgres.PostgreSQL.fetch","title":"fetch(host, port, user, password, database, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a PostgreSQL database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The PostgreSQL host.

    required port int

    The PostgreSQL port.

    required user str

    The PostgreSQL user.

    required password str

    The PostgreSQL password.

    required database str

    The PostgreSQL database name.

    required query str

    The SQL query to execute.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the PostgreSQL server or execute the query.

    "},{"location":"databases/presto/","title":"Presto","text":"

    Bases: Spout

    "},{"location":"databases/presto/#geniusrise_databases.presto.Presto.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Presto class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/presto/#geniusrise_databases.presto.Presto.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Presto rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=mypresto.example.com username=myusername password=mypassword catalog=mycatalog schema=myschema table=mytable\n
    "},{"location":"databases/presto/#geniusrise_databases.presto.Presto.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_presto_spout:\nname: \"Presto\"\nmethod: \"fetch\"\nargs:\nhost: \"mypresto.example.com\"\nusername: \"myusername\"\npassword: \"mypassword\"\ncatalog: \"mycatalog\"\nschema: \"myschema\"\ntable: \"mytable\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/presto/#geniusrise_databases.presto.Presto.fetch","title":"fetch(host, username, password, catalog, schema, table)","text":"

    \ud83d\udcd6 Fetch data from a Presto table and save it in batch.

    Parameters:

    Name Type Description Default host str

    The Presto host.

    required username str

    The Presto username.

    required password str

    The Presto password.

    required catalog str

    The Presto catalog name.

    required schema str

    The Presto schema name.

    required table str

    The name of the Presto table.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Presto server or execute the command.

    "},{"location":"databases/redis/","title":"Redis","text":"

    Bases: Spout

    "},{"location":"databases/redis/#geniusrise_databases.redis.Redis.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Redis class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/redis/#geniusrise_databases.redis.Redis.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Redis rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=6379 password=mypassword database=0\n
    "},{"location":"databases/redis/#geniusrise_databases.redis.Redis.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_redis_spout:\nname: \"Redis\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 6379\npassword: \"mypassword\"\ndatabase: 0\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/redis/#geniusrise_databases.redis.Redis.fetch","title":"fetch(host, port, password, database)","text":"

    \ud83d\udcd6 Fetch data from a Redis database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The Redis host.

    required port int

    The Redis port.

    required password str

    The Redis password.

    required database int

    The Redis database number.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Redis server or execute the command.

    "},{"location":"databases/riak/","title":"Riak","text":"

    Bases: Spout

    "},{"location":"databases/riak/#geniusrise_databases.riak.Riak.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Riak class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/riak/#geniusrise_databases.riak.Riak.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Riak rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=8098\n
    "},{"location":"databases/riak/#geniusrise_databases.riak.Riak.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_riak_spout:\nname: \"Riak\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 8098\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/riak/#geniusrise_databases.riak.Riak.fetch","title":"fetch(host, port)","text":"

    \ud83d\udcd6 Fetch data from a Riak database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The Riak host.

    required port int

    The Riak port.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Riak server or execute the query.

    "},{"location":"databases/spanner/","title":"Couchbase","text":"

    Bases: Spout

    "},{"location":"databases/spanner/#geniusrise_databases.spanner.Spanner.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Spanner class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/spanner/#geniusrise_databases.spanner.Spanner.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Spanner rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args project_id=my_project_id instance_id=my_instance database_id=my_database table_id=my_table\n
    "},{"location":"databases/spanner/#geniusrise_databases.spanner.Spanner.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_spanner_spout:\nname: \"Spanner\"\nmethod: \"fetch\"\nargs:\nproject_id: \"my_project_id\"\ninstance_id: \"my_instance\"\ndatabase_id: \"my_database\"\ntable_id: \"my_table\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/spanner/#geniusrise_databases.spanner.Spanner.fetch","title":"fetch(project_id, instance_id, database_id, table_id)","text":"

    \ud83d\udcd6 Fetch data from a Spanner database and save it in batch.

    Parameters:

    Name Type Description Default project_id str

    The Google Cloud project ID.

    required instance_id str

    The Spanner instance ID.

    required database_id str

    The Spanner database ID.

    required table_id str

    The Spanner table ID.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Spanner database or execute the query.

    "},{"location":"databases/sql_server/","title":"SQLServer","text":"

    Bases: Spout

    "},{"location":"databases/sql_server/#geniusrise_databases.sql_server.SQLServer.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the SQLServer class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/sql_server/#geniusrise_databases.sql_server.SQLServer.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius SQLServer rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args server=localhost port=1433 user=myuser password=mypassword database=mydatabase query=\"SELECT * FROM mytable\"\n
    "},{"location":"databases/sql_server/#geniusrise_databases.sql_server.SQLServer.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_sqlserver_spout:\nname: \"SQLServer\"\nmethod: \"fetch\"\nargs:\nserver: \"localhost\"\nport: 1433\nuser: \"myuser\"\npassword: \"mypassword\"\ndatabase: \"mydatabase\"\nquery: \"SELECT * FROM mytable\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/sql_server/#geniusrise_databases.sql_server.SQLServer.fetch","title":"fetch(server, port, user, password, database, query)","text":"

    \ud83d\udcd6 Fetch data from a SQL Server database and save it in batch.

    Parameters:

    Name Type Description Default server str

    The SQL Server host.

    required port int

    The SQL Server port.

    required user str

    The SQL Server user.

    required password str

    The SQL Server password.

    required database str

    The SQL Server database name.

    required query str

    The SQL query to execute.

    required

    Raises:

    Type Description Exception

    If unable to connect to the SQL Server server or execute the query.

    "},{"location":"databases/sqlite/","title":"SQLite","text":"

    Bases: Spout

    "},{"location":"databases/sqlite/#geniusrise_databases.sqlite.SQLite.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the SQLite class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"databases/sqlite/#geniusrise_databases.sqlite.SQLite.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius SQLite rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args s3_bucket=my_s3_bucket s3_key=mydb.sqlite query=\"SELECT * FROM table\" page_size=100\n
    "},{"location":"databases/sqlite/#geniusrise_databases.sqlite.SQLite.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_sqlite_spout:\nname: \"SQLite\"\nmethod: \"fetch\"\nargs:\ns3_bucket: \"my_s3_bucket\"\ns3_key: \"mydb.sqlite\"\nquery: \"SELECT * FROM table\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/sqlite/#geniusrise_databases.sqlite.SQLite.fetch","title":"fetch(s3_bucket, s3_key, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from an SQLite database and save it in batch.

    Parameters:

    Name Type Description Default s3_bucket str

    The S3 bucket containing the SQLite database.

    required s3_key str

    The S3 key for the SQLite database.

    required query str

    The SQL query to execute.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the SQLite database or execute the query.

    "},{"location":"databases/sybase/","title":"Sybase","text":"

    Bases: Spout

    "},{"location":"databases/sybase/#geniusrise_databases.sybase.Sybase.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Sybase class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/sybase/#geniusrise_databases.sybase.Sybase.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Sybase rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=5000 user=sa password=secret database=mydb query=\"SELECT * FROM table\" page_size=100\n
    "},{"location":"databases/sybase/#geniusrise_databases.sybase.Sybase.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_sybase_spout:\nname: \"Sybase\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 5000\nuser: \"sa\"\npassword: \"secret\"\ndatabase: \"mydb\"\nquery: \"SELECT * FROM table\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/sybase/#geniusrise_databases.sybase.Sybase.fetch","title":"fetch(host, port, user, password, database, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a Sybase database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The Sybase host.

    required port int

    The Sybase port.

    required user str

    The Sybase user.

    required password str

    The Sybase password.

    required database str

    The Sybase database name.

    required query str

    The SQL query to execute.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the Sybase server or execute the query.

    "},{"location":"databases/teradata/","title":"Teradata","text":"

    Bases: Spout

    "},{"location":"databases/teradata/#geniusrise_databases.teradata.Teradata.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Teradata class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/teradata/#geniusrise_databases.teradata.Teradata.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Teradata rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=myteradata.example.com username=myusername password=mypassword database=mydb\n
    "},{"location":"databases/teradata/#geniusrise_databases.teradata.Teradata.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_teradata_spout:\nname: \"Teradata\"\nmethod: \"fetch\"\nargs:\nhost: \"myteradata.example.com\"\nusername: \"myusername\"\npassword: \"mypassword\"\ndatabase: \"mydb\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/teradata/#geniusrise_databases.teradata.Teradata.fetch","title":"fetch(host, username, password, database)","text":"

    \ud83d\udcd6 Fetch data from a Teradata database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The Teradata host.

    required username str

    The Teradata username.

    required password str

    The Teradata password.

    required database str

    The Teradata database name.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Teradata server or execute the command.

    "},{"location":"databases/tidb/","title":"TiDB","text":"

    Bases: Spout

    "},{"location":"databases/tidb/#geniusrise_databases.tidb.TiDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the TiDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"databases/tidb/#geniusrise_databases.tidb.TiDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius TiDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=4000 user=root password=root database=mydb query=\"SELECT * FROM table\" page_size=100\n
    "},{"location":"databases/tidb/#geniusrise_databases.tidb.TiDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_tidb_spout:\nname: \"TiDB\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 4000\nuser: \"root\"\npassword: \"root\"\ndatabase: \"mydb\"\nquery: \"SELECT * FROM table\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/tidb/#geniusrise_databases.tidb.TiDB.fetch","title":"fetch(host, port, user, password, database, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a TiDB database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The TiDB host.

    required port int

    The TiDB port.

    required user str

    The TiDB user.

    required password str

    The TiDB password.

    required database str

    The TiDB database name.

    required query str

    The SQL query to execute.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the TiDB server or execute the query.

    "},{"location":"databases/timescaledb/","title":"TimescaleDB","text":"

    Bases: Spout

    "},{"location":"databases/timescaledb/#geniusrise_databases.timescaledb.TimescaleDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the TimescaleDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"databases/timescaledb/#geniusrise_databases.timescaledb.TimescaleDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius TimescaleDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=5432 user=postgres password=postgres database=mydb query=\"SELECT * FROM hypertable\" page_size=100\n
    "},{"location":"databases/timescaledb/#geniusrise_databases.timescaledb.TimescaleDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_timescaledb_spout:\nname: \"TimescaleDB\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 5432\nuser: \"postgres\"\npassword: \"postgres\"\ndatabase: \"mydb\"\nquery: \"SELECT * FROM hypertable\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/timescaledb/#geniusrise_databases.timescaledb.TimescaleDB.fetch","title":"fetch(host, port, user, password, database, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a TimescaleDB hypertable and save it in batch.

    Parameters:

    Name Type Description Default host str

    The TimescaleDB host.

    required port int

    The TimescaleDB port.

    required user str

    The TimescaleDB user.

    required password str

    The TimescaleDB password.

    required database str

    The TimescaleDB database name.

    required query str

    The SQL query to execute.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the TimescaleDB server or execute the query.

    "},{"location":"databases/vertica/","title":"Vertica","text":"

    Bases: Spout

    "},{"location":"databases/vertica/#geniusrise_databases.vertica.Vertica.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Vertica class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/vertica/#geniusrise_databases.vertica.Vertica.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Vertica rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=my_host port=5433 user=my_user password=my_password database=my_database query=\"SELECT * FROM my_table\"\n
    "},{"location":"databases/vertica/#geniusrise_databases.vertica.Vertica.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_vertica_spout:\nname: \"Vertica\"\nmethod: \"fetch\"\nargs:\nhost: \"my_host\"\nport: 5433\nuser: \"my_user\"\npassword: \"my_password\"\ndatabase: \"my_database\"\nquery: \"SELECT * FROM my_table\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/vertica/#geniusrise_databases.vertica.Vertica.fetch","title":"fetch(host, port, user, password, database, query)","text":"

    \ud83d\udcd6 Fetch data from a Vertica database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The Vertica host.

    required port int

    The Vertica port.

    required user str

    The Vertica user.

    required password str

    The Vertica password.

    required database str

    The Vertica database name.

    required query str

    The SQL query to execute.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Vertica server or execute the query.

    "},{"location":"databases/voltdb/","title":"VoltDB","text":"

    Bases: Spout

    "},{"location":"databases/voltdb/#geniusrise_databases.voltdb.VoltDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the VoltDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/voltdb/#geniusrise_databases.voltdb.VoltDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius VoltDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=21212 username=myuser password=<PASSWORD>\n
    "},{"location":"databases/voltdb/#geniusrise_databases.voltdb.VoltDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_voltdb_spout:\nname: \"VoltDB\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 21212\nusername: \"myuser\"\npassword: \"<PASSWORD>\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/voltdb/#geniusrise_databases.voltdb.VoltDB.fetch","title":"fetch(host, port, username, password)","text":"

    \ud83d\udcd6 Fetch data from a VoltDB database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The VoltDB host.

    required port int

    The VoltDB port.

    required username str

    The VoltDB username.

    required password str

    The VoltDB password.

    required

    Raises:

    Type Description Exception

    If unable to connect to the VoltDB server or execute the query.

    "},{"location":"guides/404/","title":"WIP","text":"

    Working on it! \ud83d\ude05

    "},{"location":"guides/architecture/","title":"Architecture","text":""},{"location":"guides/architecture/#introduction","title":"Introduction","text":"

    The Geniusrise framework is designed to provide a modular, scalable, and interoperable system for orchestrating machine learning workflows, particularly in the context of Large Language Models (LLMs). The architecture is built around the core concept of a Task, which represents a discrete unit of work. This document provides an overview of the architecture, detailing the primary components and their interactions.

    "},{"location":"guides/architecture/#system-overview","title":"System Overview","text":"

    The Geniusrise framework is composed of several key components:

    1. Tasks: The fundamental units of work.
    2. State Managers: Responsible for monitoring and managing the state of tasks.
    3. Data Managers: Oversee the input and output data associated with tasks.
    4. Model Managers: Handle model operations, ensuring efficient management.
    5. Runners: Wrappers for executing tasks on various platforms.
    6. Spouts and Bolts: Specialized tasks for data ingestion and processing.
    "},{"location":"guides/architecture/#tasks","title":"Tasks","text":"

    A task is the fundamental unit of work in the Geniusrise framework. It represents a specific operation or computation and can run for an arbitrary amount of time, performing any amount of work.

    7f4bbe51-d8d1-4bb9-866a-800b08e279b3
    "},{"location":"guides/architecture/#state-managers","title":"State Managers","text":"

    State Managers play a pivotal role in maintaining the state of tasks. They ensure that the progress and status of tasks are tracked, especially in distributed environments. Geniusrise offers various types of State Managers:

    1. DynamoDBStateManager: Interfaces with Amazon DynamoDB.
    2. InMemoryStateManager: Maintains state within the application's memory.
    3. PostgresStateManager: Interfaces with PostgreSQL databases.
    4. RedisStateManager: Interfaces with Redis in-memory data structure store.

    State Managers store data in various locations, allowing organizations to connect dashboards to these storage systems for real-time monitoring and analytics. This centralized storage and reporting mechanism ensures that stakeholders have a unified view of task states.

    ff2da911-03c1-41c7-958b-6b3417075631
    "},{"location":"guides/architecture/#data-managers","title":"Data Managers","text":"

    Data Managers are responsible for handling the input and output data for tasks. They implement various data operations methods that tasks can leverage to ingest or save data during their runs. Data Managers can be categorized based on their function and data processing type:

    1. BatchInputConfig: Manages batch input data.
    2. BatchOutputConfig: Manages batch output data.
    3. StreamingInputConfig: Manages streaming input data.
    4. StreamingOutputConfig: Manages streaming output data.

    Data Managers manage data partitioning for both batch and streaming data. By adhering to common data patterns, they enable the system's components to operate independently, fostering the creation of intricate networks of tasks. This independence, while allowing for flexibility and scalability, ensures that cascading failures in one component don't necessarily compromise the entire system.

    77f5295d-2b9c-45b8-a12c-e3728abef19a
    "},{"location":"guides/architecture/#model-managers","title":"Model Managers","text":"

    Model Managers oversee model operations, ensuring that models are saved, loaded, and managed. They can be of three primary types:

    1. S3ModelManager: Interfaces with Amazon S3 for model storage.
    2. WANDBModelManager: Interfaces with Weights & Biases for model versioning.
    3. GitModelManager: Interfaces with Git repositories for versioning of models.
    2ecbac4f-84fa-4d78-8941-6a383e41383d
    "},{"location":"guides/architecture/#spouts-and-bolts","title":"Spouts and Bolts","text":"

    At the heart of the Geniusrise framework are two primary component types: spouts and bolts.

    1. Spouts: These are tasks responsible for ingesting data from various sources. Depending on the output type, spouts can either produce streaming output or batch output.

      1. Batch: Runs periodically, produces data as a batch output.
      2. Stream: Runs forever, produces data into a streaming output.
    2. Bolts: Bolts are tasks that take in data, process it, and produce output. They can be categorized based on their input and output types:

      1. Stream-Stream: Reads streaming data and produces streaming output.
      2. Stream-Batch: Reads streaming data and produces batch output.
      3. Batch-Stream: Reads batch data and produces streaming output.
      4. Batch-Batch: Reads batch data and produces batch output.
    3b50dff8-0b8e-4e5b-8fba-2d116607c5cb
    "},{"location":"guides/architecture/#runners","title":"Runners","text":"

    Runners are the backbone of the Geniusrise framework, ensuring that tasks are executed seamlessly across various platforms. They encapsulate the environment and resources required for task execution, abstracting away the underlying complexities. Geniusrise offers the following runners:

    1. Local Runner: Executes tasks directly on a local machine, ideal for development and testing.
    2. Docker Runner: Runs tasks within Docker containers, ensuring a consistent and isolated environment.
    3. Kubernetes Runner: Deploys tasks on Kubernetes clusters, leveraging its scalability and orchestration capabilities.
    4. Airflow Runner: Integrates with Apache Airflow, allowing for complex workflow orchestration and scheduling.
    5. ECS Runner: Executes tasks on AWS ECS, providing a managed container service.
    6. Batch Runner: Optimized for batch computing workloads on platforms like AWS Batch.
    "},{"location":"guides/cli/","title":"CLI","text":"
    • DESCRIPTION
    • Spouts
      • Command: genius TestSpoutCtlSpout
      • Command: genius TestSpoutCtlSpout rise
      • Command: genius TestSpoutCtlSpout deploy
      • Command: genius TestSpoutCtlSpout help
    • Bolts
      • Command: genius TestBoltCtlBolt
      • Command: genius TestBoltCtlBolt rise
      • Command: genius TestBoltCtlBolt deploy
      • Command: genius TestBoltCtlBolt help
    • Deployment
      • Command: genius rise
      • Command: genius rise up
    • Kubernetes Pods
      • Command: genius pod
      • Command: genius pod status
      • Command: genius pod show
      • Command: genius pod describe
      • Command: genius pod logs
    • Kubernetes Deployments
      • Command: genius deployment
      • Command: genius deployment create
      • Command: genius deployment scale
      • Command: genius deployment describe
      • Command: genius deployment show
      • Command: genius deployment delete
      • Command: genius deployment status
    • Kubernetes Services
      • Command: genius service
      • Command: genius service create
      • Command: genius service delete
      • Command: genius service describe
      • Command: genius service show
    • Kubernetes Jobs
      • Command: genius job
      • Command: genius job create
      • Command: genius job delete
      • Command: genius job status
      • Kubernetes Cron Jobs
      • Command: genius cron_job
      • Command: genius cron_job create_cronjob
      • Command: genius cron_job delete_cronjob
      • Command: genius cron_job get_cronjob_status
    • Packaging
      • Command: genius docker package
    • Miscellaneous
      • Command: genius plugins
      • Command: genius list
    "},{"location":"guides/cli/#description","title":"DESCRIPTION","text":"

    Geniusrise

    POSITIONAL ARGUMENTS

    genius TestSpoutCtlSpout

    : Manage spout TestSpoutCtlSpout.

    genius TestBoltCtlBolt

    : Manage bolt TestBoltCtlBolt.

    genius rise

    : Manage spouts and bolts with a YAML file.

    genius docker

    : Package this application into a Docker image.

    genius pod

    : Manage spouts and bolts as kubernetes pod

    genius deployment

    : Manage spouts and bolts as kubernetes deployment

    genius service

    : Manage spouts and bolts as kubernetes service

    genius job

    : Manage spouts and bolts as kubernetes job

    genius cron_job

    : Manage spouts and bolts as kubernetes cron_job

    genius plugins

    : Print help for all spouts and bolts.

    genius list

    : List all discovered spouts and bolts.

    "},{"location":"guides/cli/#spouts","title":"Spouts","text":""},{"location":"guides/cli/#command-genius-testspoutctlspout","title":"Command: genius TestSpoutCtlSpout","text":"

    Usage: genius TestSpoutCtlSpout [-h] {rise,deploy,help} ...

    POSITIONAL ARGUMENTS genius TestSpoutCtlSpout

    genius TestSpoutCtlSpout rise

    : Run a spout locally.

    genius TestSpoutCtlSpout deploy

    : Run a spout remotely.

    genius TestSpoutCtlSpout help

    : Print help for the spout.

    "},{"location":"guides/cli/#command-genius-testspoutctlspout-rise","title":"Command: genius TestSpoutCtlSpout rise","text":"

    Usage: genius TestSpoutCtlSpout rise [-h] [--buffer_size BUFFER_SIZE] [--output_folder OUTPUT_FOLDER] [--output_kafka_topic OUTPUT_KAFKA_TOPIC] [--output_kafka_cluster_connection_string OUTPUT_KAFKA_CLUSTER_CONNECTION_STRING] [--output_s3_bucket OUTPUT_S3_BUCKET] [--output_s3_folder OUTPUT_S3_FOLDER] [--redis_host REDIS_HOST] [--redis_port REDIS_PORT] [--redis_db REDIS_DB] [--postgres_host POSTGRES_HOST] [--postgres_port POSTGRES_PORT] [--postgres_user POSTGRES_USER] [--postgres_password POSTGRES_PASSWORD] [--postgres_database POSTGRES_DATABASE] [--postgres_table POSTGRES_TABLE] [--dynamodb_table_name DYNAMODB_TABLE_NAME] [--dynamodb_region_name DYNAMODB_REGION_NAME] [--prometheus_gateway PROMETHEUS_GATEWAY] [--args ...] {batch,streaming,stream_to_batch} {none,redis,postgres,dynamodb,prometheus} method_name

    {batch,streaming,stream_to_batch}

    : Choose the type of output data: batch or streaming.

    {none,redis,postgres,dynamodb,prometheus}

    : Select the type of state manager: none, redis, postgres, dynamodb, or prometheus.

    method_name

    : The name of the method to execute on the spout.

    Options genius TestSpoutCtlSpout rise

    --buffer_size BUFFER_SIZE: Specify the size of the buffer. --output_folder OUTPUT_FOLDER: Specify the directory where output files should be stored temporarily

    --output_kafka_topic OUTPUT_KAFKA_TOPIC: Kafka output topic for streaming spouts. --output_kafka_cluster_connection_string OUTPUT_KAFKA_CLUSTER_CONNECTION_STRING: Kafka connection string for streaming spouts. --output_s3_bucket OUTPUT_S3_BUCKET: Provide the name of the S3 bucket for output storage. --output_s3_folder OUTPUT_S3_FOLDER: Indicate the S3 folder for output storage. --redis_host REDIS_HOST: Enter the host address for the Redis server. --redis_port REDIS_PORT: Enter the port number for the Redis server. --redis_db REDIS_DB: Specify the Redis database to be used. --postgres_host POSTGRES_HOST: Enter the host address for the PostgreSQL server. --postgres_port POSTGRES_PORT: Enter the port number for the PostgreSQL server. --postgres_user POSTGRES_USER: Provide the username for the PostgreSQL server. --postgres_password POSTGRES_PASSWORD: Provide the password for the PostgreSQL server. --postgres_database POSTGRES_DATABASE: Specify the PostgreSQL database to be used. --postgres_table POSTGRES_TABLE: Specify the PostgreSQL table to be used. --dynamodb_table_name DYNAMODB_TABLE_NAME: Provide the name of the DynamoDB table. --dynamodb_region_name DYNAMODB_REGION_NAME: Specify the AWS region for DynamoDB. --prometheus_gateway PROMETHEUS_GATEWAY: Specify the prometheus gateway URL. --args ...: Additional keyword arguments to pass to the spout.

    "},{"location":"guides/cli/#command-genius-testspoutctlspout-deploy","title":"Command: genius TestSpoutCtlSpout deploy","text":"

    Usage: genius TestSpoutCtlSpout deploy [-h] [--buffer_size BUFFER_SIZE] [--output_folder OUTPUT_FOLDER] [--output_kafka_topic OUTPUT_KAFKA_TOPIC] [--output_kafka_cluster_connection_string OUTPUT_KAFKA_CLUSTER_CONNECTION_STRING] [--output_s3_bucket OUTPUT_S3_BUCKET] [--output_s3_folder OUTPUT_S3_FOLDER] [--redis_host REDIS_HOST] [--redis_port REDIS_PORT] [--redis_db REDIS_DB] [--postgres_host POSTGRES_HOST] [--postgres_port POSTGRES_PORT] [--postgres_user POSTGRES_USER] [--postgres_password POSTGRES_PASSWORD] [--postgres_database POSTGRES_DATABASE] [--postgres_table POSTGRES_TABLE] [--dynamodb_table_name DYNAMODB_TABLE_NAME] [--dynamodb_region_name DYNAMODB_REGION_NAME] [--prometheus_gateway PROMETHEUS_GATEWAY] [--k8s_kind {deployment,service,job,cron_job}] [--k8s_name K8S_NAME] [--k8s_image K8S_IMAGE] [--k8s_replicas K8S_REPLICAS] [--k8s_env_vars K8S_ENV_VARS] [--k8s_cpu K8S_CPU] [--k8s_memory K8S_MEMORY] [--k8s_storage K8S_STORAGE] [--k8s_gpu K8S_GPU] [--k8s_kube_config_path K8S_KUBE_CONFIG_PATH] [--k8s_api_key K8S_API_KEY] [--k8s_api_host K8S_API_HOST] [--k8s_verify_ssl K8S_VERIFY_SSL] [--k8s_ssl_ca_cert K8S_SSL_CA_CERT] [--k8s_cluster_name K8S_CLUSTER_NAME] [--k8s_context_name K8S_CONTEXT_NAME] [--k8s_namespace K8S_NAMESPACE] [--k8s_labels K8S_LABELS] [--k8s_annotations K8S_ANNOTATIONS] [--k8s_port K8S_PORT] [--k8s_target_port K8S_TARGET_PORT] [--k8s_schedule K8S_SCHEDULE] [--args ...] {batch,streaming,stream_to_batch} {none,redis,postgres,dynamodb,prometheus} {k8s} method_name

    {batch,streaming,stream_to_batch}

    : Choose the type of output data: batch or streaming.

    {none,redis,postgres,dynamodb,prometheus}

    : Select the type of state manager: none, redis, postgres, dynamodb, or prometheus.

    {k8s}

    : Choose the type of deployment.

    method_name

    : The name of the method to execute on the spout.

    Options genius TestSpoutCtlSpout deploy

    --buffer_size BUFFER_SIZE: Specify the size of the buffer. --output_folder OUTPUT_FOLDER: Specify the directory where output files should be stored temporarily

    --output_kafka_topic OUTPUT_KAFKA_TOPIC: Kafka output topic for streaming spouts. --output_kafka_cluster_connection_string OUTPUT_KAFKA_CLUSTER_CONNECTION_STRING: Kafka connection string for streaming spouts. --output_s3_bucket OUTPUT_S3_BUCKET: Provide the name of the S3 bucket for output storage. --output_s3_folder OUTPUT_S3_FOLDER: Indicate the S3 folder for output storage. --redis_host REDIS_HOST: Enter the host address for the Redis server. --redis_port REDIS_PORT: Enter the port number for the Redis server. --redis_db REDIS_DB: Specify the Redis database to be used. --postgres_host POSTGRES_HOST: Enter the host address for the PostgreSQL server. --postgres_port POSTGRES_PORT: Enter the port number for the PostgreSQL server. --postgres_user POSTGRES_USER: Provide the username for the PostgreSQL server. --postgres_password POSTGRES_PASSWORD: Provide the password for the PostgreSQL server. --postgres_database POSTGRES_DATABASE: Specify the PostgreSQL database to be used. --postgres_table POSTGRES_TABLE: Specify the PostgreSQL table to be used. --dynamodb_table_name DYNAMODB_TABLE_NAME: Provide the name of the DynamoDB table. --dynamodb_region_name DYNAMODB_REGION_NAME: Specify the AWS region for DynamoDB. --prometheus_gateway PROMETHEUS_GATEWAY: Specify the prometheus gateway URL. --k8s_kind {deployment,service,job,cron_job}: Choose the type of kubernetes resource. --k8s_name K8S_NAME: Name of the Kubernetes resource. --k8s_image K8S_IMAGE: Docker image for the Kubernetes resource. --k8s_replicas K8S_REPLICAS: Number of replicas. --k8s_env_vars K8S_ENV_VARS: Environment variables as a JSON string. --k8s_cpu K8S_CPU: CPU requirements. --k8s_memory K8S_MEMORY: Memory requirements. --k8s_storage K8S_STORAGE: Storage requirements. --k8s_gpu K8S_GPU: GPU requirements. --k8s_kube_config_path K8S_KUBE_CONFIG_PATH: Path to the kubeconfig file. --k8s_api_key K8S_API_KEY: API key for the Kubernetes cluster. --k8s_api_host K8S_API_HOST: API host for the Kubernetes cluster. 
--k8s_verify_ssl K8S_VERIFY_SSL: Whether to verify SSL certificates. --k8s_ssl_ca_cert K8S_SSL_CA_CERT: Path to the SSL CA certificate. --k8s_cluster_name K8S_CLUSTER_NAME: Name of the Kubernetes cluster. --k8s_context_name K8S_CONTEXT_NAME: Name of the kubeconfig context. --k8s_namespace K8S_NAMESPACE: Kubernetes namespace. --k8s_labels K8S_LABELS: Labels for Kubernetes resources, as a JSON string. --k8s_annotations K8S_ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --k8s_port K8S_PORT: Port to run the spout on as a service. --k8s_target_port K8S_TARGET_PORT: Port to expose the spout on as a service. --k8s_schedule K8S_SCHEDULE: Schedule to run the spout on as a cron job. --args ...: Additional keyword arguments to pass to the spout.

    "},{"location":"guides/cli/#command-genius-testspoutctlspout-help","title":"Command: genius TestSpoutCtlSpout help","text":"

    Usage: genius TestSpoutCtlSpout help [-h] method

    method

    : The method to execute.

    "},{"location":"guides/cli/#bolts","title":"Bolts","text":""},{"location":"guides/cli/#command-genius-testboltctlbolt","title":"Command: genius TestBoltCtlBolt","text":"

    Usage: genius TestBoltCtlBolt [-h] {rise,deploy,help} ...

    POSITIONAL ARGUMENTS genius TestBoltCtlBolt

    genius TestBoltCtlBolt rise

    : Run a bolt locally.

    genius TestBoltCtlBolt deploy

    : Run a bolt remotely.

    genius TestBoltCtlBolt help

    : Print help for the bolt.

    "},{"location":"guides/cli/#command-genius-testboltctlbolt-rise","title":"Command: genius TestBoltCtlBolt rise","text":"

    Usage: genius TestBoltCtlBolt rise [-h] [--buffer_size BUFFER_SIZE] [--input_folder INPUT_FOLDER] [--input_kafka_topic INPUT_KAFKA_TOPIC] [--input_kafka_cluster_connection_string INPUT_KAFKA_CLUSTER_CONNECTION_STRING] [--input_kafka_consumer_group_id INPUT_KAFKA_CONSUMER_GROUP_ID] [--input_s3_bucket INPUT_S3_BUCKET] [--input_s3_folder INPUT_S3_FOLDER] [--output_folder OUTPUT_FOLDER] [--output_kafka_topic OUTPUT_KAFKA_TOPIC] [--output_kafka_cluster_connection_string OUTPUT_KAFKA_CLUSTER_CONNECTION_STRING] [--output_s3_bucket OUTPUT_S3_BUCKET] [--output_s3_folder OUTPUT_S3_FOLDER] [--redis_host REDIS_HOST] [--redis_port REDIS_PORT] [--redis_db REDIS_DB] [--postgres_host POSTGRES_HOST] [--postgres_port POSTGRES_PORT] [--postgres_user POSTGRES_USER] [--postgres_password POSTGRES_PASSWORD] [--postgres_database POSTGRES_DATABASE] [--postgres_table POSTGRES_TABLE] [--dynamodb_table_name DYNAMODB_TABLE_NAME] [--dynamodb_region_name DYNAMODB_REGION_NAME] [--prometheus_gateway PROMETHEUS_GATEWAY] [--args ...] {batch,streaming,batch_to_stream,stream_to_batch} {batch,streaming,stream_to_batch} {none,redis,postgres,dynamodb,prometheus} method_name

    {batch,streaming,batch_to_stream,stream_to_batch}

    : Choose the type of input data: batch or streaming.

    {batch,streaming,stream_to_batch}

    : Choose the type of output data: batch or streaming.

    {none,redis,postgres,dynamodb,prometheus}

    : Select the type of state manager: none, redis, postgres, dynamodb, or prometheus.

    method_name

    : The name of the method to execute on the bolt.

    Options genius TestBoltCtlBolt rise

    --buffer_size BUFFER_SIZE: Specify the size of the buffer. --input_folder INPUT_FOLDER: Specify the directory where output files should be stored temporarily

    --input_kafka_topic INPUT_KAFKA_TOPIC: Kafka output topic for streaming spouts. --input_kafka_cluster_connection_string INPUT_KAFKA_CLUSTER_CONNECTION_STRING: Kafka connection string for streaming spouts. --input_kafka_consumer_group_id INPUT_KAFKA_CONSUMER_GROUP_ID: Kafka consumer group id to use. --input_s3_bucket INPUT_S3_BUCKET: Provide the name of the S3 bucket for output storage. --input_s3_folder INPUT_S3_FOLDER: Indicate the S3 folder for output storage. --output_folder OUTPUT_FOLDER: Specify the directory where output files should be stored temporarily

    --output_kafka_topic OUTPUT_KAFKA_TOPIC: Kafka output topic for streaming spouts. --output_kafka_cluster_connection_string OUTPUT_KAFKA_CLUSTER_CONNECTION_STRING: Kafka connection string for streaming spouts. --output_s3_bucket OUTPUT_S3_BUCKET: Provide the name of the S3 bucket for output storage. --output_s3_folder OUTPUT_S3_FOLDER: Indicate the S3 folder for output storage. --redis_host REDIS_HOST: Enter the host address for the Redis server. --redis_port REDIS_PORT: Enter the port number for the Redis server. --redis_db REDIS_DB: Specify the Redis database to be used. --postgres_host POSTGRES_HOST: Enter the host address for the PostgreSQL server. --postgres_port POSTGRES_PORT: Enter the port number for the PostgreSQL server. --postgres_user POSTGRES_USER: Provide the username for the PostgreSQL server. --postgres_password POSTGRES_PASSWORD: Provide the password for the PostgreSQL server. --postgres_database POSTGRES_DATABASE: Specify the PostgreSQL database to be used. --postgres_table POSTGRES_TABLE: Specify the PostgreSQL table to be used. --dynamodb_table_name DYNAMODB_TABLE_NAME: Provide the name of the DynamoDB table. --dynamodb_region_name DYNAMODB_REGION_NAME: Specify the AWS region for DynamoDB. --prometheus_gateway PROMETHEUS_GATEWAY: Specify the prometheus gateway URL. --args ...: Additional keyword arguments to pass to the bolt.

    "},{"location":"guides/cli/#command-genius-testboltctlbolt-deploy","title":"Command: genius TestBoltCtlBolt deploy","text":"

    Usage: genius TestBoltCtlBolt deploy [-h] [--buffer_size BUFFER_SIZE] [--input_folder INPUT_FOLDER] [--input_kafka_topic INPUT_KAFKA_TOPIC] [--input_kafka_cluster_connection_string INPUT_KAFKA_CLUSTER_CONNECTION_STRING] [--input_kafka_consumer_group_id INPUT_KAFKA_CONSUMER_GROUP_ID] [--input_s3_bucket INPUT_S3_BUCKET] [--input_s3_folder INPUT_S3_FOLDER] [--output_folder OUTPUT_FOLDER] [--output_kafka_topic OUTPUT_KAFKA_TOPIC] [--output_kafka_cluster_connection_string OUTPUT_KAFKA_CLUSTER_CONNECTION_STRING] [--output_s3_bucket OUTPUT_S3_BUCKET] [--output_s3_folder OUTPUT_S3_FOLDER] [--redis_host REDIS_HOST] [--redis_port REDIS_PORT] [--redis_db REDIS_DB] [--postgres_host POSTGRES_HOST] [--postgres_port POSTGRES_PORT] [--postgres_user POSTGRES_USER] [--postgres_password POSTGRES_PASSWORD] [--postgres_database POSTGRES_DATABASE] [--postgres_table POSTGRES_TABLE] [--dynamodb_table_name DYNAMODB_TABLE_NAME] [--dynamodb_region_name DYNAMODB_REGION_NAME] [--prometheus_gateway PROMETHEUS_GATEWAY] [--k8s_kind {deployment,service,job,cron_job}] [--k8s_name K8S_NAME] [--k8s_image K8S_IMAGE] [--k8s_replicas K8S_REPLICAS] [--k8s_env_vars K8S_ENV_VARS] [--k8s_cpu K8S_CPU] [--k8s_memory K8S_MEMORY] [--k8s_storage K8S_STORAGE] [--k8s_gpu K8S_GPU] [--k8s_kube_config_path K8S_KUBE_CONFIG_PATH] [--k8s_api_key K8S_API_KEY] [--k8s_api_host K8S_API_HOST] [--k8s_verify_ssl K8S_VERIFY_SSL] [--k8s_ssl_ca_cert K8S_SSL_CA_CERT] [--k8s_cluster_name K8S_CLUSTER_NAME] [--k8s_context_name K8S_CONTEXT_NAME] [--k8s_namespace K8S_NAMESPACE] [--k8s_labels K8S_LABELS] [--k8s_annotations K8S_ANNOTATIONS] [--k8s_port K8S_PORT] [--k8s_target_port K8S_TARGET_PORT] [--k8s_schedule K8S_SCHEDULE] [--args ...] {batch,streaming,batch_to_stream,stream_to_batch} {batch,streaming,stream_to_batch} {none,redis,postgres,dynamodb,prometheus} {k8s} method_name

    {batch,streaming,batch_to_stream,stream_to_batch}

    : Choose the type of input data: batch or streaming.

    {batch,streaming,stream_to_batch}

    : Choose the type of output data: batch or streaming.

    {none,redis,postgres,dynamodb,prometheus}

    : Select the type of state manager: none, redis, postgres, dynamodb, or prometheus.

    {k8s}

    : Choose the type of deployment.

    method_name

    : The name of the method to execute on the bolt.

    Options genius TestBoltCtlBolt deploy

    --buffer_size BUFFER_SIZE: Specify the size of the buffer. --input_folder INPUT_FOLDER: Specify the directory where output files should be stored temporarily

    --input_kafka_topic INPUT_KAFKA_TOPIC: Kafka output topic for streaming spouts. --input_kafka_cluster_connection_string INPUT_KAFKA_CLUSTER_CONNECTION_STRING: Kafka connection string for streaming spouts. --input_kafka_consumer_group_id INPUT_KAFKA_CONSUMER_GROUP_ID: Kafka consumer group id to use. --input_s3_bucket INPUT_S3_BUCKET: Provide the name of the S3 bucket for output storage. --input_s3_folder INPUT_S3_FOLDER: Indicate the S3 folder for output storage. --output_folder OUTPUT_FOLDER: Specify the directory where output files should be stored temporarily

    --output_kafka_topic OUTPUT_KAFKA_TOPIC: Kafka output topic for streaming spouts. --output_kafka_cluster_connection_string OUTPUT_KAFKA_CLUSTER_CONNECTION_STRING: Kafka connection string for streaming spouts. --output_s3_bucket OUTPUT_S3_BUCKET: Provide the name of the S3 bucket for output storage. --output_s3_folder OUTPUT_S3_FOLDER: Indicate the S3 folder for output storage. --redis_host REDIS_HOST: Enter the host address for the Redis server. --redis_port REDIS_PORT: Enter the port number for the Redis server. --redis_db REDIS_DB: Specify the Redis database to be used. --postgres_host POSTGRES_HOST: Enter the host address for the PostgreSQL server. --postgres_port POSTGRES_PORT: Enter the port number for the PostgreSQL server. --postgres_user POSTGRES_USER: Provide the username for the PostgreSQL server. --postgres_password POSTGRES_PASSWORD: Provide the password for the PostgreSQL server. --postgres_database POSTGRES_DATABASE: Specify the PostgreSQL database to be used. --postgres_table POSTGRES_TABLE: Specify the PostgreSQL table to be used. --dynamodb_table_name DYNAMODB_TABLE_NAME: Provide the name of the DynamoDB table. --dynamodb_region_name DYNAMODB_REGION_NAME: Specify the AWS region for DynamoDB. --prometheus_gateway PROMETHEUS_GATEWAY: Specify the prometheus gateway URL. --k8s_kind {deployment,service,job,cron_job}: Choose the type of kubernetes resource. --k8s_name K8S_NAME: Name of the Kubernetes resource. --k8s_image K8S_IMAGE: Docker image for the Kubernetes resource. --k8s_replicas K8S_REPLICAS: Number of replicas. --k8s_env_vars K8S_ENV_VARS: Environment variables as a JSON string. --k8s_cpu K8S_CPU: CPU requirements. --k8s_memory K8S_MEMORY: Memory requirements. --k8s_storage K8S_STORAGE: Storage requirements. --k8s_gpu K8S_GPU: GPU requirements. --k8s_kube_config_path K8S_KUBE_CONFIG_PATH: Path to the kubeconfig file. --k8s_api_key K8S_API_KEY: API key for the Kubernetes cluster. --k8s_api_host K8S_API_HOST: API host for the Kubernetes cluster. 
--k8s_verify_ssl K8S_VERIFY_SSL: Whether to verify SSL certificates. --k8s_ssl_ca_cert K8S_SSL_CA_CERT: Path to the SSL CA certificate. --k8s_cluster_name K8S_CLUSTER_NAME: Name of the Kubernetes cluster. --k8s_context_name K8S_CONTEXT_NAME: Name of the kubeconfig context. --k8s_namespace K8S_NAMESPACE: Kubernetes namespace. --k8s_labels K8S_LABELS: Labels for Kubernetes resources, as a JSON string. --k8s_annotations K8S_ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --k8s_port K8S_PORT: Port to run the bolt on as a service. --k8s_target_port K8S_TARGET_PORT: Port to expose the bolt on as a service. --k8s_schedule K8S_SCHEDULE: Schedule to run the bolt on as a cron job. --args ...: Additional keyword arguments to pass to the bolt.

    "},{"location":"guides/cli/#command-genius-testboltctlbolt-help","title":"Command: genius TestBoltCtlBolt help","text":"

    Usage: genius TestBoltCtlBolt help [-h] method

    method

    : The method to execute.

    "},{"location":"guides/cli/#deployment","title":"Deployment","text":""},{"location":"guides/cli/#command-genius-rise","title":"Command: genius rise","text":"

    Usage: genius rise [-h] [--spout SPOUT] [--bolt BOLT] [--file FILE] {up} ...

    POSITIONAL ARGUMENTS genius rise

    genius rise up

    : Deploy according to the genius.yml file.

    "},{"location":"guides/cli/#command-genius-rise-up","title":"Command: genius rise up","text":"

    Usage: genius rise up [-h] [--spout SPOUT] [--bolt BOLT] [--file FILE]

    Options genius rise up

    --spout SPOUT: Name of the specific spout to run. --bolt BOLT: Name of the specific bolt to run. --file FILE: Path of the genius.yml file, defaults to the current directory.

    Options genius rise

    --spout SPOUT: Name of the specific spout to run. --bolt BOLT: Name of the specific bolt to run. --file FILE: Path of the genius.yml file, defaults to the current directory.

    "},{"location":"guides/cli/#kubernetes-pods","title":"Kubernetes Pods","text":""},{"location":"guides/cli/#command-genius-pod","title":"Command: genius pod","text":"

    usage: genius pod [-h] {status,show,describe,logs} ...

    POSITIONAL ARGUMENTS genius pod

    genius pod status

    : Get the status of the Kubernetes pod.

    genius pod show

    : List all pods.

    genius pod describe

    : Describe a pod.

    genius pod logs

    : Get the logs of a pod.

    "},{"location":"guides/cli/#command-genius-pod-status","title":"Command: genius pod status","text":"

    usage: genius pod status [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the Kubernetes pod.

    Options genius pod status

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-pod-show","title":"Command: genius pod show","text":"

    usage: genius pod show [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT]

    Options genius pod show

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-pod-describe","title":"Command: genius pod describe","text":"

    usage: genius pod describe [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the pod.

    Options genius pod describe

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-pod-logs","title":"Command: genius pod logs","text":"

    usage: genius pod logs [-h] [--follow FOLLOW] [--tail TAIL] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the pod.

    Options genius pod logs

    --follow FOLLOW: Whether to follow the logs. --tail TAIL: Number of lines to show from the end of the logs. --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#kubernetes-deployments","title":"Kubernetes Deployments","text":""},{"location":"guides/cli/#command-genius-deployment","title":"Command: genius deployment","text":"

    usage: genius deployment [-h] {create,scale,describe,show,delete,status} ...

    POSITIONAL ARGUMENTS genius deployment

    genius deployment create

    : Create a new deployment.

    genius deployment scale

    : Scale a deployment.

    genius deployment describe

    : Describe a deployment.

    genius deployment show

    : List all deployments.

    genius deployment delete

    : Delete a deployment.

    genius deployment status

    : Get the status of a deployment.

    "},{"location":"guides/cli/#command-genius-deployment-create","title":"Command: genius deployment create","text":"

    usage: genius deployment create [-h] [--replicas REPLICAS] [--env_vars ENV_VARS] [--cpu CPU] [--memory MEMORY] [--storage STORAGE] [--gpu GPU] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name image command

    name

    : Name of the deployment.

    image

    : Docker image for the deployment.

    command

    : Command to run in the container.

    Options genius deployment create

    --replicas REPLICAS: Number of replicas. --env_vars ENV_VARS: Environment variables as a JSON string. --cpu CPU: CPU requirements. --memory MEMORY: Memory requirements. --storage STORAGE: Storage requirements. --gpu GPU: GPU requirements. --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-deployment-scale","title":"Command: genius deployment scale","text":"

    usage: genius deployment scale [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name replicas

    name

    : Name of the deployment.

    replicas

    : Number of replicas.

    Options genius deployment scale

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-deployment-describe","title":"Command: genius deployment describe","text":"

    usage: genius deployment describe [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the deployment.

    Options genius deployment describe

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-deployment-show","title":"Command: genius deployment show","text":"

    usage: genius deployment show [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT]

    Options genius deployment show

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-deployment-delete","title":"Command: genius deployment delete","text":"

    usage: genius deployment delete [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the deployment.

    Options genius deployment delete

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-deployment-status","title":"Command: genius deployment status","text":"

    usage: genius deployment status [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the deployment.

    Options genius deployment status

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#kubernetes-services","title":"Kubernetes Services","text":""},{"location":"guides/cli/#command-genius-service","title":"Command: genius service","text":"

    usage: genius service [-h] {create,delete,describe,show} ...

    POSITIONAL ARGUMENTS genius service

    genius service create

    : Create a new service.

    genius service delete

    : Delete a service.

    genius service describe

    : Describe a service.

    genius service show

    : List all services.

    "},{"location":"guides/cli/#command-genius-service-create","title":"Command: genius service create","text":"

    usage: genius service create [-h] [--replicas REPLICAS] [--port PORT] [--target_port TARGET_PORT] [--env_vars ENV_VARS] [--cpu CPU] [--memory MEMORY] [--storage STORAGE] [--gpu GPU] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name image command

    name

    : Name of the service.

    image

    : Docker image for the service.

    command

    : Command to run in the container.

    Options genius service create

    --replicas REPLICAS: Number of replicas. --port PORT: Service port. --target_port TARGET_PORT: Container target port. --env_vars ENV_VARS: Environment variables as a JSON string. --cpu CPU: CPU requirements. --memory MEMORY: Memory requirements. --storage STORAGE: Storage requirements. --gpu GPU: GPU requirements. --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-service-delete","title":"Command: genius service delete","text":"

    usage: genius service delete [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the service.

    Options genius service delete

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-service-describe","title":"Command: genius service describe","text":"

    usage: genius service describe [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the service.

    Options genius service describe

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-service-show","title":"Command: genius service show","text":"

    usage: genius service show [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT]

    Options genius service show

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#kubernetes-jobs","title":"Kubernetes Jobs","text":""},{"location":"guides/cli/#command-genius-job","title":"Command: genius job","text":"

    usage: genius job [-h] {create,delete,status} ...

    POSITIONAL ARGUMENTS genius job

    genius job create

    : Create a new job.

    genius job delete

    : Delete a job.

    genius job status

    : Get the status of a job.

    "},{"location":"guides/cli/#command-genius-job-create","title":"Command: genius job create","text":"

    usage: genius job create [-h] [--env_vars ENV_VARS] [--cpu CPU] [--memory MEMORY] [--storage STORAGE] [--gpu GPU] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name image command

    name

    : Name of the job.

    image

    : Docker image for the job.

    command

    : Command to run in the container.

    Options genius job create

    --env_vars ENV_VARS: Environment variables as a JSON string. --cpu CPU: CPU requirements. --memory MEMORY: Memory requirements. --storage STORAGE: Storage requirements. --gpu GPU: GPU requirements. --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-job-delete","title":"Command: genius job delete","text":"

    usage: genius job delete [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the job.

    Options genius job delete

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-job-status","title":"Command: genius job status","text":"

    usage: genius job status [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the job.

    Options genius job status

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#kubernetes-cron-jobs","title":"Kubernetes Cron Jobs","text":""},{"location":"guides/cli/#command-genius-cron_job","title":"Command: genius cron_job","text":"

    usage: genius cron_job [-h] {create_cronjob,delete_cronjob,get_cronjob_status} ...

    POSITIONAL ARGUMENTS genius cron_job

    genius cron_job create_cronjob

    : Create a new cronjob.

    genius cron_job delete_cronjob

    : Delete a cronjob.

    genius cron_job get_cronjob_status

    : Get the status of a cronjob.

    "},{"location":"guides/cli/#command-genius-cron_job-create_cronjob","title":"Command: genius cron_job create_cronjob","text":"

    usage: genius cron_job create_cronjob [-h] [--env_vars ENV_VARS] [--cpu CPU] [--memory MEMORY] [--storage STORAGE] [--gpu GPU] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name image command schedule

    name

    : Name of the cronjob.

    image

    : Docker image for the cronjob.

    command

    : Command to run in the container.

    schedule

    : Cron schedule.

    Options genius cron_job create_cronjob

    --env_vars ENV_VARS: Environment variables as a JSON string. --cpu CPU: CPU requirements. --memory MEMORY: Memory requirements. --storage STORAGE: Storage requirements. --gpu GPU: GPU requirements. --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-cron_job-delete_cronjob","title":"Command: genius cron_job delete_cronjob","text":"

    usage: genius cron_job delete_cronjob [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the cronjob.

    Options genius cron_job delete_cronjob

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-cron_job-get_cronjob_status","title":"Command: genius cron_job get_cronjob_status","text":"

    usage: genius cron_job get_cronjob_status [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the cronjob.

    Options genius cron_job get_cronjob_status

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#packaging","title":"Packaging","text":""},{"location":"guides/cli/#command-genius-docker-package","title":"Command: genius docker package","text":"

    Usage: genius docker package [-h] [--auth AUTH] [--base_image BASE_IMAGE] [--workdir WORKDIR] [--local_dir LOCAL_DIR] [--packages [PACKAGES ...]] [--os_packages [OS_PACKAGES ...]] [--env_vars ENV_VARS] image_name repository

    image_name

    : Name of the Docker image.

    repository

    : Container repository to upload to.

    Options genius docker package

    --auth AUTH: Authentication credentials as a JSON string. --base_image BASE_IMAGE: The base image to use for the Docker container. --workdir WORKDIR: The working directory in the Docker container. --local_dir LOCAL_DIR: The local directory to copy into the Docker container. --packages [PACKAGES ...]: List of Python packages to install in the Docker container. --os_packages [OS_PACKAGES ...]: List of OS packages to install in the Docker container. --env_vars ENV_VARS: Environment variables to set in the Docker container.

    "},{"location":"guides/cli/#miscellaneous","title":"Miscellaneous","text":""},{"location":"guides/cli/#command-genius-plugins","title":"Command: genius plugins","text":"

    Usage: genius plugins [-h] [spout_or_bolt]

    spout_or_bolt

    : The spout or bolt to print help for.

    "},{"location":"guides/cli/#command-genius-list","title":"Command: genius list","text":"

    Usage: genius list [-h] [--verbose]

    Options genius list

    --verbose: Print verbose output.

    "},{"location":"guides/concepts/","title":"Concepts","text":"

    The Geniusrise framework is built around loosely-coupled modules acting as a cohesive adhesive between distinct, modular components, much like how one would piece together Lego blocks. This design approach not only promotes flexibility but also ensures that each module or \"Lego block\" remains sufficiently independent. Such independence is crucial for diverse teams, each with its own unique infrastructure and requirements, to seamlessly build and manage their respective components.

    Geniusrise comes with a sizable set of plugins which implement various features and integrations. The independence and modularity of the design enable sharing of these building blocks in the community.

    "},{"location":"guides/concepts/#concepts_1","title":"Concepts","text":"
    1. Task: At its core, a task represents a discrete unit of work within the Geniusrise framework. Think of it as a singular action or operation that the system needs to execute. A task further manifests itself into a Bolt or a Spout as stated below.

    2. Components of a Task: Each task is equipped with three components:

      1. State Manager: This component is responsible for continuously monitoring and managing the task's state, ensuring that it progresses smoothly from initiation to completion, and for reporting errors and shipping logs to a central location.
      2. Data Manager: As the name suggests, the Data Manager oversees the input and output data associated with a task, ensuring data integrity and efficient data flow. It also ensures data sanity and enforces partition semantics and isolation.
      3. Runner: These are wrappers for executing a task on various platforms. Depending on the platform, the runner ensures that the task is executed seamlessly.
    3. Task Classification: Tasks within the Geniusrise framework can be broadly classified into two categories:

      • Spout: If a task's primary function is to ingest or bring in data, it's termed as a 'spout'.
      • Bolt: For tasks that don't primarily ingest data but perform other operations, they are termed 'bolts'.

    The beauty of the Geniusrise framework lies in its adaptability. Developers can script their workflow components once and have the freedom to deploy them across various platforms. To facilitate this, Geniusrise offers:

    1. Runners for Task Execution: Geniusrise is equipped with a diverse set of runners, each tailored for different platforms, ensuring that tasks can be executed almost anywhere:
      1. On your local machine for quick testing and development.
      2. Within Docker containers for isolated, reproducible environments.
      3. On Kubernetes clusters for scalable, cloud-native deployments.
      4. Using Apache Airflow for complex workflow orchestration. (Coming Soon).
      5. On AWS ECS for containerized application management. (Coming Soon).
      6. With AWS Batch for efficient batch computing workloads. (Coming Soon).
      7. With Docker Swarm clusters as an alternative orchestrator to kubernetes. (Coming Soon).

    This document delves into the core components and concepts that make up the Geniusrise framework.

    "},{"location":"guides/concepts/#tradeoffs","title":"Tradeoffs","text":"

    Because of the very loose coupling of the components, though the framework can be used to build very complex networks with independently running nodes, it provides limited orchestration capabilities — for example, synchronous pipelines are not natively supported. An external orchestrator such as Airflow can be used in such cases to orchestrate Geniusrise components.

    "},{"location":"guides/deployment/","title":"Deployment","text":""},{"location":"guides/deployment/#introduction","title":"Introduction","text":"

    This guide provides comprehensive instructions on how to deploy and manage resources in a Kubernetes cluster using the Geniusrise platform. The guide covers the following functionalities:

    • Connecting to a Kubernetes cluster
    • Managing Pods
    • Managing Deployments
    • Managing Services
    • Managing Jobs
    • Managing Cron jobs
    "},{"location":"guides/deployment/#prerequisites","title":"Prerequisites","text":"
    • A working Kubernetes cluster
    • Kubeconfig file for cluster access
    • Python 3.x installed
    • Geniusrise CLI installed
    "},{"location":"guides/deployment/#connecting-to-a-kubernetes-cluster","title":"Connecting to a Kubernetes Cluster","text":"

    Before performing any operations, you need to connect to your Kubernetes cluster. You can do this in two ways:

    1. Using a kubeconfig file and context name
    2. Using an API key and API host
    "},{"location":"guides/deployment/#using-kubeconfig-and-context-name","title":"Using Kubeconfig and Context Name","text":"
    genius k8s <command> --kube_config_path /path/to/kubeconfig.yaml --context_name my-context\n
    "},{"location":"guides/deployment/#using-api-key-and-api-host","title":"Using API Key and API Host","text":"
    genius k8s <command> --api_key my-api-key --api_host https://api.k8s.my-cluster.com --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#managing-pods","title":"Managing Pods","text":""},{"location":"guides/deployment/#checking-pod-status","title":"Checking Pod Status","text":"

    To get the status of a specific pod:

    genius k8s status my-pod-name --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#listing-all-pods","title":"Listing All Pods","text":"

    To list all the pods in the current namespace:

    genius k8s show --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#describing-a-pod","title":"Describing a Pod","text":"

    To get detailed information about a specific pod:

    genius k8s describe my-pod-name --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#fetching-pod-logs","title":"Fetching Pod Logs","text":"

    To get the logs of a specific pod:

    genius k8s logs my-pod-name --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#managing-deployments","title":"Managing Deployments","text":""},{"location":"guides/deployment/#creating-a-new-deployment","title":"Creating a New Deployment","text":"

    To create a new deployment:

    genius deployment create --name my-deployment --image my-image --command \"echo hello\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#scaling-a-deployment","title":"Scaling a Deployment","text":"

    To scale a deployment:

    genius deployment scale --name my-deployment --replicas 3 --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#listing-all-deployments","title":"Listing All Deployments","text":"

    To list all deployments:

    genius deployment show\n
    "},{"location":"guides/deployment/#describing-a-deployment","title":"Describing a Deployment","text":"

    To describe a specific deployment:

    genius deployment describe my-deployment --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#deleting-a-deployment","title":"Deleting a Deployment","text":"

    To delete a deployment:

    genius deployment delete my-deployment --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#checking-deployment-status","title":"Checking Deployment Status","text":"

    To check the status of a deployment:

    genius deployment status my-deployment --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#advanced-features","title":"Advanced Features","text":""},{"location":"guides/deployment/#environment-variables","title":"Environment Variables","text":"

    You can pass environment variables to your pods and deployments like so:

    genius deployment create --name my-deployment --image my-image --command \"echo hello\" --env_vars '{\"MY_VAR\": \"value\"}' --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#resource-requirements","title":"Resource Requirements","text":"

    You can specify resource requirements for your pods and deployments:

    genius deployment create --name my-deployment --image my-image --command \"echo hello\" --cpu \"100m\" --memory \"256Mi\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#gpu-support","title":"GPU Support","text":"

    To allocate GPUs to your pods:

    genius deployment create --name my-deployment --image my-image --command \"echo hello\" --gpu \"1\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#managing-services","title":"Managing Services","text":""},{"location":"guides/deployment/#creating-a-new-service","title":"Creating a New Service","text":"

    To create a new service:

    genius service create --name example-service --image example-image --command \"echo hello\" --port 8080 --target_port 8080 --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#deleting-a-service","title":"Deleting a Service","text":"

    To delete a service:

    genius service delete --name example-service --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#describing-a-service","title":"Describing a Service","text":"

    To describe a specific service:

    genius service describe --name example-service --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#listing-all-services","title":"Listing All Services","text":"

    To list all services:

    genius service show --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#managing-jobs","title":"Managing Jobs","text":""},{"location":"guides/deployment/#creating-a-new-job","title":"Creating a New Job","text":"

    To create a new job:

    genius job create --name example-job --image example-image --command \"echo hello\" --cpu \"100m\" --memory \"256Mi\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#deleting-a-job","title":"Deleting a Job","text":"

    To delete a job:

    genius job delete --name example-job --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#checking-job-status","title":"Checking Job Status","text":"

    To check the status of a job:

    genius job status --name example-job --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#managing-cron-jobs","title":"Managing Cron Jobs","text":""},{"location":"guides/deployment/#creating-a-new-cron-job","title":"Creating a New Cron Job","text":"

    To create a new cron job, you can use the create_cronjob sub-command. You'll need to specify the name, Docker image, command to run, and the cron schedule.

    genius cron_job create_cronjob --name example-cronjob --image example-image --command \"echo hello\" --schedule \"*/5 * * * *\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#additional-options","title":"Additional Options","text":"
    • --env_vars: To set environment variables, pass them as a JSON string.
    • --cpu, --memory, --storage, --gpu: To set resource requirements.
    "},{"location":"guides/deployment/#deleting-a-cron-job","title":"Deleting a Cron Job","text":"

    To delete a cron job, use the delete_cronjob sub-command and specify the name of the cron job.

    genius cron_job delete_cronjob --name example-cronjob --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#checking-cron-job-status","title":"Checking Cron Job Status","text":"

    To check the status of a cron job, use the get_cronjob_status sub-command and specify the name of the cron job.

    genius cron_job get_cronjob_status --name example-cronjob --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#advanced-features-for-cron-jobs","title":"Advanced Features for Cron Jobs","text":""},{"location":"guides/deployment/#environment-variables_1","title":"Environment Variables","text":"

    You can pass environment variables to your cron jobs like so:

    genius cron_job create_cronjob --name example-cronjob --image example-image --command \"echo hello\" --schedule \"*/5 * * * *\" --env_vars '{\"MY_VAR\": \"value\"}' --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#resource-requirements_1","title":"Resource Requirements","text":"

    You can specify resource requirements for your cron jobs:

    genius cron_job create_cronjob --name example-cronjob --image example-image --command \"echo hello\" --schedule \"*/5 * * * *\" --cpu \"100m\" --memory \"256Mi\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#gpu-support_1","title":"GPU Support","text":"

    To allocate GPUs to your cron jobs:

    genius cron_job create_cronjob --name example-cronjob --image example-image --command \"echo hello\" --schedule \"*/5 * * * *\" --gpu \"1\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/dev_cycle/","title":"Dev Cycle","text":"

    This document describes one full local development cycle.

    Let's say we want to build a pipeline which tags medical terms in EHR documents.

    "},{"location":"guides/dev_cycle/#strategies","title":"Strategies","text":"

    Pre-requisites:

    1. SNOMED-CT: is a knowledge graph of standard medical terminology
    2. IHTSDO: a standards body for medical terminologies in a number of countries.
    3. UMLS: the Unified Medical Language System is a set of files and software that brings together many health and biomedical vocabularies and standards.
    "},{"location":"guides/dev_cycle/#strategy-1-named-entity-recognition","title":"Strategy 1: Named entity recognition","text":""},{"location":"guides/dev_cycle/#1-create-a-labelled-dataset","title":"1. Create a labelled dataset","text":"

    We need a corpus of documents with medical terms labeled. For example, we could use wikipedia + wikidata to build such a dataset, given entities in wikipedia are linked and indexed in the wikidata knowledge graph. Reference: Building a Massive Corpus for Named Entity Recognition using Free Open Data Sources. We could also annotate medical datasets like MIMIC-III annotated with SNOMED-CT based MedCAT which is a medical annotation tool developed on the knowledge graph of medical terminology (SNOMED-CT), as it would be more pertinent to our usecase, reference: DNER Clinical (named entity recognition) from free clinical text to Snomed-CT concept

    "},{"location":"guides/dev_cycle/#2-train-a-model-on-the-ner-dataset","title":"2. Train a model on the NER dataset","text":"

    We could choose a large language model and train the model on the NER fine-tuning task. The model would then be able to recognize and tag medical terms in any given text data.

    "},{"location":"guides/dev_cycle/#strategy-2-vector-knowledge-graph-search","title":"Strategy 2: Vector knowledge graph search","text":""},{"location":"guides/dev_cycle/#1-create-a-vectorized-knowledge-graph","title":"1. Create a vectorized knowledge graph","text":"

    We use an LLM to create a vectorized layer over SNOMED-CT. This layer can be used to semantically search for \"seed\" nodes in the graph. We can then use these seed nodes to traverse nodes a few hops adjacent to the seed nodes.

    "},{"location":"guides/dev_cycle/#2-retrieval-augmented-ner","title":"2. Retrieval Augmented NER","text":"

    We use the knowledge graph search results to not only annotate each node seen in the EHR document, but also add additional information about those nodes derived from their adjacent nodes. But first, we also need to make sure that we query the right information instead of simply vectorizing chunks and throwing them at semantic search. We would need a \"traditional\" pipeline for this - lemmatization followed by POS tagging. We use both proper nouns and out-of-vocabulary words as search query terms.

    "},{"location":"guides/dev_cycle/#boilerplate","title":"Boilerplate","text":"

    To setup a local geniusrise project, simply use the geniusrise project creator script:

    curl -L https://geniusrise.new | bash # TODO: host this or create a template github repo\n
    "},{"location":"guides/dev_cycle/#existing-project","title":"Existing project","text":"

    If you wish to add geniusrise to an existing project:

    pip install geniusrise\npip freeze > requirements.txt\n
    "},{"location":"guides/dev_cycle/#from-scratch","title":"From scratch","text":"

    Here is how to set up from scratch:

    #!/bin/bash\n# Prompt for project details\nread -p \"Enter your project name: \" project_name\nread -p \"Enter your name: \" author_name\nread -p \"Enter your email: \" author_email\nread -p \"Enter your GitHub username: \" github_username\nread -p \"Enter a brief description of your project: \" project_description\n# Create project structure\nmkdir $project_name\ncd $project_name\nmkdir $project_name tests\n# Create basic files\ntouch README.md\ntouch requirements.txt\ntouch setup.py\ntouch Makefile\ntouch $project_name/__init__.py\ntouch tests/__init__.py\n# Populate README.md\necho \"# $project_name\" > README.md\necho \"\\n$project_description\" >> README.md\n# Populate setup.py\ncat <<EOL > setup.py\nfrom setuptools import setup, find_packages\nwith open(\"README.md\", \"r\", encoding=\"utf-8\") as fh:\nlong_description = fh.read()\nsetup(\nname='$project_name',\nversion='0.1.0',\npackages=find_packages(exclude=[\"tests\", \"tests.*\"]),\ninstall_requires=[],\npython_requires='>=3.10',\nauthor='$author_name',\nauthor_email='$author_email',\ndescription='$project_description',\nlong_description=long_description,\nlong_description_content_type='text/markdown',\nurl='https://github.com/$github_username/$project_name',\nclassifiers=[\n'Programming Language :: Python :: 3',\n'License :: OSI Approved :: MIT License',\n'Operating System :: OS Independent',\n],\n)\nEOL\n# Populate Makefile\ncat <<EOL > Makefile\nsetup:\n@pip install -r ./requirements.txt\ntest:\n@coverage run -m pytest -v ./tests\npublish:\n@python setup.py sdist bdist_wheel\n@twine upload dist/$project_name-\\$${VERSION}-* --verbose\nEOL\n# Set up the virtual environment and install necessary packages\nvirtualenv venv -p `which python3.10`\nsource venv/bin/activate\npip install twine setuptools pytest coverage geniusrise\npip freeze > requirements.txt\n# Fetch .pre-commit-config.yaml and .gitignore from geniusrise/geniusrise\ncurl -O 
https://raw.githubusercontent.com/geniusrise/geniusrise/master/.pre-commit-config.yaml\ncurl -O https://raw.githubusercontent.com/geniusrise/geniusrise/master/.gitignore\necho \"Project $project_name initialized!\"\n

    Create a install script out of this and execute it:

    touch install.sh\nchmod +x ./install.sh\n./install.sh\n
    "},{"location":"guides/dev_cycle/#preparing-the-knowledge-graph","title":"Preparing the knowledge graph","text":"

    Let's prepare the knowledge graph by vectorizing each node's knowledge into a vectorized flat memory. This is a periodic activity that one needs to do whenever a new version of SNOMED-CT is released (typically bi-annually).

    We use the international version of SNOMED-CT from https://www.nlm.nih.gov/healthit/snomedct/international.html.

    mkdir data\ncd data\n

    Go to UMLS or IHTSDO website, register, agree to the agreements and after approval, download the knowledge graph.

    Unzip the file

    unzip SnomedCT_InternationalRF2_PRODUCTION_20230901T120000Z.zip\n
    "},{"location":"guides/dev_cycle/#todo","title":"TODO \ud83d\ude22","text":"

    Need to document https://github.com/geniusrise/geniusrise-healthcare

    "},{"location":"guides/installation/","title":"Installation","text":"

    Geniusrise is composed of the core framework and various plugins that implement specific tasks. The core has to be installed first, and after that selected plugins can be installed as and when required.

    "},{"location":"guides/installation/#installing-geniusrise","title":"Installing Geniusrise","text":""},{"location":"guides/installation/#using-pip","title":"Using pip","text":"

    To install the core framework using pip in local env, simply run:

    pip install geniusrise\n

    Or if you wish to install at user level:

    pip install geniusrise --user\n

    Or on a global level (might conflict with your OS package manager):

    sudo pip install geniusrise\n

    To verify the installation, you can check whether the geniusrise binary exists in PATH:

    which genius\n\ngenius --help\n
    <!--

    "},{"location":"guides/installation/#docker","title":"Docker","text":"

    Geniusrise containers are available on Docker hub.

    docker run -it --rm geniusrise/geniusrise:latest\n``` -->\n\n## Installing Plugins\n---\n\nGeniusrise offers a variety of plugins that act as composable lego blocks. To install a specific plugin, use the following format:\n\n```bash\npip install geniusrise-<plugin-name>\n

    Replace <plugin-name> with the name of the desired plugin.

    Available plugins are:

    1. geniusrise-text: bolts for text models
    2. geniusrise-vision: bolts for vision models
    3. geniusrise-audio: bolts for audio models
    4. geniusrise-openai: bolts for openai
    5. geniusrise-listeners: spouts for streaming event listeners
    6. geniusrise-databases: spouts for databases

    Please visit https://github.com/geniusrise for a complete list of available plugins.

    "},{"location":"guides/installation/#using-conda","title":"Using Conda","text":"
    1. Activate the environment:
    conda activate your-env\n
    1. Install Geniusrise:
    pip install geniusrise\n
    "},{"location":"guides/installation/#using-poetry","title":"Using Poetry","text":"
    1. Add Geniusrise as a dependency:
    poetry add geniusrise\n

    For plugins:

    poetry add geniusrise-<plugin-name>\n
    "},{"location":"guides/installation/#development","title":"Development","text":"

    For development, you may want to install from the repo:

    git clone git@github.com:geniusrise/geniusrise.git\ncd geniusrise\nvirtualenv venv -p `which python3.10`\nsource venv/bin/activate\npip install -r ./requirements.txt\n\nmake install # installs in your local venv directory\n

    That's it! You've successfully installed Geniusrise and its plugins. \ud83c\udf89

    "},{"location":"guides/installation/#alternative-methods-todo","title":"Alternative Methods: TODO \ud83d\ude2d","text":""},{"location":"guides/installation/#using-package-managers","title":"Using package managers","text":"

    Geniusrise is also available as native packages for some Linux distributions.

    "},{"location":"guides/installation/#aur","title":"AUR","text":"

    Geniusrise is available on the AUR for arch and derived distros.

    yay -S geniusrise\n

    or directly from git master:

    yay -S geniusrise-git\n
    "},{"location":"guides/installation/#ppa","title":"PPA","text":"

    Geniusrise is also available on the PPA for debian-based distros.

    Coming soon \ud83d\ude22

    "},{"location":"guides/installation/#brew-cask","title":"Brew (cask)","text":"

    Coming soon \ud83d\ude22

    "},{"location":"guides/installation/#nix","title":"Nix","text":"

    Coming soon \ud83d\ude22

    "},{"location":"guides/kubernetes/","title":"Kubernetes Runner","text":""},{"location":"guides/kubernetes/#overview","title":"Overview","text":"

    This runner module enables running spouts or bolts on Kubernetes. It provides the ability to:

    1. create
    2. delete
    3. scale
    4. describe

    various Kubernetes resources like

    1. Pods
    2. Deployments
    3. Services
    "},{"location":"guides/kubernetes/#command-line-interface","title":"Command-Line Interface","text":"

    The following commands are available:

    1. create: Create a Kubernetes resource.
    2. delete: Delete a Kubernetes resource.
    3. status: Get the status of a Kubernetes resource.
    4. logs: Get logs of a Kubernetes resource.
    5. pod: Describe a Kubernetes pod.
    6. pods: List all pods.
    7. service: Describe a Kubernetes service.
    8. services: List all services.
    9. deployment: Describe a Kubernetes deployment.
    10. deployments: List all deployments.
    11. scale: Scale a Kubernetes deployment.
    "},{"location":"guides/kubernetes/#common-arguments","title":"Common Arguments","text":"

    These arguments are common to all commands:

    • --kube_config_path: Path to the kubeconfig file.
    • --cluster_name: Name of the Kubernetes cluster.
    • --context_name: Name of the kubeconfig context.
    • --namespace: Kubernetes namespace (default is \"default\").
    • --labels: Labels for Kubernetes resources, as a JSON string.
    • --annotations: Annotations for Kubernetes resources, as a JSON string.
    • --api_key: API key for Kubernetes cluster.
    • --api_host: API host for Kubernetes cluster.
    • --verify_ssl: Whether to verify SSL certificates (default is True).
    • --ssl_ca_cert: Path to the SSL CA certificate.
    "},{"location":"guides/kubernetes/#create_resource","title":"create_resource","text":"

    Create a Kubernetes resource.

    • name: Name of the resource.
    • image: Docker image for the resource.
    • command: Command to run in the container.
    • --registry_creds: Credentials for Docker registry, as a JSON string.
    • --is_service: Whether this is a service (default is False).
    • --replicas: Number of replicas (default is 1).
    • --port: Service port (default is 80).
    • --target_port: Container target port (default is 8080).
    • --env_vars: Environment variables, as a JSON string.

    Example:

    python script.py create_resource my_resource nginx \"nginx -g 'daemon off;'\" --replicas=3\n
    "},{"location":"guides/kubernetes/#delete_resource","title":"delete_resource","text":"

    Delete a Kubernetes resource.

    • name: Name of the resource.
    • --is_service: Whether this is a service (default is False).

    Example:

    python script.py delete_resource my_resource\n
    "},{"location":"guides/kubernetes/#get_status","title":"get_status","text":"

    Get the status of a Kubernetes resource.

    • name: Name of the resource.

    Example:

    python script.py get_status my_resource\n
    "},{"location":"guides/kubernetes/#get_logs","title":"get_logs","text":"

    Get logs of a Kubernetes resource.

    • name: Name of the resource.
    • --tail_lines: Number of lines to tail (default is 10).

    Example:

    python script.py get_logs my_resource --tail_lines=20\n
    "},{"location":"guides/kubernetes/#scale","title":"scale","text":"

    Scale a Kubernetes deployment.

    • name: Name of the deployment.
    • replicas: Number of replicas.

    Example:

    python script.py scale my_resource 5\n
    "},{"location":"guides/kubernetes/#list_pods-list_services-list_deployments","title":"list_pods, list_services, list_deployments","text":"

    List all pods, services, or deployments.

    Example:

    python script.py list_pods\n
    "},{"location":"guides/kubernetes/#describe_pod-describe_service-describe_deployment","title":"describe_pod, describe_service, describe_deployment","text":"

    Describe a pod, service, or deployment.

    • name: Name of the resource.

    Example:

    python script.py describe_pod my_pod\n
    "},{"location":"guides/kubernetes/#yaml-configuration","title":"YAML Configuration","text":"

    You can also use a YAML configuration file to specify the common arguments. The command-specific arguments will still come from the command line.

    Example YAML:

    deploy:\ntype: \"k8s\"\nargs:\nkube_config_path: \"\"\ncluster_name: \"geniusrise\"\ncontext_name: \"eks\"\nnamespace: \"geniusrise_k8s_test\"\nlabels: { \"tag1\": \"lol\", \"tag2\": \"lel\" }\nannotations: {}\napi_key:\napi_host: localhost\nverify_ssl: true\nssl_ca_cert:\n

    To use the YAML configuration, you can read it in your Python script and pass the arguments to the K8sResourceManager methods.

    Example:

    python script.py --config=my_config.yaml create_resource my_resource nginx \"nginx -g 'daemon off;'\" --replicas=3\n

    In this example, the --config=my_config.yaml would be used to read the common arguments from the YAML file, and the rest of the arguments would be taken from the command line.

    "},{"location":"guides/local/","title":"Local setup","text":"

    Let's create a workspace for local experimentation. We will not build anything here, just try to use whatever components are available. This is what a low-code workflow could look like.

    Let's create a workflow in which:

    1. A web server listens for all kinds of HTTP events.
      1. Clients send the following information to the server:
        1. HTTP request
        2. Response and response status code
      2. The server buffers events in batches of 1000 and uploads them onto S3.
    2. Train a small LLM model on the data to be used to predict whether the request was valid.

    A representation of the process using a sequence diagram:

    3df6131b-2989-469b-9203-56d38b88b3ee

    This model could be used to predict if a request will fail before serving it. It could also be used to classify requests as malicious etc.

    "},{"location":"guides/local/#install","title":"Install","text":"

    Let's start by installing geniusrise and its components in a local virtual environment.

    1. Create a directory:
    mkdir test\ncd test\n
    1. Create a virtualenv:
    virtualenv venv\nsource venv/bin/activate\n
    1. Install geniusrise
    pip install geniusrise\npip install geniusrise-listeners\npip install geniusrise-huggingface\n
    1. Save the installed package versions
    pip freeze > requirements.txt\n
    1. Verify if everything is installed:
    $ genius list\n\n+--------------------------------------------+-------+\n| Name                                       | Type  |\n+--------------------------------------------+-------+\n| TestSpoutCtlSpout                          | Spout |\n| Kafka                                      | Spout |\n| MQTT                                       | Spout |\n| Quic                                       | Spout |\n| RESTAPIPoll                                | Spout |\n| RabbitMQ                                   | Spout |\n| RedisPubSub                                | Spout |\n| RedisStream                                | Spout |\n| SNS                                        | Spout |\n| SQS                                        | Spout |\n| Udp                                        | Spout |\n| Webhook                                    | Spout |\n| Websocket                                  | Spout |\n| TestBoltCtlBolt                            | Bolt  |\n| HuggingFaceClassificationFineTuner         | Bolt  |\n| HuggingFaceCommonsenseReasoningFineTuner   | Bolt  |\n| HuggingFaceFineTuner                       | Bolt  |\n| HuggingFaceInstructionTuningFineTuner      | Bolt  |\n| HuggingFaceLanguageModelingFineTuner       | Bolt  |\n| HuggingFaceNamedEntityRecognitionFineTuner | Bolt  |\n| HuggingFaceQuestionAnsweringFineTuner      | Bolt  |\n| HuggingFaceSentimentAnalysisFineTuner      | Bolt  |\n| HuggingFaceSummarizationFineTuner          | Bolt  |\n| HuggingFaceTranslationFineTuner            | Bolt  |\n| NamedEntityRecognitionFineTuner            | Bolt  |\n| OpenAIClassificationFineTuner              | Bolt  |\n| OpenAICommonsenseReasoningFineTuner        | Bolt  |\n| OpenAIFineTuner                            | Bolt  |\n| OpenAIInstructionFineTuner                 | Bolt  |\n| OpenAILanguageModelFineTuner               | Bolt  |\n| OpenAIQuestionAnsweringFineTuner           | Bolt  |\n| OpenAISentimentAnalysisFineTuner           | Bolt  |\n| 
OpenAISummarizationFineTuner               | Bolt  |\n| OpenAITranslationFineTuner                 | Bolt  |\n+--------------------------------------------+-------+\n
    "},{"location":"guides/local/#input-data","title":"Input Data","text":"

    Let's start with the server, which has to listen for HTTP events. We can use the Webhook listener for this purpose.

    Next, we have to ask ourselves 2 things:

    1. Where do we want the output?
    2. A: in s3 in batches (output = stream_to_batch)
    3. Do we want monitoring?
    4. A: no (state = none)

    Let's run the listener:

    genius Webhook rise \\\nstream_to_batch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder train \\\nnone \\\nlisten \\\n--args port=8080\n

    The server should be up with:

        \ud83d\ude80 Initialized Task with ID: Webhookaca9cb67-5c41-420c-9445-cf0015d9d866\n    [17/Sep/2023:14:00:18] ENGINE Bus STARTING\nCherryPy Checker:\nThe Application mounted at '' has an empty config.\n\n[17/Sep/2023:14:00:18] ENGINE Started monitor thread 'Autoreloader'.\n    [17/Sep/2023:14:00:18] ENGINE Serving on http://0.0.0.0:8080\n    [17/Sep/2023:14:00:18] ENGINE Bus STARTED\n
    "},{"location":"guides/local/#data","title":"Data","text":"

    Let's create some data for testing:

    while true; do\n# Generate a random customer ID\ncustomer_id=$(( RANDOM % 10000001 ))\n# Determine the status code based on the customer ID\nif [ $customer_id -gt 10000000 ]; then\nstatus_code=\"1\"\nelif [ $customer_id -le 10000 ]; then\nstatus_code=\"1\"\nelse\nstatus_code=\"0\"\nfi\n# Make the API call\ncurl --header \"Content-Type: application/json\" \\\n--request POST \\\n--data \"{\\\"text\\\":\\\"GET /api/v1/customer/$customer_id\\\",\\\"label\\\":\\\"$status_code\\\"}\" \\\nhttp://localhost:8080/application-1-tag-a-tag-b-whatever\ndone\n

    Verify that the data is being dumped in the right place with the correct format:

    $ aws s3 ls s3://geniusrise-test/train/\n\n2023-08-11 14:02:47      28700 DGtx4KjVZw5C2gfWmTVCmD.json\n2023-08-11 14:02:50      28700 UYXAvn8JC2yk6pMuAjKMPq.json\n

    The Webhook spout generates data like this:

    {'data': {'text': 'GET /api/v1/customer/28546', 'label': '401'},\n'endpoint': 'http://localhost:8080/application-1-tag-a-tag-b-whatever',\n'headers': {'Remote-Addr': '127.0.0.1',\n'Host': 'localhost:8080',\n'User-Agent': 'curl/8.1.2',\n'Accept': '*/*',\n'Content-Type': 'application/json',\n'Content-Length': '51'}}\n

    We need to extract the data field from this data before training. This can be done by passing a lambda lambda x: x['data'] to the fine tuning bolt.

    More info on other arguments can be found with:

    genius Webhook rise --help\n
    "},{"location":"guides/local/#fine-tuning","title":"Fine-tuning","text":"

    Now let's test the second leg of this, the model. Since we want to use the model for predicting the status code given the data, we will use classification as our task for fine-tuning the model.

    Let's use the bert-base-uncased model for now, as it is small enough to run on a CPU on a laptop. We also create a model on huggingface hub to store the model once it is trained: ixaxaar/geniusrise-api-status-code-prediction.

    genius HuggingFaceClassificationFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder api-prediction \\\nnone \\\nfine_tune \\\n--args \\\nmodel_name=\"bert-base-uncased\" \\\ntokenizer_name=\"bert-base-uncased\" \\\nnum_train_epochs=2 \\\nper_device_train_batch_size=64 \\\nmodel_class=BertForSequenceClassification \\\ntokenizer_class=BertTokenizer \\\ndata_masked=True \\\ndata_extractor_lambda=\"lambda x: x['data']\" \\\nhf_repo_id=ixaxaar/geniusrise-api-status-code-prediction \\\nhf_commit_message=\"initial local testing\" \\\nhf_create_pr=True \\\nhf_token=hf_lalala\n
        \ud83d\ude80 Initialized Task with ID: HuggingFaceClassificationFineTuner772627a0-43a5-4f9d-9b0f-4362d69ba08c\n    Found credentials in shared credentials file: ~/.aws/credentials\nSome weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\nYou should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n    Loading dataset from /tmp/tmp3h3wav4h/train\n    New labels detected, ignore if fine-tuning\nMap: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 300/300 [00:00<00:00, 4875.76 examples/s]\n{'train_runtime': 13.3748, 'train_samples_per_second': 44.861, 'train_steps_per_second': 22.43, 'train_loss': 0.6400579833984374, 'epoch': 2.0}\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 300/300 [00:13<00:00, 22.43it/s]\npytorch_model.bin: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 438M/438M [01:29<00:00, 4.88MB/s]\n    Successfully executed the bolt method: fine_tune \ud83d\udc4d\n

    You'll see a progress bar at the bottom, on completion, a pull request will appear on huggingface hub. Here is the model we trained: https://huggingface.co/ixaxaar/geniusrise-api-status-code-prediction.

    "},{"location":"guides/local/#packaging","title":"Packaging","text":"

    Finally, let's package this workflow so that we can run it again and again.

    Create a genius.yml file, similar to the cli commands:

    version: 1\nspouts:\nhttp_listener:\nname: Webhook\nmethod: listen\nargs:\nport: 8080\nstate:\ntype: none\noutput:\ntype: stream_to_batch\nargs:\nbucket: geniusrise-test\nfolder: train\nhttp_classifier:\nname: HuggingFaceClassificationFineTuner\nmethod: fine_tune\nargs:\nmodel_name: \"bert-base-uncased\"\ntokenizer_name: \"bert-base-uncased\"\nnum_train_epochs: 2\nper_device_train_batch_size: 2\nmodel_class: BertForSequenceClassification\ntokenizer_class: BertTokenizer\ndata_masked: True\ndata_extractor_lambda: \"lambda x: x['data']\"\nhf_repo_id: ixaxaar/geniusrise-api-status-code-prediction\nhf_commit_message: \"initial local testing\"\nhf_create_pr: True\nhf_token: hf_lalala\ninput:\ntype: spout\nargs:\nname: http_listener\noutput:\ntype: batch\nargs:\nbucket: geniusrise-test\nfolder: model\n

    Finally run them:

    genius rise\n

    Or run them individually:

    genius rise --spout all\ngenius rise --bolt all\n

    Package this entire workspace into a docker container and upload to ECR:

    genius docker package geniusrise ecr \\\n--auth '{\"aws_region\": \"ap-south-1\"}' \\\n--packages geniusrise-listeners geniusrise-huggingface\n
    "},{"location":"guides/local/#deployment","title":"Deployment","text":"

    Deploy the spout and bolt to Kubernetes. We could use the command line to deploy:

    genius Webhook deploy \\\nstream_to_batch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder train \\\nnone \\\nk8s \\\n--k8s_kind service \\\n--k8s_namespace geniusrise \\\n--k8s_cluster_name geniusrise-dev \\\n--k8s_context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev \\\n--k8s_name webhook \\\n--k8s_image \"genius-dev.dkr.ecr.ap-south-1.amazonaws.com/geniusrise\" \\\n--k8s_env_vars '{\"AWS_DEFAULT_REGION\": \"ap-south-1\", \"AWS_SECRET_ACCESS_KEY\": \"your-key\", \"AWS_ACCESS_KEY_ID\": \"your-secret\"}' \\\n--k8s_port 8080 \\\n--k8s_target_port 8080 \\\nlisten \\\n--args port=8080\n

    Or we could simply use the yaml we created in the previous step:

    genius rise up\n

    See the status of the deployment:

    # Find the pod id\ngenius pod show \\\n--namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev 2>&1 | grep Running\n\ngenius pod describe \\\nwebhook-75c4bff67d-hbhts \\\n--namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n\ngenius deployment describe \\\nwebhook \\\n--namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n\ngenius service describe \\\nwebhook \\\n--namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/packaging/","title":"Packaging","text":""},{"location":"guides/packaging/#overview","title":"Overview","text":"

    Geniusrise uses docker for packaging and delivering modules.

    "},{"location":"guides/packaging/#capabilities","title":"Capabilities","text":"
    • Docker Image Creation: Create Docker images with custom base images, working directories, and local directories.
    • Package Installation: Install both OS-level and Python packages during the Docker image creation.
    • Environment Variables: Set environment variables in the Docker container.
    • Multi-Repository Support: Upload Docker images to multiple types of container repositories.
    • Authentication: Supports various authentication methods for different container repositories.
    "},{"location":"guides/packaging/#command-line-interface","title":"Command-Line Interface","text":""},{"location":"guides/packaging/#syntax","title":"Syntax","text":"
    genius docker package <image_name> <repository> [options]\n
    "},{"location":"guides/packaging/#parameters","title":"Parameters","text":"
    • <image_name>: The name of the Docker image to build and upload.
    • <repository>: The container repository to upload to (e.g., \"ECR\", \"DockerHub\", \"Quay\", \"ACR\", \"GCR\").
    "},{"location":"guides/packaging/#options","title":"Options","text":"
    • --auth: Authentication credentials as a JSON string. Default is an empty JSON object.
    • --base_image: The base image to use for the Docker container. Default is \"nvidia/cuda:12.2.0-runtime-ubuntu20.04\".
    • --workdir: The working directory in the Docker container. Default is \"/app\".
    • --local_dir: The local directory to copy into the Docker container. Default is \".\".
    • --packages: List of Python packages to install in the Docker container. Default is an empty list.
    • --os_packages: List of OS packages to install in the Docker container. Default is an empty list.
    • --env_vars: Environment variables to set in the Docker container. Default is an empty dictionary.
    "},{"location":"guides/packaging/#authentication-details","title":"Authentication Details","text":"
    • ECR: {\"aws_region\": \"ap-south-1\", \"aws_secret_access_key\": \"aws_key\", \"aws_access_key_id\": \"aws_secret\"}
    • DockerHub: {\"dockerhub_username\": \"username\", \"dockerhub_password\": \"password\"}
    • ACR: {\"acr_username\": \"username\", \"acr_password\": \"password\", \"acr_login_server\": \"login_server\"}
    • GCR: {\"gcr_key_file_path\": \"/path/to/keyfile.json\", \"gcr_repository\": \"repository\"}
    • Quay: {\"quay_username\": \"username\", \"quay_password\": \"password\"}
    "},{"location":"guides/packaging/#examples","title":"Examples","text":""},{"location":"guides/packaging/#uploading-to-ecr-amazon-elastic-container-registry","title":"Uploading to ECR (Amazon Elastic Container Registry)","text":"
    genius docker package geniusrise ecr --auth '{\"aws_region\": \"ap-south-1\"}'\n
    "},{"location":"guides/packaging/#uploading-to-dockerhub","title":"Uploading to DockerHub","text":"
    genius docker package geniusrise dockerhub --auth '{\"dockerhub_username\": \"username\", \"dockerhub_password\": \"password\"}'\n
    "},{"location":"guides/packaging/#uploading-to-acr-azure-container-registry","title":"Uploading to ACR (Azure Container Registry)","text":"
    genius docker package geniusrise acr --auth '{\"acr_username\": \"username\", \"acr_password\": \"password\", \"acr_login_server\": \"login_server\"}'\n
    "},{"location":"guides/packaging/#uploading-to-gcr-google-container-registry","title":"Uploading to GCR (Google Container Registry)","text":"
    genius docker package geniusrise gcr --auth '{\"gcr_key_file_path\": \"/path/to/keyfile.json\", \"gcr_repository\": \"repository\"}'\n
    "},{"location":"guides/packaging/#uploading-to-quay","title":"Uploading to Quay","text":"
    genius docker package geniusrise quay --auth '{\"quay_username\": \"username\", \"quay_password\": \"password\"}'\n
    "},{"location":"guides/packaging/#uploading-with-custom-packages-and-os-packages","title":"Uploading with Custom Packages and OS Packages","text":"
    genius docker package geniusrise dockerhub \\\n--packages geniusrise-listeners geniusrise-databases geniusrise-huggingface geniusrise-openai \\\n--os_packages libmysqlclient-dev libldap2-dev libsasl2-dev libssl-dev\n
    "},{"location":"guides/packaging/#uploading-with-environment-variables","title":"Uploading with Environment Variables","text":"
    genius docker package geniusrise dockerhub --env_vars '{\"API_KEY\": \"123456\", \"ENV\": \"production\"}'\n
    "},{"location":"guides/packaging/#complex-examples","title":"Complex Examples","text":""},{"location":"guides/packaging/#1-uploading-to-ecr-with-custom-base-image-and-packages","title":"1. Uploading to ECR with Custom Base Image and Packages","text":"

    This example demonstrates how to upload a Docker image to ECR with a custom base image and additional Python packages.

    genius docker package my_custom_image ecr \\\n--auth '{\"aws_region\": \"us-west-2\", \"aws_secret_access_key\": \"aws_key\", \"aws_access_key_id\": \"aws_secret\"}' \\\n--base_image \"python:3.9-slim\" \\\n--packages \"numpy pandas scikit-learn\" \\\n--os_packages \"gcc g++\"\n
    "},{"location":"guides/packaging/#2-uploading-to-dockerhub-with-environment-variables-and-working-directory","title":"2. Uploading to DockerHub with Environment Variables and Working Directory","text":"

    This example shows how to upload a Docker image to DockerHub with custom environment variables and a specific working directory.

    genius docker package my_app dockerhub \\\n--auth '{\"dockerhub_username\": \"username\", \"dockerhub_password\": \"password\"}' \\\n--env_vars '{\"DEBUG\": \"True\", \"SECRET_KEY\": \"mysecret\"}' \\\n--workdir \"/my_app\"\n
    "},{"location":"guides/packaging/#3-uploading-to-acr-with-multiple-local-directories","title":"3. Uploading to ACR with Multiple Local Directories","text":"

    In this example, we upload a Docker image to Azure Container Registry (ACR) and specify multiple local directories to be copied into the Docker container.

    # First, create a Dockerfile that copies multiple directories\n# Then use the following command\ngenius docker package multi_dir_app acr \\\n--auth '{\"acr_username\": \"username\", \"acr_password\": \"password\", \"acr_login_server\": \"login_server\"}' \\\n--local_dir \"./app ./config\"\n
    "},{"location":"guides/packaging/#4-uploading-to-gcr-with-custom-base-image-packages-and-os-packages","title":"4. Uploading to GCR with Custom Base Image, Packages, and OS Packages","text":"

    This example demonstrates how to upload a Docker image to Google Container Registry (GCR) with a custom base image, Python packages, and OS packages.

    genius docker package my_ml_model gcr \\\n--auth '{\"gcr_key_file_path\": \"/path/to/keyfile.json\", \"gcr_repository\": \"repository\"}' \\\n--base_image \"tensorflow/tensorflow:latest-gpu\" \\\n--packages \"scipy keras\" \\\n--os_packages \"libsm6 libxext6 libxrender-dev\"\n
    "},{"location":"guides/packaging/#5-uploading-to-quay-with-all-customizations","title":"5. Uploading to Quay with All Customizations","text":"

    This example shows how to upload a Docker image to Quay with all available customizations like base image, working directory, local directory, Python packages, OS packages, and environment variables.

    genius docker package full_custom quay \\\n--auth '{\"quay_username\": \"username\", \"quay_password\": \"password\"}' \\\n--base_image \"alpine:latest\" \\\n--workdir \"/custom_app\" \\\n--local_dir \"./src\" \\\n--packages \"flask gunicorn\" \\\n--os_packages \"bash curl\" \\\n--env_vars '{\"FLASK_ENV\": \"production\", \"PORT\": \"8000\"}'\n
    "},{"location":"guides/pin/","title":"Building an AI pin","text":"

    Let's do an end-to-end project where we build an AI pin to talk to a multi-modal language model.

    The system consists of two parts:

    1. Device: A low-power network device with camera, speaker and microphone
    2. Desktop: A central machine hosting the LLM, possibly a desktop computer running geniusrise
    ee58786d-2c61-4bb3-8eab-202074080965

    We start with an ESP32-based platform, as there are many available these days. Let's look at two of them:

    "},{"location":"guides/pin/#ttgo","title":"TTGO","text":"

    The TTGO T-Camera Plus is a unique ESP32 module featuring a built-in camera and display. It's designed for applications that require direct image capture and display capabilities without the need for external screens or cameras.

    • CPU: Dual-core Tensilica LX6 microprocessor up to 240 MHz
    • Memory: 520 KB SRAM, 4 MB PSRAM
    • Connectivity: Wi-Fi (802.11 b/g/n), Bluetooth (Classic and BLE)
    • Camera: OV2640 camera module, 2 Megapixels
    • Display: 1.3-inch OLED display
    • Extras: Fish-eye lens, optional MPU6050 module for motion sensing

    "},{"location":"guides/pin/#seeed-studio-xiao","title":"Seeed Studio XIAO","text":"

    Seeed Studio XIAO ESP32C3 is a mini but powerful module. It's part of the Seeed Studio XIAO series, known for its compact design and reliability in various IoT projects.

    • CPU: RISC-V single-core processor, up to 160 MHz
    • Memory: 400 KB SRAM, 4 MB Flash
    • Connectivity: Wi-Fi (802.11 b/g/n), Bluetooth 5 (LE)
    • I/O Pins: Rich set of peripherals including GPIOs, UART, SPI, I2C, and more.
    • Size: Ultra-small form factor suitable for wearable devices and compact projects

    "},{"location":"guides/pin/#peripherals","title":"Peripherals","text":"

    We used a bunch of these peripherals wherever the boards did not have them. We usually chose a platform with at least a screen and a camera included and added these peripherals to them.

    "},{"location":"guides/pin/#microphone","title":"Microphone","text":"
    • Model: INMP441 I2S
    • Features: High precision, omnidirectional, MEMS microphone module, Digital I2S interface
    • Usage: Ideal for high-quality audio input and voice command projects

    product-page.

    "},{"location":"guides/pin/#speaker","title":"Speaker","text":"
    • Model: SeeedStudio Grove Speaker
    • Features: Programmable, with built-in amplifier, capable of playing various tones and sounds
    • Usage: Suitable for projects requiring audio output like alarms, voice notifications, and music playback

    product-page

    "},{"location":"guides/pin/#touchscreen","title":"Touchscreen","text":"
    • Model: SeeedStudio Round Display for XIAO
    • Features: Touchscreen capability, round display, perfect for user interface projects
    • Usage: Excellent for compact and wearable devices requiring user interaction

    product-page

    "},{"location":"guides/pin/#connections","title":"Connections","text":"

    Now let's get connected. The following lists all connections; some soldering of headers may be required.

    "},{"location":"guides/pin/#seeed-studio-xiao-connections","title":"Seeed Studio XIAO Connections","text":"

    For the Seeed Studio XIAO, we'll connect a touchscreen display, an INMP441 I2S microphone, and a SeeedStudio Grove Speaker.

    "},{"location":"guides/pin/#touchscreen-display","title":"Touchscreen Display","text":"
    • Display Model: Seeed Studio Round Display for XIAO
    • Connection Type: SPI
    • Required Pins:
    • SCL (Serial Clock) to XIAO's SCL (GPIO18 for SPI clock)
    • SDA (Serial Data) to XIAO's SDA (GPIO19 for SPI MOSI)
    • RES (Reset) to any available GPIO pin (e.g., GPIO21) for display reset
    • DC (Data/Command) to any available GPIO pin (e.g., GPIO22) for data/command selection
    • CS (Chip Select) to any available GPIO pin (e.g., GPIO5) for SPI chip select

    Very easy to connect — the XIAO sits directly on the display.

    "},{"location":"guides/pin/#microphone-inmp441-i2s","title":"Microphone (INMP441 I2S)","text":"
    • Connection Type: I2S
    • Required Pins:
    • WS (Word Select/LRCLK) to GPIO23
    • SCK (Serial Clock) to GPIO18
    • SD (Serial Data) to GPIO19
    "},{"location":"guides/pin/#speaker-seeedstudio-grove","title":"Speaker (SeeedStudio Grove)","text":"
    • Connection Type: Digital I/O
    • Required Pins:
    • SIG to any PWM-capable GPIO pin (e.g., GPIO25) for audio signal
    • GND to GND
    "},{"location":"guides/pin/#ttgo-t-camera-plus-connections","title":"TTGO T-Camera Plus Connections","text":"

    For the TTGO T-Camera Plus, we're connecting an INMP441 I2S microphone and a SeeedStudio Grove Speaker since it already includes a camera and display.

    "},{"location":"guides/pin/#microphone-inmp441-i2s_1","title":"Microphone (INMP441 I2S)","text":"
    • Connection Type: I2S
    • Required Pins:
    • WS (Word Select/LRCLK) to GPIO32
    • SCK (Serial Clock) to GPIO14
    • SD (Serial Data) to GPIO27
    "},{"location":"guides/pin/#speaker-seeedstudio-grove_1","title":"Speaker (SeeedStudio Grove)","text":"
    • Connection Type: Digital I/O
    • Required Pins:
    • SIG to any PWM-capable GPIO pin (e.g., GPIO33) for audio signal
    • GND to GND
    "},{"location":"guides/pin/#general-tips","title":"General Tips","text":"
    • Power Supply: Ensure that all devices are powered appropriately. The XIAO and TTGO can be powered via USB or an external 3.3V power supply.
    • Common Ground: Make sure all components share a common ground connection.
    • Programming: Use the Arduino IDE or ESP-IDF for programming the ESP32 devices. Libraries specific to the peripherals (e.g., display, I2S microphone, and speaker) will be required.
    • I2S Library: For the INMP441 microphone, an I2S library suitable for ESP32 should be used to handle audio input.
    • Display Library: For the touchscreen display, a library compatible with the specific model will be needed for interfacing and graphics rendering.
    "},{"location":"guides/usage/","title":"Usage","text":"

    The easiest way to use geniusrise is to host an API over a desired model. Use one of the examples from text, vision or audio.

    "},{"location":"guides/usage/#run-on-local","title":"Run on Local","text":"

    Say, we are interested in running an API over a vision / multi-modal model such as bakLlava from huggingface:

    "},{"location":"guides/usage/#1-install-geniusrise-and-vision","title":"1. Install geniusrise and vision","text":"
    pip install torch\npip install geniusrise\npip install geniusrise-vision # vision multi-modal models\n# pip install geniusrise-text # text models, LLMs\n# pip install geniusrise-audio # audio models\n
    "},{"location":"guides/usage/#2-use-the-genius-cli-to-run-bakllava","title":"2. Use the genius cli to run bakLlava","text":"
    genius VisualQAAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"llava-hf/bakLlava-v1-hf\" \\\nmodel_class=\"LlavaForConditionalGeneration\" \\\nprocessor_class=\"AutoProcessor\" \\\ndevice_map=\"cuda:0\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\nmax_memory=None \\\ntorchscript=False \\\ncompile=False \\\nflash_attention=False \\\nbetter_transformers=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    "},{"location":"guides/usage/#3-test-the-api","title":"3. Test the API","text":"
    MY_IMAGE=/path/to/test/image\n\n(base64 -w 0 $MY_IMAGE | awk '{print \"{\\\"image_base64\\\": \\\"\"$0\"\\\", \\\"question\\\": \\\"<image>\\nUSER: Whats the content of the image?\\nASSISTANT:\\\", \\\"do_sample\\\": false, \\\"max_new_tokens\\\": 128}\"}' > /tmp/image_payload.json)\ncurl -X POST http://localhost:3000/api/v1/answer_question \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @/tmp/image_payload.json | jq\n
    "},{"location":"guides/usage/#4-save-your-work","title":"4. Save your work","text":"

    Save what you did as a genius.yml file so it can be replicated later:

    version: '1'\nbolts:\nmy_bolt:\nname: VisualQAAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: 'llava-hf/bakLlava-v1-hf'\nmodel_class: 'LlavaForConditionalGeneration'\nprocessor_class: 'AutoProcessor'\ndevice_map: 'cuda:0'\nuse_cuda: True\nprecision: 'bfloat16'\nquantization: 0\nmax_memory: None\ntorchscript: False\ncompile: False\nflash_attention: False\nbetter_transformers: False\nendpoint: '*'\nport: 3000\ncors_domain: 'http://localhost:3000'\nusername: 'user'\npassword: 'password'\n

    To later re-run the same, simply navigate to the directory of this file and do:

    genius rise\n
    "},{"location":"guides/usage/#advanced-usage","title":"Advanced Usage","text":"

    To run a set of APIs — say, a voice -> text -> text -> voice pipeline — create a genius.yml file like this:

    version: \"1\"\nbolts:\nspeech_to_text_bolt:\nname: SpeechToTextAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: openai/whisper-large-v3\nmodel_class: WhisperForConditionalGeneration\nprocessor_class: AutoProcessor\nuse_cuda: true\nprecision: float\nquantization: 0\ndevice_map: cuda:0\nmax_memory: null\ntorchscript: false\ncompile: false\nflash_attention: False\nbetter_transformers: False\nendpoint: \"0.0.0.0\"\nport: 3001\ncors_domain: http://localhost:3001\nusername: user\npassword: password\nchat_bolt:\nname: InstructionAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: TheBloke/Mistral-7B-Instruct-v0.1-GPTQ:gptq-4bit-32g-actorder_True\nmodel_class: AutoModelForCausalLM\ntokenizer_class: AutoTokenizer\nuse_cuda: true\nprecision: float16\nquantization: 0\ndevice_map: auto\nmax_memory: null\ntorchscript: false\ncompile: false\nflash_attention: False\nbetter_transformers: False\nawq_enabled: False\nendpoint: \"0.0.0.0\"\nport: 3002\ncors_domain: http://localhost:3002\nusername: user\npassword: password\ntext_to_speech_bolt:\nname: TextToSpeechAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: suno/bark\nmodel_class: BarkModel\nprocessor_class: BarkProcessor\nuse_cuda: true\nprecision: float32\nquantization: 0\ndevice_map: cuda:0\nmax_memory: null\ntorchscript: false\ncompile: false\nflash_attention: False\nbetter_transformers: False\nendpoint: \"0.0.0.0\"\nport: 3003\ncors_domain: http://localhost:3003\nusername: user\npassword: password\n

    and run:

    genius rise\n

    (similar to how docker-compose works).

    then try it out:

    # Step 1: Transcribe audio file\nTRANSCRIPTION=$(echo $(base64 -w 0 sample.mp3) | awk '{print \"{\\\"audio_file\\\": \\\"\"$0\"\\\", \\\"model_sampling_rate\\\": 16000}\"}' | \\\ncurl -s -X POST http://localhost:3001/api/v1/transcribe \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @- | jq -r '.transcriptions.transcription')\necho \"Transcription: $TRANSCRIPTION\"\n# Step 2: Send a prompt to the text completion API\nPROMPT_JSON=$(jq -n --arg prompt \"$TRANSCRIPTION\" '{\"prompt\": $prompt, \"decoding_strategy\": \"generate\", \"max_new_tokens\": 100, \"do_sample\": true, \"pad_token_id\": 0}')\nCOMPLETION=$(echo $PROMPT_JSON | curl -s -X POST \"http://localhost:3002/api/v1/complete\" \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d @- | jq -r '.completion')\necho \"Completion: $COMPLETION\"\n# Step 3: Synthesize speech from text and play the output\nSYNTH_JSON=$(jq -n --arg text \"$COMPLETION\" '{\"text\": $text, \"output_type\": \"mp3\", \"voice_preset\": \"v2/en_speaker_6\"}')\ncurl -s -X POST \"http://localhost:3003/api/v1/synthesize\" \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d \"$SYNTH_JSON\" | jq -r '.audio_file' | base64 -d > output.mp3\n\nvlc output.mp3 &>/dev/null\n
    "},{"location":"guides/usage/#run-on-remote","title":"Run on Remote","text":"

    If we are running on a remote machine instead, we may want to use our own model stored in S3:

    genius VisualQAAPI rise \\\nbatch \\\n--input_s3_bucket my-s3-bucket \\\n--input_s3_folder model \\\nbatch \\\n--output_s3_bucket my-s3-bucket \\\n--output_s3_folder output-<partition/keys> \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"local\" \\\nmodel_class=\"LlavaForConditionalGeneration\" \\\nprocessor_class=\"AutoProcessor\" \\\ndevice_map=\"cuda:0\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\nmax_memory=None \\\ntorchscript=False \\\ncompile=False \\\nflash_attention=False \\\nbetter_transformers=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    or in YAML:

    version: '1'\nbolts:\nmy_bolt:\nname: VisualQAAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\nbucket: my-s3-bucket\nfolder: model\noutput:\ntype: batch\nargs:\nbucket: my-s3-bucket\nfolder: output-<partition/keys>\nmethod: listen\nargs:\nmodel_name: 'llava-hf/bakLlava-v1-hf'\nmodel_class: 'LlavaForConditionalGeneration'\nprocessor_class: 'AutoProcessor'\ndevice_map: 'cuda:0'\nuse_cuda: True\nprecision: 'bfloat16'\nquantization: 0\nmax_memory: None\ntorchscript: False\ncompile: False\nflash_attention: False\nbetter_transformers: False\nendpoint: '*'\nport: 3000\ncors_domain: 'http://localhost:3000'\nusername: 'user'\npassword: 'password'\n
    "},{"location":"guides/usage/#docker-packaging","title":"Docker packaging","text":"

    Perhaps we also want to now use docker to package?

    Refer Packaging

    "},{"location":"guides/usage/#to-production","title":"To Production","text":"

    And finally deploy as a replicaset on a kubernetes cluster for going to prod!

    Refer Deployment

    "},{"location":"guides/usage/#observability","title":"Observability","text":"

    Prometheus is integrated out of the box — just connect it to your Prometheus cluster. The metrics endpoint listens on the port given by the PROMETHEUS_PORT environment variable, or 8282 by default.

    "},{"location":"guides/yaml/","title":"YAML Structure and Operations","text":"

    The YAML file for Geniusrise is called Geniusfile.yaml and it has the following structure:

    version: 1\nspouts:\n<spout_name>:\nname: <spout_name>\nmethod: <method_name>\nargs:\n<key>: <value>\noutput:\ntype: <output_type>\nargs:\n<key>: <value>\nstate:\ntype: <state_type>\nargs:\n<key>: <value>\ndeploy:\ntype: <deploy_type>\nargs:\n<key>: <value>\nbolts:\n<bolt_name>:\nname: <bolt_name>\nmethod: <method_name>\nargs:\n<key>: <value>\ninput:\ntype: <input_type>\nargs:\n<key>: <value>\noutput:\ntype: <output_type>\nargs:\n<key>: <value>\nstate:\ntype: <state_type>\nargs:\n<key>: <value>\ndeploy:\ntype: <deploy_type>\nargs:\n<key>: <value>\n
    "},{"location":"guides/yaml/#example-yaml-files","title":"Example YAML Files","text":""},{"location":"guides/yaml/#example-1-basic-spout-and-bolt","title":"Example 1: Basic Spout and Bolt","text":"
    version: 1\nspouts:\nTestSpout:\nname: TestSpout\nmethod: listen\nargs:\nport: 8080\noutput:\ntype: batch\nargs:\nbucket: geniusrise-test\nfolder: train\nstate:\ntype: none\ndeploy:\ntype: k8s\nargs:\nkind: job\nname: coretest\nnamespace: geniusrise\nimage: \"geniusrise/geniusrise-core\"\nbolts:\nTestBolt:\nname: TestBolt\nmethod: process\nargs:\nfactor: 2\ninput:\ntype: batch\nargs:\nbucket: geniusrise-test\nfolder: train\noutput:\ntype: batch\nargs:\nbucket: geniusrise-test\nfolder: output\nstate:\ntype: none\ndeploy:\ntype: k8s\nargs:\nkind: job\nname: coretest\nnamespace: geniusrise\nimage: \"geniusrise/geniusrise-core\"\n
    "},{"location":"guides/yaml/#example-2-spout-with-redis-state","title":"Example 2: Spout with Redis State","text":"
    version: 1\nspouts:\nRedisSpout:\nname: RedisSpout\nmethod: listen\nargs:\nport: 8080\noutput:\ntype: streaming\nargs:\noutput_topic: geniusrise-stream\nkafka_servers: \"localhost:9092\"\nstate:\ntype: redis\nargs:\nredis_host: \"localhost\"\nredis_port: 6379\nredis_db: 0\ndeploy:\ntype: k8s\nargs:\nkind: service\nname: redisspout\nnamespace: geniusrise\nimage: \"geniusrise/geniusrise-core\"\n
    "},{"location":"guides/yaml/#example-3-bolt-with-postgres-state-and-ecs-deployment","title":"Example 3: Bolt with Postgres State and ECS Deployment","text":"
    version: 1\nbolts:\nPostgresBolt:\nname: PostgresBolt\nmethod: process\nargs:\nfactor: 2\ninput:\ntype: streaming\nargs:\ninput_topic: geniusrise-stream\nkafka_servers: \"localhost:9092\"\noutput:\ntype: batch\nargs:\nbucket: geniusrise-test\nfolder: output\nstate:\ntype: postgres\nargs:\npostgres_host: \"localhost\"\npostgres_port: 5432\npostgres_user: \"postgres\"\npostgres_password: \"password\"\npostgres_database: \"geniusrise\"\npostgres_table: \"state_table\"\ndeploy:\ntype: ecs\nargs:\nname: postgresbolt\naccount_id: \"123456789012\"\ncluster: \"geniusrise-cluster\"\nsubnet_ids: [\"subnet-abc123\", \"subnet-def456\"]\nsecurity_group_ids: [\"sg-abc123\"]\nlog_group: \"geniusrise-logs\"\nimage: \"geniusrise/geniusrise-core\"\n
    "},{"location":"guides/yaml/#example-4-spout-with-s3-state-and-lambda-deployment","title":"Example 4: Spout with S3 State and Lambda Deployment","text":"
    version: 1\nspouts:\nS3Spout:\nname: S3Spout\nmethod: listen\nargs:\ns3_bucket: geniusrise-data\ns3_prefix: input/\noutput:\ntype: streaming\nargs:\noutput_topic: geniusrise-s3-stream\nkafka_servers: \"localhost:9092\"\nstate:\ntype: s3\nargs:\nstate_bucket: geniusrise-state\nstate_prefix: s3spout/\ndeploy:\ntype: lambda\nargs:\nfunction_name: S3SpoutFunction\nrole_arn: arn:aws:iam::123456789012:role/execution_role\nruntime: python3.8\nhandler: s3spout.handler\n
    "},{"location":"guides/yaml/#example-5-bolt-with-dynamodb-state-and-fargate-deployment","title":"Example 5: Bolt with DynamoDB State and Fargate Deployment","text":"
    version: 1\nbolts:\nDynamoBolt:\nname: DynamoBolt\nmethod: process\nargs:\noperation: multiply\nfactor: 3\ninput:\ntype: streaming\nargs:\ninput_topic: geniusrise-s3-stream\nkafka_servers: \"localhost:9092\"\noutput:\ntype: batch\nargs:\nbucket: geniusrise-output\nfolder: dynamo/\nstate:\ntype: dynamodb\nargs:\ntable_name: DynamoStateTable\nregion: us-east-1\ndeploy:\ntype: fargate\nargs:\ncluster: geniusrise-fargate\ntask_definition: DynamoBoltTask\nlaunch_type: FARGATE\nsubnets: [\"subnet-xyz789\", \"subnet-uvw456\"]\n
    "},{"location":"guides/yaml/#example-6-spout-and-bolt-with-azure-blob-storage-and-azure-functions","title":"Example 6: Spout and Bolt with Azure Blob Storage and Azure Functions","text":"
    version: 1\nspouts:\nAzureBlobSpout:\nname: AzureBlobSpout\nmethod: listen\nargs:\ncontainer_name: geniusrise-input\nstorage_account: geniusriseaccount\nstorage_key: \"your_storage_key_here\"\noutput:\ntype: streaming\nargs:\noutput_topic: geniusrise-azure-stream\nkafka_servers: \"localhost:9092\"\nstate:\ntype: azure_blob\nargs:\ncontainer_name: geniusrise-state\nstorage_account: geniusriseaccount\nstorage_key: \"your_storage_key_here\"\ndeploy:\ntype: azure_function\nargs:\nfunction_name: AzureBlobSpoutFunction\nresource_group: geniusrise-rg\nstorage_account: geniusriseaccount\nplan: Consumption\nbolts:\nAzureBlobBolt:\nname: AzureBlobBolt\nmethod: process\nargs:\noperation: add\nvalue: 5\ninput:\ntype: streaming\nargs:\ninput_topic: geniusrise-azure-stream\nkafka_servers: \"localhost:9092\"\noutput:\ntype: azure_blob\nargs:\ncontainer_name: geniusrise-output\nstorage_account: geniusriseaccount\nstorage_key: \"your_storage_key_here\"\nstate:\ntype: azure_blob\nargs:\ncontainer_name: geniusrise-state\nstorage_account: geniusriseaccount\nstorage_key: \"your_storage_key_here\"\ndeploy:\ntype: azure_function\nargs:\nfunction_name: AzureBlobBoltFunction\nresource_group: geniusrise-rg\nstorage_account: geniusriseaccount\nplan: Consumption\n
    "},{"location":"guides/yaml/#running-and-deploying-yaml-files","title":"Running and Deploying YAML Files","text":"

    To run the YAML file:

    genius rise\n

    To deploy the YAML file:

    genius rise up\n
    "},{"location":"guides/yaml/#managing-kubernetes-deployments","title":"Managing Kubernetes Deployments","text":"

    You can manage Kubernetes deployments using the genius CLI. Here are some example commands:

    # Show pods in a namespace\ngenius pod show --namespace geniusrise --context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise\n\n# Scale a deployment\ngenius pod scale --namespace geniusrise --context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise --name testspout --replicas 3\n# Delete a deployment\ngenius pod delete --namespace geniusrise --context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise --name testspout\n
    "},{"location":"guides/yaml/#managing-ecs-deployments","title":"Managing ECS Deployments","text":"

    You can manage ECS deployments using the genius CLI. Here are some example commands:

    # Show tasks in a cluster\ngenius ecs show --cluster geniusrise-cluster --account_id 123456789012\n# Scale a service\ngenius ecs scale --cluster geniusrise-cluster --account_id 123456789012 --name postgresbolt --desired_count 3\n# Delete a service\ngenius ecs delete --cluster geniusrise-cluster --account_id 123456789012 --name postgresbolt\n
    "},{"location":"listeners/activemq/","title":"ActiveMQ","text":"

    Spout for ActiveMQ

    Bases: Spout

    "},{"location":"listeners/activemq/#activemq.ActiveMQ.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the ActiveMQ class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/activemq/#activemq.ActiveMQ.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ActiveMQ rise \\\nstreaming \\\n--output_kafka_topic activemq_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args host=localhost port=61613 destination=my_queue\n
    "},{"location":"listeners/activemq/#activemq.ActiveMQ.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_activemq_spout:\nname: \"ActiveMQ\"\nmethod: \"listen\"\nargs:\nhost: \"localhost\"\nport: 61613\ndestination: \"my_queue\"\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"activemq_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/activemq/#activemq.ActiveMQ.listen","title":"listen(host, port, destination, username=None, password=None)","text":"

    \ud83d\udcd6 Start listening for data from the ActiveMQ server.

    Parameters:

    Name Type Description Default host str

    The ActiveMQ server host.

    required port int

    The ActiveMQ server port.

    required destination str

    The ActiveMQ destination (queue or topic).

    required username Optional[str]

    The username for authentication. Defaults to None.

    None password Optional[str]

    The password for authentication. Defaults to None.

    None

    Raises:

    Type Description Exception

    If unable to connect to the ActiveMQ server.

    "},{"location":"listeners/amqp/","title":"AMQP","text":"

    Spout for AMQP

    Bases: Spout

    "},{"location":"listeners/amqp/#amqp.RabbitMQ.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the RabbitMQ class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/amqp/#amqp.RabbitMQ.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius RabbitMQ rise \\\nstreaming \\\n--output_kafka_topic rabbitmq_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args queue_name=my_queue host=localhost\n
    "},{"location":"listeners/amqp/#amqp.RabbitMQ.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_rabbitmq_spout:\nname: \"RabbitMQ\"\nmethod: \"listen\"\nargs:\nqueue_name: \"my_queue\"\nhost: \"localhost\"\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"rabbitmq_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/amqp/#amqp.RabbitMQ.listen","title":"listen(queue_name, host='localhost', username=None, password=None)","text":"

    \ud83d\udcd6 Start listening for data from the RabbitMQ server.

    Parameters:

    Name Type Description Default queue_name str

    The RabbitMQ queue name to listen to.

    required host str

    The RabbitMQ server host. Defaults to \"localhost\".

    'localhost' username Optional[str]

    The username for authentication. Defaults to None.

    None password Optional[str]

    The password for authentication. Defaults to None.

    None

    Raises:

    Type Description Exception

    If unable to connect to the RabbitMQ server.

    "},{"location":"listeners/grpc/","title":"GRPC","text":"

    Spout for gRPC

    Bases: Spout

    "},{"location":"listeners/grpc/#grpc.Grpc.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Grpc class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/grpc/#grpc.Grpc.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Grpc rise \\\nstreaming \\\n--output_kafka_topic grpc_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args server_address=localhost:50051 request_data=my_request syntax=proto3\n
    "},{"location":"listeners/grpc/#grpc.Grpc.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_grpc_spout:\nname: \"Grpc\"\nmethod: \"listen\"\nargs:\nserver_address: \"localhost:50051\"\nrequest_data: \"my_request\"\nsyntax: \"proto3\"\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"grpc_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/grpc/#grpc.Grpc.listen","title":"listen(server_address, request_data, syntax, certificate=None, client_key=None, client_cert=None)","text":"

    \ud83d\udcd6 Start listening for data from the gRPC server.

    Parameters:

    Name Type Description Default server_address str

    The address of the gRPC server.

    required request_data str

    Data to send in the request.

    required syntax str

    The syntax to be used (e.g., \"proto3\").

    required certificate Optional[str]

    Optional server certificate for SSL/TLS.

    None client_key Optional[str]

    Optional client key for SSL/TLS.

    None client_cert Optional[str]

    Optional client certificate for SSL/TLS.

    None

    Raises:

    Type Description grpc.RpcError

    If there is an error while processing gRPC messages.

    "},{"location":"listeners/http_polling/","title":"HTTP polling","text":"

    Spout for HTTP polling

    Bases: Spout

    "},{"location":"listeners/http_polling/#http_polling.RESTAPIPoll.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the RESTAPIPoll class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/http_polling/#http_polling.RESTAPIPoll.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius RESTAPIPoll rise \\\nstreaming \\\n--output_kafka_topic restapi_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args url=https://api.example.com method=GET interval=60\n
    "},{"location":"listeners/http_polling/#http_polling.RESTAPIPoll.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_restapi_poll:\nname: \"RESTAPIPoll\"\nmethod: \"listen\"\nargs:\nurl: \"https://api.example.com\"\nmethod: \"GET\"\ninterval: 60\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"restapi_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/http_polling/#http_polling.RESTAPIPoll.listen","title":"listen(url, method, interval=60, body=None, headers=None, params=None)","text":"

    Start polling the REST API for data.

    Parameters:

    Name Type Description Default url str

    The API endpoint.

    required method str

    The HTTP method (GET, POST, etc.).

    required interval int

    The polling interval in seconds. Defaults to 60.

    60 body Optional[Dict]

    The request body. Defaults to None.

    None headers Optional[Dict[str, str]]

    The request headers. Defaults to None.

    None params Optional[Dict[str, str]]

    The request query parameters. Defaults to None.

    None"},{"location":"listeners/http_polling/#http_polling.RESTAPIPoll.poll_api","title":"poll_api(url, method, body=None, headers=None, params=None)","text":"

    \ud83d\udcd6 Start polling the REST API for data.

    Parameters:

    Name Type Description Default url str

    The API endpoint.

    required method str

    The HTTP method (GET, POST, etc.).

    required interval int

    The polling interval in seconds.

    required body Optional[Dict]

    The request body. Defaults to None.

    None headers Optional[Dict[str, str]]

    The request headers. Defaults to None.

    None params Optional[Dict[str, str]]

    The request query parameters. Defaults to None.

    None

    Raises:

    Type Description Exception

    If unable to connect to the REST API server.

    "},{"location":"listeners/kafka/","title":"Kafka","text":"

    Spout for Kafka

    Bases: Spout

    "},{"location":"listeners/kafka/#kafka.Kafka.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Kafka class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/kafka/#kafka.Kafka.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Kafka rise \\\nstreaming \\\n--output_kafka_topic kafka_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args topic=my_topic group_id=my_group\n
    "},{"location":"listeners/kafka/#kafka.Kafka.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_kafka_spout:\nname: \"Kafka\"\nmethod: \"listen\"\nargs:\ntopic: \"my_topic\"\ngroup_id: \"my_group\"\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"kafka_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/kafka/#kafka.Kafka.listen","title":"listen(topic, group_id, bootstrap_servers='localhost:9092', username=None, password=None)","text":"

    \ud83d\udcd6 Start listening for data from the Kafka topic.

    Parameters:

    Name Type Description Default topic str

    The Kafka topic to listen to.

    required group_id str

    The Kafka consumer group ID.

    required bootstrap_servers str

    The Kafka bootstrap servers. Defaults to \"localhost:9092\".

    'localhost:9092' username Optional[str]

    The username for SASL/PLAIN authentication. Defaults to None.

    None password Optional[str]

    The password for SASL/PLAIN authentication. Defaults to None.

    None

    Raises:

    Type Description Exception

    If unable to connect to the Kafka server.

    "},{"location":"listeners/kinesis/","title":"Kinesis","text":"

    Spout for Kinesis

    Bases: Spout

    "},{"location":"listeners/kinesis/#kinesis.Kinesis.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Kinesis class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/kinesis/#kinesis.Kinesis.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Kinesis rise \\\nstreaming \\\n--output_kafka_topic kinesis_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args stream_name=my_stream shard_id=shardId-000000000000\n
    "},{"location":"listeners/kinesis/#kinesis.Kinesis.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_kinesis_spout:\nname: \"Kinesis\"\nmethod: \"listen\"\nargs:\nstream_name: \"my_stream\"\nshard_id: \"shardId-000000000000\"\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"kinesis_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/kinesis/#kinesis.Kinesis.listen","title":"listen(stream_name, shard_id='shardId-000000000000', region_name=None, aws_access_key_id=None, aws_secret_access_key=None)","text":"

    \ud83d\udcd6 Start listening for data from the Kinesis stream.

    Parameters:

    Name Type Description Default stream_name str

    The name of the Kinesis stream.

    required shard_id str

    The shard ID to read from. Defaults to \"shardId-000000000000\".

    'shardId-000000000000' region_name str

    The AWS region name.

    None aws_access_key_id str

    AWS access key ID for authentication.

    None aws_secret_access_key str

    AWS secret access key for authentication.

    None

    Raises:

    Type Description Exception

    If there is an error while processing Kinesis records.

    "},{"location":"listeners/mqtt/","title":"MQTT","text":"

    Spout for MQTT

    Bases: Spout

    "},{"location":"listeners/mqtt/#mqtt.MQTT.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the MQTT class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/mqtt/#mqtt.MQTT.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius MQTT rise \\\nstreaming \\\n--output_kafka_topic mqtt_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args host=localhost port=1883 topic=my_topic\n
    "},{"location":"listeners/mqtt/#mqtt.MQTT.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_mqtt_spout:\nname: \"MQTT\"\nmethod: \"listen\"\nargs:\nhost: \"localhost\"\nport: 1883\ntopic: \"my_topic\"\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"mqtt_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/mqtt/#mqtt.MQTT.listen","title":"listen(host='localhost', port=1883, topic='#', username=None, password=None)","text":"

    \ud83d\udcd6 Start listening for data from the MQTT broker.

    Parameters:

    Name Type Description Default host str

    The MQTT broker host. Defaults to \"localhost\".

    'localhost' port int

    The MQTT broker port. Defaults to 1883.

    1883 topic str

    The MQTT topic to subscribe to. Defaults to \"#\".

    '#' username Optional[str]

    The username for authentication. Defaults to None.

    None password Optional[str]

    The password for authentication. Defaults to None.

    None"},{"location":"listeners/quic/","title":"Quic","text":"

    Spout for Quic

    Bases: Spout

    "},{"location":"listeners/quic/#quic.Quic.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Quic class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/quic/#quic.Quic.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Quic rise \\\nstreaming \\\n--output_kafka_topic quic_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args cert_path=/path/to/cert.pem key_path=/path/to/key.pem host=localhost port=4433\n
    "},{"location":"listeners/quic/#quic.Quic.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_quic_spout:\nname: \"Quic\"\nmethod: \"listen\"\nargs:\ncert_path: \"/path/to/cert.pem\"\nkey_path: \"/path/to/key.pem\"\nhost: \"localhost\"\nport: 4433\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"quic_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/quic/#quic.Quic.handle_stream_data","title":"handle_stream_data(data, stream_id) async","text":"

    Handle incoming stream data.

    :param data: The incoming data.
    :param stream_id: The ID of the stream.

    "},{"location":"listeners/quic/#quic.Quic.listen","title":"listen(cert_path, key_path, host='localhost', port=4433)","text":"

    \ud83d\udcd6 Start listening for data from the QUIC server.

    Parameters:

    Name Type Description Default cert_path str

    Path to the certificate file.

    required key_path str

    Path to the private key file.

    required host str

    Hostname to listen on. Defaults to \"localhost\".

    'localhost' port int

    Port to listen on. Defaults to 4433.

    4433

    Raises:

    Type Description Exception

    If unable to start the QUIC server.

    "},{"location":"listeners/redis_pubsub/","title":"Redis pubsub","text":"

    Spout for Redis pubsub

    Bases: Spout

    "},{"location":"listeners/redis_pubsub/#redis_pubsub.RedisPubSub.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the RedisPubSub class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/redis_pubsub/#redis_pubsub.RedisPubSub.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius RedisPubSub rise \\\nstreaming \\\n--output_kafka_topic redis_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args channel=my_channel host=localhost port=6379 db=0\n
    "},{"location":"listeners/redis_pubsub/#redis_pubsub.RedisPubSub.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_redis_spout:\nname: \"RedisPubSub\"\nmethod: \"listen\"\nargs:\nchannel: \"my_channel\"\nhost: \"localhost\"\nport: 6379\ndb: 0\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"redis_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/redis_pubsub/#redis_pubsub.RedisPubSub.listen","title":"listen(channel, host='localhost', port=6379, db=0, password=None)","text":"

    \ud83d\udcd6 Start listening for data from the Redis Pub/Sub channel.

    Parameters:

    Name Type Description Default channel str

    The Redis Pub/Sub channel to listen to.

    required host str

    The Redis server host. Defaults to \"localhost\".

    'localhost' port int

    The Redis server port. Defaults to 6379.

    6379 db int

    The Redis database index. Defaults to 0.

    0 password Optional[str]

    The password for authentication. Defaults to None.

    None

    Raises:

    Type Description Exception

    If unable to connect to the Redis server.

    "},{"location":"listeners/redis_streams/","title":"Redis streams","text":"

    Spout for Redis streams

    Bases: Spout

    "},{"location":"listeners/redis_streams/#redis_streams.RedisStream.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the RedisStream class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/redis_streams/#redis_streams.RedisStream.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius RedisStream rise \\\nstreaming \\\n--output_kafka_topic redis_stream_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args stream_key=my_stream host=localhost port=6379 db=0\n
    "},{"location":"listeners/redis_streams/#redis_streams.RedisStream.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_redis_stream:\nname: \"RedisStream\"\nmethod: \"listen\"\nargs:\nstream_key: \"my_stream\"\nhost: \"localhost\"\nport: 6379\ndb: 0\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"redis_stream_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/redis_streams/#redis_streams.RedisStream.listen","title":"listen(stream_key, host='localhost', port=6379, db=0, password=None)","text":"

    \ud83d\udcd6 Start the asyncio event loop to listen for data from the Redis stream.

    Parameters:

    Name Type Description Default stream_key str

    The Redis stream key to listen to.

    required host str

    The Redis server host. Defaults to \"localhost\".

    'localhost' port int

    The Redis server port. Defaults to 6379.

    6379 db int

    The Redis database index. Defaults to 0.

    0 password Optional[str]

    The password for authentication. Defaults to None.

    None"},{"location":"listeners/sns/","title":"SNS","text":"

    Spout for SNS

    Bases: Spout

    "},{"location":"listeners/sns/#sns.SNS.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the SNS class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/sns/#sns.SNS.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius SNS rise \\\nstreaming \\\n--output_kafka_topic sns_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten\n
    "},{"location":"listeners/sns/#sns.SNS.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_sns_spout:\nname: \"SNS\"\nmethod: \"listen\"\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"sns_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/sns/#sns.SNS.listen","title":"listen()","text":"

    \ud83d\udcd6 Start the asyncio event loop to listen for data from AWS SNS.

    "},{"location":"listeners/socket.io/","title":"Socket.io","text":"

    Spout for socket.io

    Bases: Spout

    "},{"location":"listeners/socket.io/#socketio.SocketIo.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the SocketIo class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/socket.io/#socketio.SocketIo.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius SocketIo rise \\\nstreaming \\\n--output_kafka_topic socketio_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args url=http://localhost:3000 namespace=/chat\n
    "},{"location":"listeners/socket.io/#socketio.SocketIo.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_socketio_spout:\nname: \"SocketIo\"\nmethod: \"listen\"\nargs:\nurl: \"http://localhost:3000\"\nnamespace: \"/chat\"\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"socketio_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/socket.io/#socketio.SocketIo.listen","title":"listen(url, namespace=None, event='message', auth=None)","text":"

    \ud83d\udcd6 Start listening for data from the Socket.io server.

    Parameters:

    Name Type Description Default url str

    The Socket.io server URL.

    required namespace Optional[str]

    The Socket.io namespace. Defaults to None.

    None event str

    The Socket.io event to listen to. Defaults to \"message\".

    'message' auth Optional[dict]

    Authentication dictionary. Defaults to None.

    None

    Raises:

    Type Description Exception

    If unable to connect to the Socket.io server.

    "},{"location":"listeners/sqs/","title":"SQS","text":"

    Spout for SQS

    Bases: Spout

    "},{"location":"listeners/sqs/#sqs.SQS.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the SQS class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/sqs/#sqs.SQS.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius SQS rise \\\nstreaming \\\n--output_kafka_topic sqs_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args queue_url=https://sqs.us-east-1.amazonaws.com/123456789012/my-queue batch_size=10 batch_interval=10\n
    "},{"location":"listeners/sqs/#sqs.SQS.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_sqs_spout:\nname: \"SQS\"\nmethod: \"listen\"\nargs:\nqueue_url: \"https://sqs.us-east-1.amazonaws.com/123456789012/my-queue\"\nbatch_size: 10\nbatch_interval: 10\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"sqs_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/sqs/#sqs.SQS.listen","title":"listen(queue_url, batch_size=10, batch_interval=10)","text":"

    \ud83d\udcd6 Start listening for new messages in the SQS queue.

    Parameters:

    Name Type Description Default queue_url str

    The URL of the SQS queue to listen to.

    required batch_size int

    The maximum number of messages to receive in each batch. Defaults to 10.

    10 batch_interval int

    The time in seconds to wait for a new message if the queue is empty. Defaults to 10.

    10

    Raises:

    Type Description Exception

    If unable to connect to the SQS service.

    "},{"location":"listeners/udp/","title":"UDP","text":"

    Spout for UDP

    Bases: Spout

    "},{"location":"listeners/udp/#udp.Udp.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Udp class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/udp/#udp.Udp.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Udp rise \\\nstreaming \\\n--output_kafka_topic udp_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args host=localhost port=12345\n
    "},{"location":"listeners/udp/#udp.Udp.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_udp_spout:\nname: \"Udp\"\nmethod: \"listen\"\nargs:\nhost: \"localhost\"\nport: 12345\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"udp_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/udp/#udp.Udp.listen","title":"listen(host='localhost', port=12345)","text":"

    \ud83d\udcd6 Start listening for data from the UDP server.

    Parameters:

    Name Type Description Default host str

    The UDP server host. Defaults to \"localhost\".

    'localhost' port int

    The UDP server port. Defaults to 12345.

    12345

    Raises:

    Type Description Exception

    If unable to connect to the UDP server.

    "},{"location":"listeners/webhook/","title":"Webhook","text":"

    Spout for Webhook

    Bases: Spout

    "},{"location":"listeners/webhook/#webhook.Webhook.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Webhook class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/webhook/#webhook.Webhook.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Webhook rise \\\nstreaming \\\n--output_kafka_topic webhook_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args endpoint=* port=3000\n
    "},{"location":"listeners/webhook/#webhook.Webhook.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_webhook_spout:\nname: \"Webhook\"\nmethod: \"listen\"\nargs:\nendpoint: \"*\"\nport: 3000\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"webhook_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/webhook/#webhook.Webhook.listen","title":"listen(endpoint='*', port=3000, username=None, password=None)","text":"

    \ud83d\udcd6 Start listening for data from the webhook.

    Parameters:

    Name Type Description Default endpoint str

    The webhook endpoint to listen to. Defaults to \"*\".

    '*' port int

    The port to listen on. Defaults to 3000.

    3000 username Optional[str]

    The username for basic authentication. Defaults to None.

    None password Optional[str]

    The password for basic authentication. Defaults to None.

    None

    Raises:

    Type Description Exception

    If unable to start the CherryPy server.

    "},{"location":"listeners/websocket/","title":"Websocket","text":"

    Spout for Websocket

    Bases: Spout

    "},{"location":"listeners/websocket/#websocket.Websocket.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Websocket class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/websocket/#websocket.Websocket.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Websocket rise \\\nstreaming \\\n--output_kafka_topic websocket_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args host=localhost port=8765\n
    "},{"location":"listeners/websocket/#websocket.Websocket.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_websocket_spout:\nname: \"Websocket\"\nmethod: \"listen\"\nargs:\nhost: \"localhost\"\nport: 8765\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"websocket_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/websocket/#websocket.Websocket.__listen","title":"__listen(host, port) async","text":"

    Start listening for data from the WebSocket server.

    "},{"location":"listeners/websocket/#websocket.Websocket.listen","title":"listen(host='localhost', port=8765)","text":"

    \ud83d\udcd6 Start the WebSocket server.

    Parameters:

    Name Type Description Default host str

    The WebSocket server host. Defaults to \"localhost\".

    'localhost' port int

    The WebSocket server port. Defaults to 8765.

    8765

    Raises:

    Type Description Exception

    If unable to start the WebSocket server.

    "},{"location":"listeners/websocket/#websocket.Websocket.receive_message","title":"receive_message(websocket, path) async","text":"

    Receive a message from a WebSocket client and save it along with metadata.

    Parameters:

    Name Type Description Default websocket

    WebSocket client connection.

    required path

    WebSocket path.

    required"},{"location":"listeners/zeromq/","title":"ZeroMQ","text":"

    Spout for ZeroMQ

    Bases: Spout

    "},{"location":"listeners/zeromq/#zeromq.ZeroMQ.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the ZeroMQ class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/zeromq/#zeromq.ZeroMQ.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ZeroMQ rise \\\nstreaming \\\n--output_kafka_topic zmq_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args endpoint=tcp://localhost:5555 topic=my_topic syntax=json\n
    "},{"location":"listeners/zeromq/#zeromq.ZeroMQ.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_zmq_spout:\nname: \"ZeroMQ\"\nmethod: \"listen\"\nargs:\nendpoint: \"tcp://localhost:5555\"\ntopic: \"my_topic\"\nsyntax: \"json\"\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"zmq_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/zeromq/#zeromq.ZeroMQ.listen","title":"listen(endpoint, topic, syntax, socket_type='SUB')","text":"

    \ud83d\udcd6 Start listening for data from the ZeroMQ server.

    Parameters:

    Name Type Description Default endpoint str

    The endpoint to connect to (e.g., \"tcp://localhost:5555\").

    required topic str

    The topic to subscribe to.

    required syntax str

    The syntax to be used (e.g., \"json\").

    required socket_type Optional[str]

    The type of ZeroMQ socket (default is \"SUB\").

    'SUB'

    Raises:

    Type Description Exception

    If unable to connect to the ZeroMQ server or process messages.

    "},{"location":"ocr/ConvertImage/","title":"Convert Images","text":"

    Bases: Bolt

    "},{"location":"ocr/ConvertImage/#geniusrise_ocr.readers.image.ConvertImage.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The ConvertImage class is designed to convert images from one format to another. It takes an input folder containing images and an output format as arguments. The class iterates through each image file in the specified folder and converts it to the desired format. Additional options like quality and subsampling can be specified for lossy formats like 'JPG'.

    Parameters:

    Name Type Description Default input BatchInput

    An instance of the BatchInput class for reading the data.

    required output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/ConvertImage/#geniusrise_ocr.readers.image.ConvertImage.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ConvertImage rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess \\\n--args input_folder=/path/to/image/folder output_format=PNG quality=95 subsampling=0\n
    "},{"location":"ocr/ConvertImage/#geniusrise_ocr.readers.image.ConvertImage.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nconvert_images:\nname: \"ConvertImage\"\nmethod: \"process\"\nargs:\noutput_format: \"PNG\"\nquality: 95\nsubsampling: 0\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\n
    "},{"location":"ocr/ConvertImage/#geniusrise_ocr.readers.image.ConvertImage.process","title":"process(output_format, quality=None, subsampling=0)","text":"

    \ud83d\udcd6 Convert images in the given input folder to the specified output format.

    Parameters:

    Name Type Description Default output_format str

    The format to convert images to ('PNG' or 'JPG').

    required quality Optional[int]

    The quality of the output image for lossy formats like 'JPG'. Defaults to None.

    None subsampling Optional[int]

    The subsampling factor for JPEG compression. Defaults to 0.

    0

    This method iterates through each image file in the specified folder, reads the image, and converts it to the specified output format. Additional parameters like quality and subsampling can be set for lossy formats.

    "},{"location":"ocr/FineTunePix2Struct/","title":"Fine-tune pix2struct","text":"

    Bases: Bolt

    "},{"location":"ocr/FineTunePix2Struct/#geniusrise_ocr.ocr.pix2struct.fine_tune.FineTunePix2Struct.__init__","title":"__init__(input, output, state, model_name='google/pix2struct-large', **kwargs)","text":"

    The FineTunePix2Struct class is designed to fine-tune the Pix2Struct model on a custom OCR dataset. It supports three popular OCR dataset formats: COCO, ICDAR, and SynthText.

    Parameters:

    Name Type Description Default input BatchInput

    An instance of the BatchInput class for reading the data.

    required output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required model_name str

    The name of the Pix2Struct model to use. Default is \"google/pix2struct-large\".

    'google/pix2struct-large' **kwargs

    Additional keyword arguments.

    {} Dataset Formats
    • COCO: Assumes a folder structure with an 'annotations.json' file containing image and text annotations.
    • ICDAR: Assumes a folder structure with 'Images' and 'Annotations' folders containing image files and XML annotation files respectively.
    • SynthText: Assumes a folder with image files and corresponding '.txt' files containing ground truth text.
    "},{"location":"ocr/FineTunePix2Struct/#geniusrise_ocr.ocr.pix2struct.fine_tune.FineTunePix2Struct.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius FineTunePix2Struct rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess \\\n--args epochs=3 batch_size=32 learning_rate=0.001 dataset_format=coco use_cuda=true\n
    "},{"location":"ocr/FineTunePix2Struct/#geniusrise_ocr.ocr.pix2struct.fine_tune.FineTunePix2Struct.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nfine_tune_pix2struct:\nname: \"FineTunePix2Struct\"\nmethod: \"process\"\nargs:\nepochs: 3\nbatch_size: 32\nlearning_rate: 0.001\ndataset_format: coco\nuse_cuda: true\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\n
    "},{"location":"ocr/FineTunePix2Struct/#geniusrise_ocr.ocr.pix2struct.fine_tune.FineTunePix2Struct.process","title":"process(epochs, batch_size, learning_rate, dataset_format, use_cuda=False)","text":"

    \ud83d\udcd6 Fine-tune the Pix2Struct model on a custom OCR dataset.

    Parameters:

    Name Type Description Default epochs int

    Number of training epochs.

    required batch_size int

    Batch size for training.

    required learning_rate float

    Learning rate for the optimizer.

    required dataset_format str

    Format of the OCR dataset. Supported formats are \"coco\", \"icdar\", and \"synthtext\".

    required use_cuda bool

    Whether to use CUDA for training. Default is False.

    False

    This method fine-tunes the Pix2Struct model using the images and annotations in the dataset specified by dataset_format. The fine-tuned model is saved to the specified output path.

    "},{"location":"ocr/FineTuneTROCR/","title":"OCR API using trocr","text":"

    Bases: Bolt

    "},{"location":"ocr/FineTuneTROCR/#geniusrise_ocr.ocr.trocr.fine_tune.FineTuneTROCR.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The FineTuneTROCR class is designed to fine-tune the TROCR model on a custom OCR dataset. It supports three popular OCR dataset formats: COCO, ICDAR, and SynthText.

    Parameters:

    Name Type Description Default input BatchInput

    An instance of the BatchInput class for reading the data.

    required output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {} Dataset Formats
    • COCO: Assumes a folder structure with an 'annotations.json' file containing image and text annotations.
    • ICDAR: Assumes a folder structure with 'Images' and 'Annotations' folders containing image files and XML annotation files respectively.
    • SynthText: Assumes a folder with image files and corresponding '.txt' files containing ground truth text.
    "},{"location":"ocr/FineTuneTROCR/#geniusrise_ocr.ocr.trocr.fine_tune.FineTuneTROCR.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius FineTuneTROCR rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess \\\n--args epochs=3 batch_size=32 learning_rate=0.001 dataset_format=coco use_cuda=true\n
    "},{"location":"ocr/FineTuneTROCR/#geniusrise_ocr.ocr.trocr.fine_tune.FineTuneTROCR.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nfine_tune_trocr:\nname: \"FineTuneTROCR\"\nmethod: \"process\"\nargs:\nepochs: 3\nbatch_size: 32\nlearning_rate: 0.001\ndataset_format: coco\nuse_cuda: true\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\n
    "},{"location":"ocr/FineTuneTROCR/#geniusrise_ocr.ocr.trocr.fine_tune.FineTuneTROCR.process","title":"process(epochs, batch_size, learning_rate, dataset_format, use_cuda=False)","text":"

    \ud83d\udcd6 Fine-tune the TROCR model on a custom OCR dataset.

    Parameters:

    Name Type Description Default epochs int

    Number of training epochs.

    required batch_size int

    Batch size for training.

    required learning_rate float

    Learning rate for the optimizer.

    required dataset_format str

    Format of the OCR dataset. Supported formats are \"coco\", \"icdar\", and \"synthtext\".

    required use_cuda bool

    Whether to use CUDA for training. Default is False.

    False

    This method fine-tunes the TROCR model using the images and annotations in the dataset specified by dataset_format. The fine-tuned model is saved to the specified output path.

    "},{"location":"ocr/ImageClassPredictor/","title":"Predict image classes","text":"

    Bases: Bolt

    "},{"location":"ocr/ImageClassPredictor/#geniusrise_ocr.classification.predict.ImageClassPredictor.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The ImageClassPredictor class classifies images using a pre-trained PyTorch model. It assumes that the input.input_folder contains sub-folders of images to be classified. The classified images are saved in output.output_folder, organized by their predicted labels.

    Parameters:

    Name Type Description Default input BatchInput

    Instance of BatchInput for reading data.

    required output BatchOutput

    Instance of BatchOutput for saving data.

    required state State

    Instance of State for maintaining state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/ImageClassPredictor/#geniusrise_ocr.classification.predict.ImageClassPredictor.__init__--command-line-invocation-with-geniusrise","title":"Command Line Invocation with geniusrise","text":"
    genius ImageClassPredictor rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\npredict \\\n--args classes='{\"0\": \"cat\", \"1\": \"dog\"}' model_path=/path/to/model.pth\n
    "},{"location":"ocr/ImageClassPredictor/#geniusrise_ocr.classification.predict.ImageClassPredictor.__init__--yaml-configuration-with-geniusrise","title":"YAML Configuration with geniusrise","text":"
    version: \"1\"\nspouts:\nimage_classification:\nname: \"ImageClassPredictor\"\nmethod: \"predict\"\nargs:\nclasses: '{\"0\": \"cat\", \"1\": \"dog\"}'\nmodel_path: \"/path/to/model.pth\"\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\n
    "},{"location":"ocr/ImageClassPredictor/#geniusrise_ocr.classification.predict.ImageClassPredictor.get_label","title":"get_label(class_idx)","text":"

    \ud83d\udcd6 Get the label corresponding to the class index.

    Parameters:

    Name Type Description Default class_idx int

    The class index.

    required

    Returns:

    Name Type Description str str

    The label corresponding to the class index.

    This method returns the label that corresponds to a given class index based on the classes dictionary.

    "},{"location":"ocr/ImageClassPredictor/#geniusrise_ocr.classification.predict.ImageClassPredictor.predict","title":"predict(classes, model_path, use_cuda=False)","text":"

    \ud83d\udcd6 Classify images in the input sub-folders using a pre-trained PyTorch model.

    Parameters:

    Name Type Description Default classes str

    JSON string mapping class indices to labels.

    required model_path str

    Path to the pre-trained PyTorch model.

    required use_cuda bool

    Whether to use CUDA for model inference. Default is False.

    False

    This method iterates through each image file in the specified sub-folders, applies the model, and classifies the image. The classified images are then saved in an output folder, organized by their predicted labels.

    "},{"location":"ocr/ParseCBZCBR/","title":"Parse CBZCBR files","text":"

    Bases: Bolt

    "},{"location":"ocr/ParseCBZCBR/#geniusrise_ocr.readers.cbz_cbr.ParseCBZCBR.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The ParseCBZCBR class is designed to process CBZ and CBR files, which are commonly used for comic books. It takes an input folder containing CBZ/CBR files as an argument and iterates through each file. For each file, it extracts the images and saves them in a designated output folder.

    Parameters:

    Name Type Description Default input BatchInput

    An instance of the BatchInput class for reading the data.

    required output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/ParseCBZCBR/#geniusrise_ocr.readers.cbz_cbr.ParseCBZCBR.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ParseCBZCBR rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess\n
    "},{"location":"ocr/ParseCBZCBR/#geniusrise_ocr.readers.cbz_cbr.ParseCBZCBR.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nparse_cbzcbr:\nname: \"ParseCBZCBR\"\nmethod: \"process\"\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\n
    "},{"location":"ocr/ParseCBZCBR/#geniusrise_ocr.readers.cbz_cbr.ParseCBZCBR.process","title":"process(input_folder=None)","text":"

    \ud83d\udcd6 Process CBZ and CBR files in the given input folder and extract images.

    Parameters:

    Name Type Description Default input_folder str

    The folder containing CBZ/CBR files to process.

    None

    This method iterates through each CBZ/CBR file in the specified folder and extracts the images.

    "},{"location":"ocr/ParseDjvu/","title":"Parse Djvu files","text":"

    Bases: Bolt

    "},{"location":"ocr/ParseDjvu/#geniusrise_ocr.readers.djvu.ParseDjvu.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The ParseDjvu class is designed to process DJVU files and classify them as either text-based or image-based. It takes an input folder containing DJVU files as an argument and iterates through each file. For each DJVU, it samples a few pages to determine the type of content it primarily contains. If the DJVU is text-based, the class extracts the text from each page and saves it as a JSON file. If the DJVU is image-based, it converts each page to a PNG image and saves them in a designated output folder.

    Parameters:

    Name Type Description Default input BatchInput

    An instance of the BatchInput class for reading the data.

    required output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/ParseDjvu/#geniusrise_ocr.readers.djvu.ParseDjvu.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ParseDjvu rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess\n
    "},{"location":"ocr/ParseDjvu/#geniusrise_ocr.readers.djvu.ParseDjvu.process","title":"process(input_folder=None)","text":"

    \ud83d\udcd6 Process DJVU files in the given input folder and classify them as text-based or image-based.

    Parameters:

    Name Type Description Default input_folder str

    The folder containing DJVU files to process.

    None

    This method iterates through each DJVU file in the specified folder, reads a sample of pages, and determines whether the DJVU is text-based or image-based. It then delegates further processing to _process_text_djvu or _process_image_djvu based on this determination.

    "},{"location":"ocr/ParseEpub/","title":"Parse Epub files","text":"

    Bases: Bolt

    "},{"location":"ocr/ParseEpub/#geniusrise_ocr.readers.epub.ParseEpub.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The ParseEpub class is designed to process EPUB files and classify them as either text-based or image-based. It takes an input folder containing EPUB files as an argument and iterates through each file. For each EPUB, it samples a few items to determine the type of content it primarily contains. If the EPUB is text-based, the class extracts the text from each item and saves it as a JSON file. If the EPUB is image-based, it saves the images in a designated output folder.

    Parameters:

    Name Type Description Default input BatchInput

    An instance of the BatchInput class for reading the data.

    required output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/ParseEpub/#geniusrise_ocr.readers.epub.ParseEpub.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ParseEpub rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess\n
    "},{"location":"ocr/ParseEpub/#geniusrise_ocr.readers.epub.ParseEpub.process","title":"process(input_folder=None)","text":"

    \ud83d\udcd6 Process EPUB files in the given input folder and classify them as text-based or image-based.

    Parameters:

    Name Type Description Default input_folder str

    The folder containing EPUB files to process.

    None

    This method iterates through each EPUB file in the specified folder, reads a sample of items, and determines whether the EPUB is text-based or image-based. It then delegates further processing to _process_text_epub or _process_image_epub based on this determination.

    "},{"location":"ocr/ParseMOBI/","title":"Parse MOBI files","text":"

    Bases: Bolt

    "},{"location":"ocr/ParseMOBI/#geniusrise_ocr.readers.mobi.ParseMOBI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The ParseMOBI class is designed to process MOBI files. It takes an input folder containing MOBI files as an argument and iterates through each file. For each file, it extracts the images and saves them in a designated output folder.

    Parameters:

    Name Type Description Default input BatchInput

    An instance of the BatchInput class for reading the data.

    required output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/ParseMOBI/#geniusrise_ocr.readers.mobi.ParseMOBI.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ParseMOBI rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess\n
    "},{"location":"ocr/ParseMOBI/#geniusrise_ocr.readers.mobi.ParseMOBI.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nparse_mobi:\nname: \"ParseMOBI\"\nmethod: \"process\"\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\n
    "},{"location":"ocr/ParseMOBI/#geniusrise_ocr.readers.mobi.ParseMOBI.process","title":"process(input_folder=None)","text":"

    \ud83d\udcd6 Process MOBI files in the given input folder and extract images.

    Parameters:

    Name Type Description Default input_folder str

    The folder containing MOBI files to process.

    None

    This method iterates through each MOBI file in the specified folder and extracts the images.

    "},{"location":"ocr/ParsePdf/","title":"Parse PDF files","text":"

    Bases: Bolt

    "},{"location":"ocr/ParsePdf/#geniusrise_ocr.readers.pdf.ParsePdf.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The ParsePdf class is designed to process PDF files and classify them as either text-based or image-based. It takes an input folder containing PDF files as an argument and iterates through each file. For each PDF, it samples a few pages to determine the type of content it primarily contains. If the PDF is text-based, the class extracts the text from each page and saves it as a JSON file. If the PDF is image-based, it converts each page to a PNG image and saves them in a designated output folder.

    Args:\n    input (BatchInput): An instance of the BatchInput class for reading the data.\n    output (BatchOutput): An instance of the BatchOutput class for saving the data.\n    state (State): An instance of the State class for maintaining the state.\n    **kwargs: Additional keyword arguments.\n
    "},{"location":"ocr/ParsePdf/#geniusrise_ocr.readers.pdf.ParsePdf.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ParsePdf rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess\n
    "},{"location":"ocr/ParsePdf/#geniusrise_ocr.readers.pdf.ParsePdf.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nparse_pdfs:\nname: \"ParsePdf\"\nmethod: \"process\"\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\n
    "},{"location":"ocr/ParsePdf/#geniusrise_ocr.readers.pdf.ParsePdf.process","title":"process(input_folder=None)","text":"

    \ud83d\udcd6 Process PDF files in the given input folder and classify them as text-based or image-based.

    Parameters:

    Name Type Description Default input_folder str

    The folder containing PDF files to process.

    None

    This method iterates through each PDF file in the specified folder, reads a sample of pages, and determines whether the PDF is text-based or image-based. It then delegates further processing to _process_text_pdf or _process_image_pdf based on this determination.

    "},{"location":"ocr/ParsePostScript/","title":"Parse PostScript files","text":"

    Bases: Bolt

    "},{"location":"ocr/ParsePostScript/#geniusrise_ocr.readers.postscript.ParsePostScript.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The ParsePostScript class is designed to process PostScript files and classify them as either text-based or image-based. It takes an input folder containing PostScript files as an argument and iterates through each file. For each PostScript file, it converts it to PDF and samples a few pages to determine the type of content it primarily contains. If the PostScript is text-based, the class extracts the text from each page and saves it as a JSON file. If the PostScript is image-based, it converts each page to a PNG image and saves them in a designated output folder.

    Parameters:

    Name Type Description Default input BatchInput

    An instance of the BatchInput class for reading the data.

    required output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/ParsePostScript/#geniusrise_ocr.readers.postscript.ParsePostScript.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ParsePostScript rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess\n
    "},{"location":"ocr/ParsePostScript/#geniusrise_ocr.readers.postscript.ParsePostScript.process","title":"process(input_folder=None)","text":"

    \ud83d\udcd6 Process PostScript files in the given input folder and classify them as text-based or image-based.

    Parameters:

    Name Type Description Default input_folder str

    The folder containing PostScript files to process.

    None

    This method iterates through each PostScript file in the specified folder, converts it to PDF, reads a sample of pages, and determines whether the PostScript is text-based or image-based. It then delegates further processing to _process_text_ps or _process_image_ps based on this determination.

    "},{"location":"ocr/ParseXPS/","title":"Parse XPS files","text":"

    Bases: Bolt

    "},{"location":"ocr/ParseXPS/#geniusrise_ocr.readers.xps.ParseXPS.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The ParseXPS class is designed to process XPS files. It takes an input folder containing XPS files as an argument and iterates through each file. For each file, it extracts the images and saves them in a designated output folder.

    Parameters:

    Name Type Description Default input BatchInput

    An instance of the BatchInput class for reading the data.

    required output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/ParseXPS/#geniusrise_ocr.readers.xps.ParseXPS.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ParseXPS rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess\n
    "},{"location":"ocr/ParseXPS/#geniusrise_ocr.readers.xps.ParseXPS.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nparse_xps:\nname: \"ParseXPS\"\nmethod: \"process\"\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\n
    "},{"location":"ocr/ParseXPS/#geniusrise_ocr.readers.xps.ParseXPS.process","title":"process(input_folder=None)","text":"

    \ud83d\udcd6 Process XPS files in the given input folder and extract images.

    Parameters:

    Name Type Description Default input_folder str

    The folder containing XPS files to process.

    None

    This method iterates through each XPS file in the specified folder and extracts the images.

    "},{"location":"ocr/Pix2StructImageOCR/","title":"OCR using pix2struct","text":"

    Bases: Bolt

    "},{"location":"ocr/Pix2StructImageOCR/#geniusrise_ocr.ocr.pix2struct.bulk.Pix2StructImageOCR.__init__","title":"__init__(input, output, state, model_name='google/pix2struct-large', **kwargs)","text":"

    The Pix2StructImageOCR class performs OCR on images using Google's Pix2Struct model. It expects the input.input_folder to contain the images for OCR and saves the OCR results as JSON files in output.output_folder.

    Parameters:

    Name Type Description Default input BatchInput

    Instance of BatchInput for reading data.

    required output BatchOutput

    Instance of BatchOutput for saving data.

    required state State

    Instance of State for maintaining state.

    required model_name str

    The name of the Pix2Struct model to use. Default is \"google/pix2struct-large\".

    'google/pix2struct-large' **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/Pix2StructImageOCR/#geniusrise_ocr.ocr.pix2struct.bulk.Pix2StructImageOCR.__init__--command-line-invocation-with-geniusrise","title":"Command Line Invocation with geniusrise","text":"
    genius Pix2StructImageOCR rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess\n
    "},{"location":"ocr/Pix2StructImageOCR/#geniusrise_ocr.ocr.pix2struct.bulk.Pix2StructImageOCR.__init__--yaml-configuration-with-geniusrise","title":"YAML Configuration with geniusrise","text":"
    version: \"1\"\nspouts:\nocr_processing:\nname: \"Pix2StructImageOCR\"\nmethod: \"process\"\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\nuse_cuda: true\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\nuse_cuda: true\n
    "},{"location":"ocr/Pix2StructImageOCR/#geniusrise_ocr.ocr.pix2struct.bulk.Pix2StructImageOCR.process","title":"process(use_cuda=True)","text":"

    \ud83d\udcd6 Perform OCR on images in the input folder and save the OCR results as JSON files in the output folder.

    Parameters:

    Name Type Description Default use_cuda bool

    Whether to use CUDA for model inference. Default is True.

    True"},{"location":"ocr/Pix2StructImageOCRAPI/","title":"OCR API using pix2struct","text":"

    Bases: Bolt

    "},{"location":"ocr/Pix2StructImageOCRAPI/#geniusrise_ocr.ocr.pix2struct.api.Pix2StructImageOCRAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The Pix2StructImageOCRAPI class performs OCR on images using Google's Pix2Struct model. The class exposes an API endpoint for OCR on single images. The endpoint is accessible at /api/v1/ocr. The API takes a POST request with a JSON payload containing a base64 encoded image under the key image_base64. It returns a JSON response containing the OCR result under the key ocr_text.

    Parameters:

    Name Type Description Default input BatchInput

    Instance of BatchInput for reading data.

    required output BatchOutput

    Instance of BatchOutput for saving data.

    required state State

    Instance of State for maintaining state.

    required model_name str

    The name of the Pix2Struct model to use. Default is \"google/pix2struct-large\".

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/Pix2StructImageOCRAPI/#geniusrise_ocr.ocr.pix2struct.api.Pix2StructImageOCRAPI.__init__--command-line-invocation-with-geniusrise","title":"Command Line Invocation with geniusrise","text":"
    genius Pix2StructImageOCRAPI rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nlisten \\\n--args endpoint=* port=3000 cors_domain=* use_cuda=True\n
    "},{"location":"ocr/Pix2StructImageOCRAPI/#geniusrise_ocr.ocr.pix2struct.api.Pix2StructImageOCRAPI.__init__--yaml-configuration-with-geniusrise","title":"YAML Configuration with geniusrise","text":"
    version: \"1\"\nspouts:\nocr_processing:\nname: \"Pix2StructImageOCRAPI\"\nmethod: \"listen\"\nargs:\nendpoint: \"*\"\nport: 3000\ncors_domain: \"*\"\nuse_cuda: true\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\nuse_cuda: true\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\nuse_cuda: true\n
    "},{"location":"ocr/TROCRImageOCR/","title":"OCR using trocr","text":"

    Bases: Bolt

    "},{"location":"ocr/TROCRImageOCR/#geniusrise_ocr.ocr.trocr.bulk.TROCRImageOCR.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The TROCRImageOCR class performs OCR (Optical Character Recognition) on images using Microsoft's TROCR model. It expects the input.input_folder to contain the images for OCR and saves the OCR results as JSON files in output.output_folder.

    Parameters:

    Name Type Description Default input BatchInput

    Instance of BatchInput for reading data.

    required output BatchOutput

    Instance of BatchOutput for saving data.

    required state State

    Instance of State for maintaining state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/TROCRImageOCR/#geniusrise_ocr.ocr.trocr.bulk.TROCRImageOCR.__init__--command-line-invocation-with-geniusrise","title":"Command Line Invocation with geniusrise","text":"
    genius TROCRImageOCR rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess\n
    "},{"location":"ocr/TROCRImageOCR/#geniusrise_ocr.ocr.trocr.bulk.TROCRImageOCR.__init__--yaml-configuration-with-geniusrise","title":"YAML Configuration with geniusrise","text":"
    version: \"1\"\nspouts:\nocr_processing:\nname: \"TROCRImageOCR\"\nmethod: \"process\"\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\nuse_cuda: true\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\nuse_cuda: true\n
    "},{"location":"ocr/TROCRImageOCR/#geniusrise_ocr.ocr.trocr.bulk.TROCRImageOCR.process","title":"process(kind='printed', use_cuda=True)","text":"

    \ud83d\udcd6 Perform OCR on images in the input folder and save the OCR results as JSON files in the output folder.

    This method iterates through each image file in input.input_folder, performs OCR using the TROCR model, and saves the OCR results as JSON files in output.output_folder.

    Parameters:

    Name Type Description Default kind str

    The kind of TROCR model to use. Default is \"printed\". Options are \"printed\" or \"handwritten\".

    'printed' use_cuda bool

    Whether to use CUDA for model inference. Default is True.

    True"},{"location":"ocr/TROCRImageOCRAPI/","title":"OCR API using trocr","text":"

    Bases: Bolt

    "},{"location":"ocr/TROCRImageOCRAPI/#geniusrise_ocr.ocr.trocr.api.TROCRImageOCRAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The TROCRImageOCR class performs OCR (Optical Character Recognition) on images using Microsoft's TROCR model. The class exposes an API endpoint for OCR on single images. The endpoint is accessible at /api/v1/ocr. The API takes a POST request with a JSON payload containing a base64 encoded image under the key image_base64. It returns a JSON response containing the OCR result under the key ocr_text.

    Parameters:

    Name Type Description Default input BatchInput

    Instance of BatchInput for reading data.

    required output BatchOutput

    Instance of BatchOutput for saving data.

    required state State

    Instance of State for maintaining state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/TROCRImageOCRAPI/#geniusrise_ocr.ocr.trocr.api.TROCRImageOCRAPI.__init__--command-line-invocation-with-geniusrise","title":"Command Line Invocation with geniusrise","text":"
    genius TROCRImageOCR rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nlisten \\\n--args endpoint=* port=3000 cors_domain=* kind=handwritten use_cuda=True\n
    "},{"location":"ocr/TROCRImageOCRAPI/#geniusrise_ocr.ocr.trocr.api.TROCRImageOCRAPI.__init__--yaml-configuration-with-geniusrise","title":"YAML Configuration with geniusrise","text":"
    version: \"1\"\nspouts:\nocr_processing:\nname: \"TROCRImageOCR\"\nmethod: \"listen\"\nargs:\nendpoint: \"*\"\nport: 3000\ncors_domain: \"*\"\nkind: handwritten\nuse_cuda: true\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\nuse_cuda: true\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\nuse_cuda: true\n
    "},{"location":"ocr/TROCRImageOCRAPI/#geniusrise_ocr.ocr.trocr.api.TROCRImageOCRAPI.__init__--api-example","title":"API Example","text":"
    curl -X POST \"http://localhost:3000/api/v1/ocr\" -H \"Content-Type: application/json\" -d '{\"image_base64\": \"your_base64_encoded_image_here\"}'\n
    "},{"location":"ocr/TROCRImageOCRAPI/#geniusrise_ocr.ocr.trocr.api.TROCRImageOCRAPI.preprocess_and_detect_boxes","title":"preprocess_and_detect_boxes(image)","text":"

    Preprocess the image and detect text bounding boxes using the EAST model.

    Parameters:

    Name Type Description Default image Image.Image

    PIL Image object.

    required

    Returns:

    Type Description List[Tuple[int, int, int, int]]

    List[Tuple[int, int, int, int]]: List of bounding boxes (x, y, w, h).

    "},{"location":"ocr/TrainImageClassifier/","title":"Train image classifier","text":"

    Bases: Bolt

    "},{"location":"ocr/TrainImageClassifier/#geniusrise_ocr.classification.train.TrainImageClassifier.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The TrainImageClassifier class trains an image classifier using a ResNet-152 model. It assumes that the input.input_folder contains sub-folders named 'train' and 'test'. Each of these sub-folders should contain class-specific folders with images. The trained model is saved as 'model.pth' in output.output_folder.

    Parameters:

    Name Type Description Default input BatchInput

    Instance of BatchInput for reading data.

    required output BatchOutput

    Instance of BatchOutput for saving data.

    required state State

    Instance of State for maintaining state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/TrainImageClassifier/#geniusrise_ocr.classification.train.TrainImageClassifier.__init__--command-line-invocation-with-geniusrise","title":"Command Line Invocation with geniusrise","text":"
    genius TrainImageClassifier rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess \\\n--args num_classes=4 epochs=10 batch_size=32 learning_rate=0.001\n
    "},{"location":"ocr/TrainImageClassifier/#geniusrise_ocr.classification.train.TrainImageClassifier.__init__--yaml-configuration-with-geniusrise","title":"YAML Configuration with geniusrise","text":"
    version: \"1\"\nspouts:\nimage_training:\nname: \"TrainImageClassifier\"\nmethod: \"process\"\nargs:\nnum_classes: 4\nepochs: 10\nbatch_size: 32\nlearning_rate: 0.001\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\n
    "},{"location":"ocr/TrainImageClassifier/#geniusrise_ocr.classification.train.TrainImageClassifier.process","title":"process(num_classes=4, epochs=10, batch_size=32, learning_rate=0.001, use_cuda=False)","text":"

    \ud83d\udcd6 Train an image classifier using a ResNet-152 model.

    Parameters:

    Name Type Description Default num_classes int

    Number of classes of the images.

    4 epochs int

    Number of training epochs. Default is 10.

    10 batch_size int

    Batch size for training. Default is 32.

    32 learning_rate float

    Learning rate for the optimizer. Default is 0.001.

    0.001 use_cuda bool

    Whether to use CUDA for model training. Default is False.

    False

    This method trains a ResNet-152 model using the images in the 'train' and 'test' sub-folders of input.input_folder. Each of these sub-folders should contain class-specific folders with images. The trained model is saved as 'model.pth' in output.output_folder.

    "},{"location":"text/api/base/","title":"Base Fine Tuner","text":"

    Bases: TextBulk

    A class representing a Hugging Face API for generating text using a pre-trained language model.

    Attributes:

    Name Type Description model Any

    The pre-trained language model.

    tokenizer Any

    The tokenizer used to preprocess input text.

    model_name str

    The name of the pre-trained language model.

    model_revision Optional[str]

    The revision of the pre-trained language model.

    tokenizer_name str

    The name of the tokenizer used to preprocess input text.

    tokenizer_revision Optional[str]

    The revision of the tokenizer used to preprocess input text.

    model_class str

    The name of the class of the pre-trained language model.

    tokenizer_class str

    The name of the class of the tokenizer used to preprocess input text.

    use_cuda bool

    Whether to use a GPU for inference.

    quantization int

    The level of quantization to use for the pre-trained language model.

    precision str

    The precision to use for the pre-trained language model.

    device_map str | Dict | None

    The mapping of devices to use for inference.

    max_memory Dict[int, str]

    The maximum memory to use for inference.

    torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model.

    model_args Any

    Additional arguments to pass to the pre-trained language model.

    Methods

    text(**kwargs: Any) -> Dict[str, Any]: Generates text based on the given prompt and decoding strategy.

    listen(model_name: str, model_class: str = \"AutoModelForCausalLM\", tokenizer_class: str = \"AutoTokenizer\", use_cuda: bool = False, precision: str = \"float16\", quantization: int = 0, device_map: str | Dict | None = \"auto\", max_memory={0: \"24GB\"}, torchscript: bool = True, endpoint: str = \"\", port: int = 3000, cors_domain: str = \"http://localhost:3000\", username: Optional[str] = None, password: Optional[str] = None, *model_args: Any) -> None: Starts a CherryPy server to listen for requests to generate text.

    "},{"location":"text/api/base/#geniusrise_text.base.api.TextAPI.__init__","title":"__init__(input, output, state)","text":"

    Initializes a new instance of the TextAPI class.

    Parameters:

    Name Type Description Default input BatchInput

    The input data to process.

    required output BatchOutput

    The output data to process.

    required state State

    The state of the API.

    required"},{"location":"text/api/base/#geniusrise_text.base.api.TextAPI.listen","title":"listen(model_name, model_class='AutoModelForCausalLM', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, concurrent_queries=False, use_vllm=False, use_llama_cpp=False, vllm_tokenizer_mode='auto', vllm_download_dir=None, vllm_load_format='auto', vllm_seed=42, vllm_max_model_len=1024, vllm_enforce_eager=False, vllm_max_context_len_to_capture=8192, vllm_block_size=16, vllm_gpu_memory_utilization=0.9, vllm_swap_space=4, vllm_sliding_window=None, vllm_pipeline_parallel_size=1, vllm_tensor_parallel_size=1, vllm_worker_use_ray=False, vllm_max_parallel_loading_workers=None, vllm_disable_custom_all_reduce=False, vllm_max_num_batched_tokens=None, vllm_max_num_seqs=64, vllm_max_paddings=512, vllm_max_lora_rank=None, vllm_max_loras=None, vllm_max_cpu_loras=None, vllm_lora_extra_vocab_size=0, vllm_placement_group=None, vllm_log_stats=False, llama_cpp_filename=None, llama_cpp_n_gpu_layers=0, llama_cpp_split_mode=llama_cpp.LLAMA_SPLIT_LAYER, llama_cpp_tensor_split=None, llama_cpp_vocab_only=False, llama_cpp_use_mmap=True, llama_cpp_use_mlock=False, llama_cpp_kv_overrides=None, llama_cpp_seed=llama_cpp.LLAMA_DEFAULT_SEED, llama_cpp_n_ctx=2048, llama_cpp_n_batch=512, llama_cpp_n_threads=None, llama_cpp_n_threads_batch=None, llama_cpp_rope_scaling_type=llama_cpp.LLAMA_ROPE_SCALING_UNSPECIFIED, llama_cpp_rope_freq_base=0.0, llama_cpp_rope_freq_scale=0.0, llama_cpp_yarn_ext_factor=-1.0, llama_cpp_yarn_attn_factor=1.0, llama_cpp_yarn_beta_fast=32.0, llama_cpp_yarn_beta_slow=1.0, llama_cpp_yarn_orig_ctx=0, llama_cpp_mul_mat_q=True, llama_cpp_logits_all=False, llama_cpp_embedding=False, llama_cpp_offload_kqv=True, llama_cpp_last_n_tokens_size=64, llama_cpp_lora_base=None, llama_cpp_lora_scale=1.0, llama_cpp_lora_path=None, llama_cpp_numa=False, 
llama_cpp_chat_format=None, llama_cpp_draft_model=None, llama_cpp_verbose=True, endpoint='*', port=3000, cors_domain='http://localhost:3000', username=None, password=None, **model_args)","text":"

    Starts a CherryPy server to listen for requests to generate text.

    Parameters:

    Name Type Description Default model_name str

    Name or identifier of the pre-trained model to be used.

    required model_class str

    Class name of the model to be used from the transformers library.

    'AutoModelForCausalLM' tokenizer_class str

    Class name of the tokenizer to be used from the transformers library.

    'AutoTokenizer' use_cuda bool

    Flag to enable CUDA for GPU acceleration.

    False precision str

    Specifies the precision configuration for PyTorch tensors, e.g., \"float16\".

    'float16' quantization int

    Level of model quantization to reduce model size and inference time.

    0 device_map Union[str, Dict, None]

    Maps model layers to specific devices for distributed inference.

    'auto' max_memory Dict[int, str]

    Maximum memory allocation for the model on each device.

    {0: '24GB'} torchscript bool

    Enables the use of TorchScript for model optimization.

    False compile bool

    Enables model compilation for further optimization.

    False awq_enabled bool

    Enables Adaptive Weight Quantization (AWQ) for model optimization.

    False flash_attention bool

    Utilizes Flash Attention optimizations for faster processing.

    False concurrent_queries bool

    Allows the server to handle multiple requests concurrently if True.

    False use_vllm bool

    Flag to use Very Large Language Models (VLLM) integration.

    False use_llama_cpp bool

    Flag to use llama.cpp integration for language model inference.

    False llama_cpp_filename Optional[str]

    The filename of the model file for llama.cpp.

    None llama_cpp_n_gpu_layers int

    Number of layers to offload to GPU in llama.cpp configuration.

    0 llama_cpp_split_mode int

    Defines how the model is split across multiple GPUs in llama.cpp.

    llama_cpp.LLAMA_SPLIT_LAYER llama_cpp_tensor_split Optional[List[float]]

    Custom tensor split configuration for llama.cpp.

    None llama_cpp_vocab_only bool

    Loads only the vocabulary part of the model in llama.cpp.

    False llama_cpp_use_mmap bool

    Enables memory-mapped files for model loading in llama.cpp.

    True llama_cpp_use_mlock bool

    Locks the model in RAM to prevent swapping in llama.cpp.

    False llama_cpp_kv_overrides Optional[Dict[str, Union[bool, int, float]]]

    Key-value pairs for overriding default llama.cpp model parameters.

    None llama_cpp_seed int

    Seed for random number generation in llama.cpp.

    llama_cpp.LLAMA_DEFAULT_SEED llama_cpp_n_ctx int

    The number of context tokens for the model in llama.cpp.

    2048 llama_cpp_n_batch int

    Batch size for processing prompts in llama.cpp.

    512 llama_cpp_n_threads Optional[int]

    Number of threads for generation in llama.cpp.

    None llama_cpp_n_threads_batch Optional[int]

    Number of threads for batch processing in llama.cpp.

    None llama_cpp_rope_scaling_type Optional[int]

    Specifies the RoPE (Rotary Positional Embeddings) scaling type in llama.cpp.

    llama_cpp.LLAMA_ROPE_SCALING_UNSPECIFIED llama_cpp_rope_freq_base float

    Base frequency for RoPE in llama.cpp.

    0.0 llama_cpp_rope_freq_scale float

    Frequency scaling factor for RoPE in llama.cpp.

    0.0 llama_cpp_yarn_ext_factor float

    Extrapolation mix factor for YaRN in llama.cpp.

    -1.0 llama_cpp_yarn_attn_factor float

    Attention factor for YaRN in llama.cpp.

    1.0 llama_cpp_yarn_beta_fast float

    Beta fast parameter for YaRN in llama.cpp.

    32.0 llama_cpp_yarn_beta_slow float

    Beta slow parameter for YaRN in llama.cpp.

    1.0 llama_cpp_yarn_orig_ctx int

    Original context size for YaRN in llama.cpp.

    0 llama_cpp_mul_mat_q bool

    Flag to enable matrix multiplication for queries in llama.cpp.

    True llama_cpp_logits_all bool

    Returns logits for all tokens when set to True in llama.cpp.

    False llama_cpp_embedding bool

    Enables embedding mode only in llama.cpp.

    False llama_cpp_offload_kqv bool

    Offloads K, Q, V matrices to GPU in llama.cpp.

    True llama_cpp_last_n_tokens_size int

    Size for the last_n_tokens buffer in llama.cpp.

    64 llama_cpp_lora_base Optional[str]

    Base model path for LoRA adjustments in llama.cpp.

    None llama_cpp_lora_scale float

    Scale factor for LoRA adjustments in llama.cpp.

    1.0 llama_cpp_lora_path Optional[str]

    Path to LoRA adjustments file in llama.cpp.

    None llama_cpp_numa Union[bool, int]

    NUMA configuration for llama.cpp.

    False llama_cpp_chat_format Optional[str]

    Specifies the chat format for llama.cpp.

    None llama_cpp_draft_model Optional[llama_cpp.LlamaDraftModel]

    Draft model for speculative decoding in llama.cpp.

    None endpoint str

    Network interface to bind the server to.

    '*' port int

    Port number to listen on for incoming requests.

    3000 cors_domain str

    Specifies the domain to allow for Cross-Origin Resource Sharing (CORS).

    'http://localhost:3000' username Optional[str]

    Username for basic authentication, if required.

    None password Optional[str]

    Password for basic authentication, if required.

    None **model_args Any

    Additional arguments to pass to the pre-trained language model or llama.cpp configuration.

    {}"},{"location":"text/api/base/#geniusrise_text.base.api.TextAPI.text","title":"text(**kwargs)","text":"

    Generates text based on the given prompt and decoding strategy.

    Parameters:

    Name Type Description Default **kwargs Any

    Additional arguments to pass to the pre-trained language model.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the prompt, arguments, and generated text.

    "},{"location":"text/api/base/#geniusrise_text.base.api.TextAPI.validate_password","title":"validate_password(realm, username, password)","text":"

    Validate the username and password against expected values.

    Parameters:

    Name Type Description Default realm str

    The authentication realm.

    required username str

    The provided username.

    required password str

    The provided password.

    required

    Returns:

    Name Type Description bool

    True if credentials are valid, False otherwise.

    "},{"location":"text/api/classification/","title":"Classification","text":"

    Bases: TextAPI

    TextClassificationAPI leveraging Hugging Face's transformers for text classification tasks. This API provides an interface to classify text into various categories like sentiment, topic, intent, etc.

    Attributes:

    Name Type Description model AutoModelForSequenceClassification

    A Hugging Face model for sequence classification.

    tokenizer AutoTokenizer

    A tokenizer for preprocessing text.

    hf_pipeline Pipeline

    A Hugging Face pipeline for text classification.

    Methods

    classify(self): Classifies text using the model and tokenizer. classification_pipeline(self): Classifies text using the Hugging Face pipeline. initialize_pipeline(self): Lazy initialization of the classification pipeline.

    Example CLI Usage:

    genius TextClassificationAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id cardiffnlp/twitter-roberta-base-hate-multiclass-latest-lol \\\nlisten \\\n--args \\\nmodel_name=\"cardiffnlp/twitter-roberta-base-hate-multiclass-latest\" \\\nmodel_class=\"AutoModelForSequenceClassification\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    "},{"location":"text/api/classification/#geniusrise_text.classification.api.TextClassificationAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the TextClassificationAPI with the necessary configurations for input, output, and state management.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the API.

    required **kwargs

    Additional keyword arguments for extended functionality.

    {}"},{"location":"text/api/classification/#geniusrise_text.classification.api.TextClassificationAPI.classification_pipeline","title":"classification_pipeline()","text":"

    Accepts text input and returns classification results using the Hugging Face pipeline.

    This method uses the Hugging Face pipeline for efficient and robust text classification. It's suitable for various classification tasks such as sentiment analysis, topic classification, and intent recognition.

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the original input text and the classification results.

    Example CURL Request for text classification:

    /usr/bin/curl -X POST localhost:3000/api/v1/classification_pipeline             -H \"Content-Type: application/json\"             -d '{\"text\": \"The movie was fantastic, with great acting and plot.\"}' | jq\n

    "},{"location":"text/api/classification/#geniusrise_text.classification.api.TextClassificationAPI.classify","title":"classify()","text":"

    Accepts text input and returns classification results. The method uses the model and tokenizer to classify the text and provide the likelihood of each class label.

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the original input text and the classification scores for each label.

    Example CURL Request for text classification:

    /usr/bin/curl -X POST localhost:3000/api/v1/classify             -H \"Content-Type: application/json\"             -d '{\n        \"text\": \"tata sons lost a major contract to its rival mahindra motors\"\n    }' | jq\n

    "},{"location":"text/api/classification/#geniusrise_text.classification.api.TextClassificationAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the Hugging Face pipeline for classification.

    "},{"location":"text/api/instruction_tuning/","title":"Instruction Tuning","text":"

    Bases: TextAPI

    InstructionAPI is designed for generating text based on prompts using instruction-tuned language models. It serves as an interface to Hugging Face's pre-trained instruction-tuned models, providing a flexible API for various text generation tasks. It can be used in scenarios ranging from generating creative content to providing instructions or answers based on the prompts.

    Attributes:

    Name Type Description model Any

    The loaded instruction-tuned language model.

    tokenizer Any

    The tokenizer for processing text suitable for the model.

    Methods

    complete(**kwargs: Any) -> Dict[str, Any]: Generates text based on the given prompt and decoding strategy.

    listen(**model_args: Any) -> None: Starts a server to listen for text generation requests.

    CLI Usage Example:

    genius InstructionAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"TheBloke/Mistral-7B-OpenOrca-AWQ\" \\\nmodel_class=\"AutoModelForCausalLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float16\" \\\nquantization=0 \\\ndevice_map=\"auto\" \\\nmax_memory=None \\\ntorchscript=False \\\nawq_enabled=True \\\nflash_attention=True \\\nendpoint=\"*\" \\\nport=3001 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    Or using VLLM:

    genius InstructionAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id mistralai/Mistral-7B-Instruct-v0.1 \\\nlisten \\\n--args \\\nmodel_name=\"mistralai/Mistral-7B-Instruct-v0.1\" \\\nmodel_class=\"AutoModelForCausalLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\ndevice_map=\"auto\" \\\nmax_memory=None \\\ntorchscript=False \\\nuse_vllm=True \\\nvllm_enforce_eager=True \\\nvllm_max_model_len=1024 \\\nconcurrent_queries=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    or using llama.cpp:

    genius InstructionAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"TheBloke/Mistral-7B-Instruct-v0.2-GGUF\" \\\nmodel_class=\"AutoModelForCausalLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nuse_llama_cpp=True \\\nllama_cpp_filename=\"mistral-7b-instruct-v0.2.Q4_K_M.gguf\" \\\nllama_cpp_n_gpu_layers=35 \\\nllama_cpp_n_ctx=32768 \\\nconcurrent_queries=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    "},{"location":"text/api/instruction_tuning/#geniusrise_text.instruction.api.InstructionAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes a new instance of the InstructionAPI class, setting up the necessary configurations for input, output, and state.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    The state of the API.

    required **kwargs Any

    Additional keyword arguments for extended functionality.

    {}"},{"location":"text/api/instruction_tuning/#geniusrise_text.instruction.api.InstructionAPI.chat","title":"chat(**kwargs)","text":"

    Handles chat interaction using the Hugging Face pipeline. This method enables conversational text generation, simulating a chat-like interaction based on user and system prompts.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments containing 'user_prompt' and 'system_prompt'.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the user prompt, system prompt, and chat interaction results.

    Example CURL Request for chat interaction:

    /usr/bin/curl -X POST localhost:3001/api/v1/chat             -H \"Content-Type: application/json\"             -d '{\n        \"user_prompt\": \"What is the capital of France?\",\n        \"system_prompt\": \"The capital of France is\"\n    }' | jq\n

    "},{"location":"text/api/instruction_tuning/#geniusrise_text.instruction.api.InstructionAPI.chat_llama_cpp","title":"chat_llama_cpp(**kwargs)","text":"

    Handles POST requests to generate chat completions using the llama.cpp engine. This method accepts various parameters for customizing the chat completion request, including messages, sampling settings, and more.

    Parameters:

    Name Type Description Default messages List[Dict[str, str]]

    The chat messages for generating a response.

    required functions Optional[List[Dict]]

    A list of functions to use for the chat completion (advanced usage).

    required function_call Optional[Dict]

    A function call to use for the chat completion (advanced usage).

    required tools Optional[List[Dict]]

    A list of tools to use for the chat completion (advanced usage).

    required tool_choice Optional[Dict]

    A tool choice option for the chat completion (advanced usage).

    required temperature float

    The temperature to use for sampling, controlling randomness.

    required top_p float

    The nucleus sampling's top-p parameter, controlling diversity.

    required top_k int

    The top-k sampling parameter, limiting the token selection pool.

    required min_p float

    The minimum probability threshold for sampling.

    required typical_p float

    The typical-p parameter for locally typical sampling.

    required stream bool

    Flag to stream the results.

    required stop Optional[Union[str, List[str]]]

    Tokens or sequences where generation should stop.

    required seed Optional[int]

    Seed for random number generation to ensure reproducibility.

    required response_format Optional[Dict]

    Specifies the format of the generated response.

    required max_tokens Optional[int]

    Maximum number of tokens to generate.

    required presence_penalty float

    Penalty for token presence to discourage repetition.

    required frequency_penalty float

    Penalty for token frequency to discourage common tokens.

    required repeat_penalty float

    Penalty applied to tokens that are repeated.

    required tfs_z float

    Tail-free sampling parameter to adjust the likelihood of tail tokens.

    required mirostat_mode int

    Mirostat sampling mode for dynamic adjustments.

    required mirostat_tau float

    Tau parameter for mirostat sampling, controlling deviation.

    required mirostat_eta float

    Eta parameter for mirostat sampling, controlling adjustment speed.

    required model Optional[str]

    Specifies the model to use for generation.

    required logits_processor Optional[List]

    List of logits processors for advanced generation control.

    required grammar Optional[Dict]

    Specifies grammar rules for the generated text.

    required logit_bias Optional[Dict[str, float]]

    Adjustments to the logits of specified tokens.

    required logprobs Optional[bool]

    Whether to include log probabilities in the output.

    required top_logprobs Optional[int]

    Number of top log probabilities to include.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the chat completion response or an error message.

    Example CURL Request:

    curl -X POST \"http://localhost:3000/api/v1/chat_llama_cpp\"             -H \"Content-Type: application/json\"             -d '{\n        \"messages\": [\n            {\"role\": \"user\", \"content\": \"What is the capital of France?\"},\n            {\"role\": \"system\", \"content\": \"The capital of France is\"}\n        ],\n        \"temperature\": 0.2,\n        \"top_p\": 0.95,\n        \"top_k\": 40,\n        \"max_tokens\": 50,\n    }'\n

    "},{"location":"text/api/instruction_tuning/#geniusrise_text.instruction.api.InstructionAPI.chat_vllm","title":"chat_vllm(**kwargs)","text":"

    Handles POST requests to generate chat completions using the VLLM (Versatile Language Learning Model) engine. This method accepts various parameters for customizing the chat completion request, including message content, generation settings, and more.

    Parameters:

    Name Type Description Default messages List[Dict[str, str]]

    The chat messages for generating a response. Each message should include a 'role' (either 'user' or 'system') and 'content'.

    required temperature float

    The sampling temperature. Defaults to 0.7. Higher values generate more random completions.

    required top_p float

    The nucleus sampling probability. Defaults to 1.0. A smaller value leads to higher diversity.

    required n int

    The number of completions to generate. Defaults to 1.

    required max_tokens int

    The maximum number of tokens to generate. Controls the length of the generated response.

    required stop Union[str, List[str]]

    Sequence(s) where the generation should stop. Can be a single string or a list of strings.

    required stream bool

    Whether to stream the response. Streaming may be useful for long completions.

    required presence_penalty float

    Adjusts the likelihood of tokens based on their presence in the conversation so far. Defaults to 0.0.

    required frequency_penalty float

    Adjusts the likelihood of tokens based on their frequency in the conversation so far. Defaults to 0.0.

    required logit_bias Dict[str, float]

    Adjustments to the logits of specified tokens, identified by token IDs as keys and adjustment values as values.

    required user str

    An identifier for the user making the request. Can be used for logging or customization.

    required best_of int

    Generates 'n' completions server-side and returns the best one. Higher values incur more computation cost.

    required top_k int

    Filters the generated tokens to the top-k tokens with the highest probabilities. Defaults to -1, which disables top-k filtering.

    required ignore_eos bool

    Whether to ignore the end-of-sentence token in generation. Useful for more fluid continuations.

    required use_beam_search bool

    Whether to use beam search instead of sampling for generation. Beam search can produce more coherent results.

    required stop_token_ids List[int]

    List of token IDs that should cause generation to stop.

    required skip_special_tokens bool

    Whether to skip special tokens (like padding or end-of-sequence tokens) in the output.

    required spaces_between_special_tokens bool

    Whether to insert spaces between special tokens in the output.

    required add_generation_prompt bool

    Whether to prepend the generation prompt to the output.

    required echo bool

    Whether to include the input prompt in the output.

    required repetition_penalty float

    Penalty applied to tokens that have been generated previously. Defaults to 1.0, which applies no penalty.

    required min_p float

    Sets a minimum threshold for token probabilities. Tokens with probabilities below this threshold are filtered out.

    required include_stop_str_in_output bool

    Whether to include the stop string(s) in the output.

    required length_penalty float

    Exponential penalty to the length for beam search. Only relevant if use_beam_search is True.

    required

    Dict[str, Any]: A dictionary with the chat completion response or an error message.

    Example CURL Request:

    curl -X POST \"http://localhost:3000/api/v1/chat_vllm\"             -H \"Content-Type: application/json\"             -d '{\n        \"messages\": [\n            {\"role\": \"user\", \"content\": \"Whats the weather like in London?\"}\n        ],\n        \"temperature\": 0.7,\n        \"top_p\": 1.0,\n        \"n\": 1,\n        \"max_tokens\": 50,\n        \"stream\": false,\n        \"presence_penalty\": 0.0,\n        \"frequency_penalty\": 0.0,\n        \"logit_bias\": {},\n        \"user\": \"example_user\"\n    }'\n
    This request asks the VLLM engine to generate a completion for the provided chat context, with specified generation settings.

    "},{"location":"text/api/instruction_tuning/#geniusrise_text.instruction.api.InstructionAPI.complete","title":"complete(**kwargs)","text":"
        Handles POST requests to generate text based on the given prompt and decoding strategy. It uses the pre-trained\n    model specified in the setup to generate a completion for the input prompt.\n\n    Args:\n        **kwargs (Any): Arbitrary keyword arguments containing the 'prompt' and other parameters for text generation.\n\n    Returns:\n        Dict[str, Any]: A dictionary containing the original prompt and the generated completion.\n\n    Example CURL Requests:\n    ```bash\n    /usr/bin/curl -X POST localhost:3001/api/v1/complete             -H \"Content-Type: application/json\"             -d '{\n            \"prompt\": \"<|system|>\n

    <|end|> <|user|> How do I sort a list in Python?<|end|> <|assistant|>\", \"decoding_strategy\": \"generate\", \"max_new_tokens\": 100, \"do_sample\": true, \"temperature\": 0.7, \"top_k\": 50, \"top_p\": 0.95 }' | jq ```

    "},{"location":"text/api/instruction_tuning/#geniusrise_text.instruction.api.InstructionAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the Hugging Face pipeline for chat interaction.

    "},{"location":"text/api/language_model/","title":"Language Model","text":"

    Bases: TextAPI

    LanguageModelAPI is a class for interacting with pre-trained language models to generate text. It allows for customizable text generation via a CherryPy web server, handling requests and generating responses using a specified language model. This class is part of the GeniusRise ecosystem for facilitating NLP tasks.

    Attributes:

    Name Type Description model Any

    The loaded language model used for text generation.

    tokenizer Any

    The tokenizer corresponding to the language model, used for processing input text.

    Methods

    complete(**kwargs: Any) -> Dict[str, Any]: Generates text based on provided prompts and model parameters.

    CLI Usage Example:

    genius LanguageModelAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id mistralai/Mistral-7B-v0.1-lol \\\nlisten \\\n--args \\\nmodel_name=\"mistralai/Mistral-7B-v0.1\" \\\nmodel_class=\"AutoModelForCausalLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float16\" \\\nquantization=0 \\\ndevice_map=\"auto\" \\\nmax_memory=None \\\ntorchscript=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    or using VLLM:

    genius LanguageModelAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id mistralai/Mistral-7B-v0.1 \\\nlisten \\\n--args \\\nmodel_name=\"mistralai/Mistral-7B-v0.1\" \\\nmodel_class=\"AutoModelForCausalLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nuse_vllm=True \\\nvllm_enforce_eager=True \\\nvllm_max_model_len=2048 \\\nconcurrent_queries=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    or using llama.cpp:

    genius LanguageModelAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"TheBloke/Mistral-7B-v0.1-GGUF\" \\\nmodel_class=\"AutoModelForCausalLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nuse_llama_cpp=True \\\nllama_cpp_filename=\"mistral-7b-v0.1.Q4_K_M.gguf\" \\\nllama_cpp_n_gpu_layers=35 \\\nllama_cpp_n_ctx=32768 \\\nconcurrent_queries=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    "},{"location":"text/api/language_model/#geniusrise_text.language_model.api.LanguageModelAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the LanguageModelAPI with configurations for the input, output, and state management, along with any additional model-specific parameters.

    Parameters:

    Name Type Description Default input BatchInput

    The configuration for input data handling.

    required output BatchOutput

    The configuration for output data handling.

    required state State

    The state management for the API.

    required **kwargs Any

    Additional keyword arguments for model configuration and API setup.

    {}"},{"location":"text/api/language_model/#geniusrise_text.language_model.api.LanguageModelAPI.complete","title":"complete(**kwargs)","text":"

    Handles POST requests to generate text based on a given prompt and model-specific parameters. This method is exposed as a web endpoint through CherryPy and returns a JSON response containing the original prompt, the generated text, and any additional returned information from the model.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments containing the prompt, and any additional parameters

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary with the original prompt, generated text, and other model-specific information.

    Example CURL Request:

    /usr/bin/curl -X POST localhost:3000/api/v1/complete \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"prompt\": \"Below is an instruction that describes a task. Write a response that appropriately completes the request.\\n\\n### Instruction:\\nWrite a PRD for Oauth auth using keycloak\\n\\n### Response:\",\n        \"decoding_strategy\": \"generate\",\n        \"max_new_tokens\": 1024,\n        \"do_sample\": true\n    }' | jq\n

    "},{"location":"text/api/language_model/#geniusrise_text.language_model.api.LanguageModelAPI.complete_llama_cpp","title":"complete_llama_cpp(**kwargs)","text":"

    Handles POST requests to generate chat completions using the llama.cpp engine. This method accepts various parameters for customizing the chat completion request, including messages, sampling settings, and more.

    Parameters:

    Name Type Description Default prompt

    The prompt to generate text from.

    required suffix

    A suffix to append to the generated text. If None, no suffix is appended.

    required max_tokens

    The maximum number of tokens to generate. If max_tokens <= 0 or None, the maximum number of tokens to generate is unlimited and depends on n_ctx.

    required temperature

    The temperature to use for sampling.

    required top_p

    The top-p value to use for nucleus sampling. Nucleus sampling described in academic paper \"The Curious Case of Neural Text Degeneration\" https://arxiv.org/abs/1904.09751

    required min_p

    The min-p value to use for minimum p sampling. Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841

    required typical_p

    The typical-p value to use for sampling. Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.

    required logprobs

    The number of logprobs to return. If None, no logprobs are returned.

    required echo

    Whether to echo the prompt.

    required stop

    A list of strings to stop generation when encountered.

    required frequency_penalty

    The penalty to apply to tokens based on their frequency in the prompt.

    required presence_penalty

    The penalty to apply to tokens based on their presence in the prompt.

    required repeat_penalty

    The penalty to apply to repeated tokens.

    required top_k

    The top-k value to use for sampling. Top-K sampling is described in the academic paper \"The Curious Case of Neural Text Degeneration\" https://arxiv.org/abs/1904.09751

    required stream

    Whether to stream the results.

    required seed

    The seed to use for sampling.

    required tfs_z

    The tail-free sampling parameter. Tail Free Sampling is described in https://www.trentonbricken.com/Tail-Free-Sampling/.

    required mirostat_mode

    The mirostat sampling mode.

    required mirostat_tau

    The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.

    required mirostat_eta

    The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates.

    required model

    The name to use for the model in the completion object.

    required stopping_criteria

    A list of stopping criteria to use.

    required logits_processor

    A list of logits processors to use.

    required grammar

    A grammar to use for constrained sampling.

    required logit_bias

    A logit bias to use.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the text completion response or an error message.

    Example CURL Request:

    curl -X POST \"http://localhost:3001/api/v1/complete_llama_cpp\"             -H \"Content-Type: application/json\"             -d '{\n        \"prompt\": \"Whats the weather like in London?\",\n        \"temperature\": 0.7,\n        \"top_p\": 0.95,\n        \"top_k\": 40,\n        \"max_tokens\": 50,\n        \"repeat_penalty\": 1.1\n    }'\n

    "},{"location":"text/api/language_model/#geniusrise_text.language_model.api.LanguageModelAPI.complete_vllm","title":"complete_vllm(**kwargs)","text":"

    Handles POST requests to generate chat completions using the VLLM (Versatile Language Learning Model) engine. This method accepts various parameters for customizing the chat completion request, including message content, generation settings, and more.

    • **kwargs (Any): Arbitrary keyword arguments. Expects data in JSON format containing any of the following keys:
      • messages (Union[str, List[Dict[str, str]]]): The messages for the chat context.
      • temperature (float, optional): The sampling temperature. Defaults to 0.7.
      • top_p (float, optional): The nucleus sampling probability. Defaults to 1.0.
      • n (int, optional): The number of completions to generate. Defaults to 1.
      • max_tokens (int, optional): The maximum number of tokens to generate.
      • stop (Union[str, List[str]], optional): Stop sequence to end generation.
      • stream (bool, optional): Whether to stream the response. Defaults to False.
      • presence_penalty (float, optional): The presence penalty. Defaults to 0.0.
      • frequency_penalty (float, optional): The frequency penalty. Defaults to 0.0.
      • logit_bias (Dict[str, float], optional): Adjustments to the logits of specified tokens.
      • user (str, optional): An identifier for the user making the request.
      • (Additional model-specific parameters)

    Dict[str, Any]: A dictionary with the chat completion response or an error message.

    Example CURL Request:

    curl -v -X POST \"http://localhost:3000/api/v1/complete_vllm\"             -H \"Content-Type: application/json\"             -u \"user:password\"             -d '{\n        \"messages\": [\"Whats the weather like in London?\"],\n        \"temperature\": 0.7,\n        \"top_p\": 1.0,\n        \"n\": 1,\n        \"max_tokens\": 50,\n        \"stream\": false,\n        \"presence_penalty\": 0.0,\n        \"frequency_penalty\": 0.0,\n        \"logit_bias\": {},\n        \"user\": \"example_user\"\n    }'\n
    This request asks the VLLM engine to generate a completion for the provided chat context, with specified generation settings.

    "},{"location":"text/api/ner/","title":"Named Entity Recognition","text":"

    Bases: TextAPI

    NamedEntityRecognitionAPI serves a Named Entity Recognition (NER) model using the Hugging Face transformers library. It is designed to recognize and classify named entities in text into predefined categories such as the names of persons, organizations, locations, expressions of times, quantities, monetary values, percentages, etc.

    Attributes:

    Name Type Description model Any

    The loaded NER model, typically a Hugging Face transformer model specialized for token classification.

    tokenizer Any

    The tokenizer for preprocessing text compatible with the loaded model.

    Example CLI Usage:

    genius NamedEntityRecognitionAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id dslim/bert-large-NER-lol \\\nlisten \\\n--args \\\nmodel_name=\"dslim/bert-large-NER\" \\\nmodel_class=\"AutoModelForTokenClassification\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\nendpoint=\"0.0.0.0\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    "},{"location":"text/api/ner/#geniusrise_text.ner.api.NamedEntityRecognitionAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the NamedEntityRecognitionAPI class.

    Parameters:

    Name Type Description Default input BatchInput

    The input data.

    required output BatchOutput

    The output data.

    required state State

    The state data.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"text/api/ner/#geniusrise_text.ner.api.NamedEntityRecognitionAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the NER Hugging Face pipeline.

    "},{"location":"text/api/ner/#geniusrise_text.ner.api.NamedEntityRecognitionAPI.ner_pipeline","title":"ner_pipeline(**kwargs)","text":"

    Recognizes named entities in the input text using the Hugging Face pipeline.

    This method leverages a pre-trained NER model to identify and classify entities in text into categories such as names, organizations, locations, etc. It's suitable for processing various types of text content.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'text' for the input text.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the original input text and a list of recognized entities.

    Example CURL Request for NER:

    curl -X POST localhost:3000/api/v1/ner_pipeline             -H \"Content-Type: application/json\"             -d '{\"text\": \"John Doe works at OpenAI in San Francisco.\"}' | jq\n

    "},{"location":"text/api/ner/#geniusrise_text.ner.api.NamedEntityRecognitionAPI.recognize_entities","title":"recognize_entities(**kwargs)","text":"

    Endpoint for recognizing named entities in the input text using the loaded NER model.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'text' for the input text.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the original input text and a list of recognized entities with their respective types.

    Example CURL Requests:

    curl -X POST localhost:3000/api/v1/recognize_entities \\\n-H \"Content-Type: application/json\" \\\n-d '{\"text\": \"John Doe works at OpenAI in San Francisco.\"}' | jq\n

    curl -X POST localhost:3000/api/v1/recognize_entities \\\n-H \"Content-Type: application/json\" \\\n-d '{\"text\": \"Alice is going to visit the Eiffel Tower in Paris next summer.\"}' | jq\n
    "},{"location":"text/api/nli/","title":"Natural Language Inference","text":"

    Bases: TextAPI

    Represents a Natural Language Inference (NLI) API leveraging Hugging Face's transformer models. This class is capable of handling various NLI tasks such as entailment, classification, similarity checking, and more. Utilizes CherryPy for exposing API endpoints that can be interacted with via standard HTTP requests.

    Attributes:

    Name Type Description model AutoModelForSequenceClassification

    The loaded Hugging Face model for sequence classification tasks.

    tokenizer AutoTokenizer

    The tokenizer corresponding to the model, used for processing input text.

    CLI Usage Example: For interacting with the NLI API, you would typically start the server using a command similar to one listed in the provided examples. After the server is running, you can use CURL commands to interact with the different endpoints.

    Example:

    genius NLIAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id \"MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7-lol\" \\\nlisten \\\n--args \\\nmodel_name=\"MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7\" \\\nmodel_class=\"AutoModelForSequenceClassification\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    "},{"location":"text/api/nli/#geniusrise_text.nli.api.NLIAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the NLIAPI with configurations for handling input, output, and state management.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the API.

    required **kwargs Any

    Additional keyword arguments for extended functionality.

    {}"},{"location":"text/api/nli/#geniusrise_text.nli.api.NLIAPI.classify","title":"classify(**kwargs)","text":"

    Endpoint for classifying the input text into one of the provided candidate labels using zero-shot classification.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'text' and 'candidate_labels'.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the input text, candidate labels, and classification scores.

    Example CURL Request:

    curl -X POST localhost:3000/api/v1/classify \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"text\": \"The new movie is a thrilling adventure in space\",\n        \"candidate_labels\": [\"entertainment\", \"politics\", \"business\"]\n    }'\n

    "},{"location":"text/api/nli/#geniusrise_text.nli.api.NLIAPI.detect_intent","title":"detect_intent(**kwargs)","text":"

    Detects the intent of the input text from a list of possible intents.

    Parameters:

    Name Type Description Default text str

    The input text.

    required intents List[str]

    A list of possible intents.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the input text and detected intent with its score.

    Example CURL Request:

    /usr/bin/curl -X POST localhost:3000/api/v1/detect_intent \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"text\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n        \"intents\": [\"teach\",\"sell\",\"note\",\"advertise\",\"promote\"]\n    }' | jq\n

    "},{"location":"text/api/nli/#geniusrise_text.nli.api.NLIAPI.entailment","title":"entailment(**kwargs)","text":"

    Endpoint for evaluating the entailment relationship between a premise and a hypothesis. It returns the relationship scores across possible labels like entailment, contradiction, and neutral.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'premise' and 'hypothesis'.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the premise, hypothesis, and their relationship scores.

    Example CURL Request:

    /usr/bin/curl -X POST localhost:3000/api/v1/entailment \\\n-H \"Content-Type: application/json\" \\\\\\\n-d '{\n        \"premise\": \"This a very good entry level smartphone, battery last 2-3 days after fully charged when connected to the internet. No memory lag issue when playing simple hidden object games. Performance is beyond my expectation, i bought it with a good bargain, couldnt ask for more!\",\n        \"hypothesis\": \"the phone has an awesome battery life\"\n    }' | jq\n
    ```

    "},{"location":"text/api/nli/#geniusrise_text.nli.api.NLIAPI.fact_checking","title":"fact_checking(**kwargs)","text":"

    Performs fact checking on a statement given a context.

    Parameters:

    Name Type Description Default context str

    The context or background information.

    required statement str

    The statement to fact check.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing fact checking scores.

    Example CURL Request:

    /usr/bin/curl -X POST localhost:3000/api/v1/fact_checking \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"context\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n        \"statement\": \"The author is looking for a home loan\"\n    }' | jq\n

    "},{"location":"text/api/nli/#geniusrise_text.nli.api.NLIAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the NLI Hugging Face pipeline.

    "},{"location":"text/api/nli/#geniusrise_text.nli.api.NLIAPI.question_answering","title":"question_answering(**kwargs)","text":"

    Performs question answering for multiple choice questions.

    Parameters:

    Name Type Description Default question str

    The question text.

    required choices List[str]

    A list of possible answers.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the scores for each answer choice.

    Example CURL Request:

    /usr/bin/curl -X POST localhost:3000/api/v1/question_answering \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"question\": \"[ML-1T-2] is the dimensional formula of\",\n        \"choices\": [\"force\", \"coefficient of friction\", \"modulus of elasticity\", \"energy\"]\n    }' | jq\n

    "},{"location":"text/api/nli/#geniusrise_text.nli.api.NLIAPI.textual_similarity","title":"textual_similarity(**kwargs)","text":"

    Evaluates the textual similarity between two texts.

    Parameters:

    Name Type Description Default text1 str

    The first text.

    required text2 str

    The second text.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing similarity score.

    Example CURL Request:

    /usr/bin/curl -X POST localhost:3000/api/v1/textual_similarity \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"text1\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n        \"text2\": \"There is something magical about training neural networks. Their simplicity coupled with their power is astonishing.\"\n    }' | jq\n

    "},{"location":"text/api/nli/#geniusrise_text.nli.api.NLIAPI.zero_shot_classification","title":"zero_shot_classification(**kwargs)","text":"

    Performs zero-shot classification using the Hugging Face pipeline. It allows classification of text without explicitly provided labels.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'premise' and 'hypothesis'.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the premise, hypothesis, and their classification scores.

    Example CURL Request for zero-shot classification:

    curl -X POST localhost:3000/api/v1/zero_shot_classification             -H \"Content-Type: application/json\"             -d '{\n        \"premise\": \"A new study shows that the Mediterranean diet is good for heart health.\",\n        \"hypothesis\": \"The study is related to diet and health.\"\n    }' | jq\n

    "},{"location":"text/api/question_answering/","title":"Question Answering","text":"

    Bases: TextAPI

    "},{"location":"text/api/question_answering/#geniusrise_text.qa.api.QAAPI.tokenizer","title":"tokenizer: AutoTokenizer instance-attribute","text":"

    A class for handling different types of QA models, including traditional QA, TAPAS (Table-based QA), and TAPEX. It utilizes the Hugging Face transformers library to provide state-of-the-art question answering capabilities across various formats of data including plain text and tabular data.

    Attributes:

    Name Type Description model AutoModelForQuestionAnswering | AutoModelForTableQuestionAnswering

    The pre-trained QA model (traditional, TAPAS, or TAPEX).

    tokenizer AutoTokenizer

    The tokenizer used to preprocess input text.

    Methods

    answer(self, **kwargs: Any) -> Dict[str, Any]: Answers questions based on the provided context (text or table).

    CLI Usage Example:

    genius QAAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id distilbert-base-uncased-distilled-squad-lol \\\nlisten \\\n--args \\\nmodel_name=\"distilbert-base-uncased-distilled-squad\" \\\nmodel_class=\"AutoModelForQuestionAnswering\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    genius QAAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id google/tapas-base-finetuned-wtq-lol \\\nlisten \\\n--args \\\nmodel_name=\"google/tapas-base-finetuned-wtq\" \\\nmodel_class=\"AutoModelForTableQuestionAnswering\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    genius QAAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id microsoft/tapex-large-finetuned-wtq-lol \\\nlisten \\\n--args \\\nmodel_name=\"microsoft/tapex-large-finetuned-wtq\" \\\nmodel_class=\"AutoModelForSeq2SeqLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    "},{"location":"text/api/question_answering/#geniusrise_text.qa.api.QAAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the QAAPI with configurations for input, output, and state management.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the API.

    required **kwargs Any

    Additional keyword arguments for extended functionality.

    {}"},{"location":"text/api/question_answering/#geniusrise_text.qa.api.QAAPI.answer","title":"answer(**kwargs)","text":"

    Answers questions based on the provided context (text or table). It adapts to the model type (traditional, TAPAS, TAPEX) and provides answers accordingly.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing the 'question' and 'data' (context or table).

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the question, context/table, and answer(s).

    Example CURL Request for Text-based QA:

    curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-d '{\"question\": \"What is the capital of France?\", \"data\": \"France is a country in Europe. Its capital is Paris.\"}'\n

    Example CURL Requests:

    /usr/bin/curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"data\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n        \"question\": \"What is the common wisdom about RNNs?\"\n    }' | jq\n

    /usr/bin/curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-d '{\n    \"data\": [\n        {\"Name\": \"Alice\", \"Age\": \"30\"},\n        {\"Name\": \"Bob\", \"Age\": \"25\"}\n    ],\n    \"question\": \"what is their total age?\"\n}\n' | jq\n
    /usr/bin/curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-d '{\n    \"data\": {\"Actors\": [\"Brad Pitt\", \"Leonardo Di Caprio\", \"George Clooney\"], \"Number of movies\": [\"87\", \"53\", \"69\"]},\n    \"question\": \"how many movies does Leonardo Di Caprio have?\"\n}\n' | jq\n
    "},{"location":"text/api/question_answering/#geniusrise_text.qa.api.QAAPI.answer_pipeline","title":"answer_pipeline(**kwargs)","text":"

    Answers questions using the Hugging Face pipeline based on the provided context.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'question' and 'data'.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the question, context, and the answer.

    Example CURL Request for QA:

    curl -X POST localhost:3000/api/v1/answer_pipeline             -H \"Content-Type: application/json\"             -d '{\"question\": \"Who is the CEO of Tesla?\", \"data\": \"Elon Musk is the CEO of Tesla.\"}'\n

    "},{"location":"text/api/question_answering/#geniusrise_text.qa.api.QAAPI.answer_table_question","title":"answer_table_question(data, question, model_type)","text":"

    Answers a question based on the provided table.

    Parameters:

    Name Type Description Default data Dict[str, Any]

    The table data and other parameters.

    required question str

    The question to be answered.

    required model_type str

    The type of the model ('tapas' or 'tapex').

    required

    Returns:

    Name Type Description str dict

    The answer derived from the table.

    "},{"location":"text/api/question_answering/#geniusrise_text.qa.api.QAAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the QA Hugging Face pipeline.

    "},{"location":"text/api/summarization/","title":"Summarization","text":"

    Bases: TextAPI

    A class for serving a Hugging Face-based summarization model. This API provides an interface to submit text and receive a summarized version, utilizing state-of-the-art machine learning models for text summarization.

    Attributes:

    Name Type Description model AutoModelForSeq2SeqLM

    The loaded Hugging Face model for summarization.

    tokenizer AutoTokenizer

    The tokenizer for preprocessing text.

    Methods

    summarize(self, **kwargs: Any) -> Dict[str, Any]: Summarizes the input text based on the given parameters.

    CLI Usage:

    genius SummarizationAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id facebook/bart-large-cnn-lol \\\nlisten \\\n--args \\\nmodel_name=\"facebook/bart-large-cnn\" \\\nmodel_class=\"AutoModelForSeq2SeqLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    "},{"location":"text/api/summarization/#geniusrise_text.summarization.api.SummarizationAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the SummarizationAPI class with input, output, and state configurations.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for input data.

    required output BatchOutput

    Configuration for output data.

    required state State

    State management for API.

    required **kwargs Any

    Additional keyword arguments for extended functionality.

    {}"},{"location":"text/api/summarization/#geniusrise_text.summarization.api.SummarizationAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the summarization Hugging Face pipeline.

    "},{"location":"text/api/summarization/#geniusrise_text.summarization.api.SummarizationAPI.summarize","title":"summarize(**kwargs)","text":"

    Summarizes the input text based on the given parameters using a machine learning model. The method accepts parameters via a POST request and returns the summarized text.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments. Expected to receive these from the POST request's JSON body.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the input text and its summary.

    Example CURL Requests:

    /usr/bin/curl -X POST localhost:3000/api/v1/summarize \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"text\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n        \"decoding_strategy\": \"generate\",\n        \"bos_token_id\": 0,\n        \"decoder_start_token_id\": 2,\n        \"early_stopping\": true,\n        \"eos_token_id\": 2,\n        \"forced_bos_token_id\": 0,\n        \"forced_eos_token_id\": 2,\n        \"length_penalty\": 2.0,\n        \"max_length\": 142,\n        \"min_length\": 56,\n        \"no_repeat_ngram_size\": 3,\n        \"num_beams\": 4,\n        \"pad_token_id\": 1,\n        \"do_sample\": false\n    }' | jq\n

    /usr/bin/curl -X POST localhost:3000/api/v1/summarize \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"text\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n        \"decoding_strategy\": \"generate\",\n        \"early_stopping\": true,\n        \"length_penalty\": 2.0,\n        \"max_length\": 142,\n        \"min_length\": 56,\n        \"no_repeat_ngram_size\": 3,\n        \"num_beams\": 4\n    }' | jq\n
    "},{"location":"text/api/summarization/#geniusrise_text.summarization.api.SummarizationAPI.summarize_pipeline","title":"summarize_pipeline(**kwargs)","text":"

    Summarizes the input text using the Hugging Face pipeline based on given parameters.

    Parameters:

    Name Type Description Default **kwargs Any

    Keyword arguments containing parameters for summarization.

    {}

    Returns:

    Type Description Dict[str, Any]

    A dictionary containing the input text and its summary.

    Example CURL Request for summarization: curl -X POST localhost:3000/api/v1/summarize_pipeline -H \"Content-Type: application/json\" -d '{\"text\": \"Your long text here\"}'

    "},{"location":"text/api/translation/","title":"Translation","text":"

    Bases: TextAPI

    A class for serving a Hugging Face-based translation model as a web API. This API allows users to submit text for translation and receive translated text in the specified target language using advanced machine learning models.

    Parameters:

    Name Type Description Default input BatchInput

    Configurations and data inputs for the batch process.

    required output BatchOutput

    Configurations for output data handling.

    required state State

    State management for the translation task.

    required **kwargs Any

    Additional keyword arguments for extended configurations.

    {}

    Example CLI Usage for interacting with the API:

    To start the API server:

    genius TranslationAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id facebook/mbart-large-50-many-to-many-mmt-lol \\\nlisten \\\n--args \\\nmodel_name=\"facebook/mbart-large-50-many-to-many-mmt\" \\\nmodel_class=\"AutoModelForSeq2SeqLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    To translate text using the API:

    curl -X POST localhost:3000/api/v1/translate \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"text\": \"Hello, world!\",\n        \"source_lang\": \"en\",\n        \"target_lang\": \"fr\",\n        \"decoding_strategy\": \"beam_search\",\n        \"num_beams\": 5\n    }'\n

    "},{"location":"text/api/translation/#geniusrise_text.translation.api.TranslationAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the translation Hugging Face pipeline.

    "},{"location":"text/api/translation/#geniusrise_text.translation.api.TranslationAPI.translate","title":"translate(**kwargs)","text":"

    Translates text to a specified target language using the underlying Hugging Face model.

    This endpoint accepts JSON data with the text and language details, processes it through the machine learning model, and returns the translated text.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, usually empty as parameters are in the POST body.

    {} POST body parameters

    text (str): The text to be translated. decoding_strategy (str): Strategy to use for decoding text; e.g., 'beam_search', 'greedy'. Default is 'generate'. source_lang (str): Source language code. target_lang (str): Target language code. Default is 'en'. additional_params (dict): Other model-specific parameters for translation.

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary with the original text, target language, and translated text.

    Example CURL requests:

    To translate text from English to French:

    curl -X POST localhost:3000/api/v1/translate \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"text\": \"Hello, world!\",\n        \"source_lang\": \"en\",\n        \"target_lang\": \"fr\",\n        \"decoding_strategy\": \"beam_search\",\n        \"num_beams\": 5\n    }'\n

    To translate text from Hindi to English:

    /usr/bin/curl -X POST localhost:3000/api/v1/translate \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"text\": \"\u0938\u0902\u092f\u0941\u0915\u094d\u0924 \u0930\u093e\u0937\u094d\u091f\u094d\u0930 \u0915\u0947 \u092a\u094d\u0930\u092e\u0941\u0916 \u0915\u093e \u0915\u0939\u0928\u093e \u0939\u0948 \u0915\u093f \u0938\u0940\u0930\u093f\u092f\u093e \u092e\u0947\u0902 \u0915\u094b\u0908 \u0938\u0948\u0928\u094d\u092f \u0938\u092e\u093e\u0927\u093e\u0928 \u0928\u0939\u0940\u0902 \u0939\u0948\",\n        \"source_lang\": \"hi_IN\",\n        \"target_lang\": \"en_XX\",\n        \"decoding_strategy\": \"generate\",\n        \"decoder_start_token_id\": 2,\n        \"early_stopping\": true,\n        \"eos_token_id\": 2,\n        \"forced_eos_token_id\": 2,\n        \"max_length\": 200,\n        \"num_beams\": 5,\n        \"pad_token_id\": 1\n    }' | jq\n

    "},{"location":"text/api/translation/#geniusrise_text.translation.api.TranslationAPI.translate_pipeline","title":"translate_pipeline(**kwargs)","text":"

    Endpoint for translating text using a pre-initialized Hugging Face translation pipeline. This method is designed to handle translation requests more efficiently by utilizing a preloaded model and tokenizer, reducing the overhead of loading these components for each request.

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the original text, source language, target language, and the translated text.

    Example CURL Request for translation:

    curl -X POST localhost:3000/api/v1/translate_pipeline             -H \"Content-Type: application/json\"             -d '{\n        \"text\": \"Hello, world!\",\n        \"source_lang\": \"en\",\n        \"target_lang\": \"fr\"\n    }'\n

    "},{"location":"text/bulk/base/","title":"Base Fine Tuner","text":"

    Bases: Bolt

    TextBulk is a foundational class for enabling bulk processing of text with various generation models. It primarily focuses on using Hugging Face models to provide a robust and efficient framework for large-scale text generation tasks. The class supports various decoding strategies to generate text that can be tailored to specific needs or preferences.

    Attributes:

    Name Type Description model AutoModelForCausalLM

    The language model for text generation.

    tokenizer AutoTokenizer

    The tokenizer for preparing input data for the model.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration and data inputs for the batch process.

    required output BatchOutput

    Configurations for output data handling.

    required state State

    State management for the Bolt.

    required **kwargs

    Arbitrary keyword arguments for extended configurations.

    {} Methods

    text(**kwargs: Any) -> Dict[str, Any]: Provides an API endpoint for text generation functionality. Accepts various parameters for customizing the text generation process.

    generate(prompt: str, decoding_strategy: str = \"generate\", **generation_params: Any) -> dict: Generates text based on the provided prompt and parameters. Supports multiple decoding strategies for diverse applications.

    The class serves as a versatile tool for text generation, supporting various models and configurations. It can be extended or used as is for efficient text generation tasks.

    "},{"location":"text/bulk/base/#geniusrise_text.base.bulk.TextBulk.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the TextBulk with configurations and sets up logging. It prepares the environment for text generation tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The input data configuration for the text generation task.

    required output BatchOutput

    The output data configuration for the results of the text generation.

    required state State

    The state configuration for the Bolt, managing its operational status.

    required **kwargs

    Additional keyword arguments for extended functionality and model configurations.

    {}"},{"location":"text/bulk/base/#geniusrise_text.base.bulk.TextBulk.generate","title":"generate(prompt, decoding_strategy='generate', **generation_params)","text":"

    Generate text completion for the given prompt using the specified decoding strategy.

    Parameters:

    Name Type Description Default prompt str

    The prompt to generate text completion for.

    required decoding_strategy str

    The decoding strategy to use. Defaults to \"generate\".

    'generate' **generation_params Any

    Additional parameters to pass to the decoding strategy.

    {}

    Returns:

    Name Type Description str str

    The generated text completion.

    Raises:

    Type Description Exception

    If an error occurs during generation.

    Supported decoding strategies and their additional parameters
    • \"generate\": Uses the model's default generation method. (Parameters: max_length, num_beams, etc.)
    • \"greedy_search\": Generates text using a greedy search decoding strategy. Parameters: max_length, eos_token_id, pad_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus.
    • \"contrastive_search\": Generates text using contrastive search decoding strategy. Parameters: top_k, penalty_alpha, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus, sequential.
    • \"sample\": Generates text using a sampling decoding strategy. Parameters: do_sample, temperature, top_k, top_p, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus.
    • \"beam_search\": Generates text using beam search decoding strategy. Parameters: num_beams, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus.
    • \"beam_sample\": Generates text using beam search with sampling decoding strategy. Parameters: num_beams, temperature, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus.
    • \"group_beam_search\": Generates text using group beam search decoding strategy. Parameters: num_beams, diversity_penalty, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus.
    • \"constrained_beam_search\": Generates text using constrained beam search decoding strategy. Parameters: num_beams, max_length, constraints, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus.
    All generation parameters
    • max_length: Maximum length the generated tokens can have
    • max_new_tokens: Maximum number of tokens to generate, ignoring prompt tokens
    • min_length: Minimum length of the sequence to be generated
    • min_new_tokens: Minimum number of tokens to generate, ignoring prompt tokens
    • early_stopping: Stopping condition for beam-based methods
    • max_time: Maximum time allowed for computation in seconds
    • do_sample: Whether to use sampling for generation
    • num_beams: Number of beams for beam search
    • num_beam_groups: Number of groups for beam search to ensure diversity
    • penalty_alpha: Balances model confidence and degeneration penalty in contrastive search
    • use_cache: Whether the model should use past key/values attentions to speed up decoding
    • temperature: Modulates next token probabilities
    • top_k: Number of highest probability tokens to keep for top-k-filtering
    • top_p: Smallest set of most probable tokens with cumulative probability >= top_p
    • typical_p: Conditional probability of predicting a target token next
    • epsilon_cutoff: Tokens with a conditional probability > epsilon_cutoff will be sampled
    • eta_cutoff: Eta sampling, a hybrid of locally typical sampling and epsilon sampling
    • diversity_penalty: Penalty subtracted from a beam's score if it generates a token same as any other group
    • repetition_penalty: Penalty for repetition of ngrams
    • encoder_repetition_penalty: Penalty on sequences not in the original input
    • length_penalty: Exponential penalty to the length for beam-based generation
    • no_repeat_ngram_size: All ngrams of this size can only occur once
    • bad_words_ids: List of token ids that are not allowed to be generated
    • force_words_ids: List of token ids that must be generated
    • renormalize_logits: Renormalize the logits after applying all logits processors
    • constraints: Custom constraints for generation
    • forced_bos_token_id: Token ID to force as the first generated token
    • forced_eos_token_id: Token ID to force as the last generated token
    • remove_invalid_values: Remove possible NaN and inf outputs
    • exponential_decay_length_penalty: Exponentially increasing length penalty after a certain number of tokens
    • suppress_tokens: Tokens that will be suppressed during generation
    • begin_suppress_tokens: Tokens that will be suppressed at the beginning of generation
    • forced_decoder_ids: Mapping from generation indices to token indices that will be forced
    • sequence_bias: Maps a sequence of tokens to its bias term
    • guidance_scale: Guidance scale for classifier free guidance (CFG)
    • low_memory: Switch to sequential topk for contrastive search to reduce peak memory
    • num_return_sequences: Number of independently computed returned sequences for each batch element
    • output_attentions: Whether to return the attentions tensors of all layers
    • output_hidden_states: Whether to return the hidden states of all layers
    • output_scores: Whether to return the prediction scores
    • return_dict_in_generate: Whether to return a ModelOutput instead of a plain tuple
    • pad_token_id: The id of the padding token
    • bos_token_id: The id of the beginning-of-sequence token
    • eos_token_id: The id of the end-of-sequence token
    • max_length: The maximum length of the sequence to be generated
    • eos_token_id: End-of-sequence token ID
    • pad_token_id: Padding token ID
    • output_attentions: Return attention tensors of all attention layers if True
    • output_hidden_states: Return hidden states of all layers if True
    • output_scores: Return prediction scores if True
    • return_dict_in_generate: Return a ModelOutput instead of a plain tuple if True
    • synced_gpus: Continue running the while loop until max_length for ZeRO stage 3 if True
    • top_k: Size of the candidate set for re-ranking in contrastive search
    • penalty_alpha: Degeneration penalty; active when larger than 0
    • eos_token_id: End-of-sequence token ID(s)
    • sequential: Switch to sequential topk hidden state computation to reduce memory if True
    • do_sample: Use sampling for generation if True
    • temperature: Temperature for sampling
    • top_p: Cumulative probability for top-p-filtering
    • diversity_penalty: Penalty for reducing similarity across different beam groups
    • constraints: List of constraints to apply during beam search
    • synced_gpus: Whether to continue running the while loop until max_length (needed for ZeRO stage 3)
    "},{"location":"text/bulk/base/#geniusrise_text.base.bulk.TextBulk.load_models","title":"load_models(model_name, tokenizer_name, model_revision=None, tokenizer_revision=None, model_class='AutoModelForCausalLM', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, better_transformers=False, **model_args)","text":"

    Loads and configures the specified model and tokenizer for text generation. It ensures the models are optimized for inference.

    Parameters:

    Name Type Description Default model_name str

    The name or path of the model to load.

    required tokenizer_name str

    The name or path of the tokenizer to load.

    required model_revision Optional[str]

    The specific model revision to load (e.g., a commit hash).

    None tokenizer_revision Optional[str]

    The specific tokenizer revision to load (e.g., a commit hash).

    None model_class str

    The class of the model to be loaded.

    'AutoModelForCausalLM' tokenizer_class str

    The class of the tokenizer to be loaded.

    'AutoTokenizer' use_cuda bool

    Flag to utilize CUDA for GPU acceleration.

    False precision str

    The desired precision for computations (\"float32\", \"float16\", etc.).

    'float16' quantization int

    The bit level for model quantization (0 for none, 8 for 8-bit quantization).

    0 device_map str | Dict | None

    The specific device(s) to use for model operations.

    'auto' max_memory Dict

    A dictionary defining the maximum memory to allocate for the model.

    {0: '24GB'} torchscript bool

    Flag to enable TorchScript for model optimization.

    False compile bool

    Flag to enable JIT compilation of the model.

    False awq_enabled bool

    Flag to enable AWQ (Adaptive Weight Quantization).

    False flash_attention bool

    Flag to enable Flash Attention optimization for faster processing.

    False better_transformers bool

    Flag to enable Better Transformers optimization for faster processing.

    False **model_args Any

    Additional arguments to pass to the model during its loading.

    {}

    Returns:

    Type Description Tuple[AutoModelForCausalLM, AutoTokenizer]

    Tuple[AutoModelForCausalLM, AutoTokenizer]: The loaded model and tokenizer ready for text generation.

    "},{"location":"text/bulk/base/#geniusrise_text.base.bulk.TextBulk.load_models_llama_cpp","title":"load_models_llama_cpp(model, filename, local_dir=None, n_gpu_layers=0, split_mode=llama_cpp.LLAMA_SPLIT_LAYER, main_gpu=0, tensor_split=None, vocab_only=False, use_mmap=True, use_mlock=False, kv_overrides=None, seed=llama_cpp.LLAMA_DEFAULT_SEED, n_ctx=512, n_batch=512, n_threads=None, n_threads_batch=None, rope_scaling_type=llama_cpp.LLAMA_ROPE_SCALING_UNSPECIFIED, rope_freq_base=0.0, rope_freq_scale=0.0, yarn_ext_factor=-1.0, yarn_attn_factor=1.0, yarn_beta_fast=32.0, yarn_beta_slow=1.0, yarn_orig_ctx=0, mul_mat_q=True, logits_all=False, embedding=False, offload_kqv=True, last_n_tokens_size=64, lora_base=None, lora_scale=1.0, lora_path=None, numa=False, chat_format=None, chat_handler=None, draft_model=None, tokenizer=None, verbose=True, **kwargs)","text":"

    Initializes and loads LLaMA model with llama.cpp backend, along with an optional tokenizer.

    Parameters:

    Name Type Description Default model str

    Huggingface ID to the LLaMA model.

    required filename Optional[str]

    A filename or glob pattern to match the model file in the repo.

    required local_dir Optional[Union[str, os.PathLike[str]]]

    The local directory to save the model to.

    None n_gpu_layers int

    Number of layers to offload to GPU. Default is 0.

    0 split_mode int

    Split mode for distributing model across GPUs.

    llama_cpp.LLAMA_SPLIT_LAYER main_gpu int

    Main GPU index.

    0 tensor_split Optional[List[float]]

    Tensor split configuration.

    None vocab_only bool

    Whether to load vocabulary only.

    False use_mmap bool

    Use memory-mapped files for model loading.

    True use_mlock bool

    Lock model data in RAM.

    False kv_overrides Optional[Dict[str, Union[bool, int, float]]]

    Key-value pairs for model overrides.

    None seed int

    Random seed for initialization.

    llama_cpp.LLAMA_DEFAULT_SEED n_ctx int

    Number of context tokens.

    512 n_batch int

    Batch size for processing prompts.

    512 n_threads Optional[int]

    Number of threads for generation.

    None n_threads_batch Optional[int]

    Number of threads for batch processing.

    None rope_scaling_type Optional[int]

    RoPE scaling type.

    llama_cpp.LLAMA_ROPE_SCALING_UNSPECIFIED rope_freq_base float

    Base frequency for RoPE.

    0.0 rope_freq_scale float

    Frequency scaling for RoPE.

    0.0 yarn_ext_factor float

    YaRN extrapolation mix factor.

    -1.0 yarn_attn_factor float

    YaRN attention factor.

    1.0 yarn_beta_fast float

    YaRN beta fast parameter.

    32.0 yarn_beta_slow float

    YaRN beta slow parameter.

    1.0 yarn_orig_ctx int

    Original context size for YaRN.

    0 mul_mat_q bool

    Whether to multiply matrices for queries.

    True logits_all bool

    Return logits for all tokens.

    False embedding bool

    Enable embedding mode only.

    False offload_kqv bool

    Offload K, Q, V matrices to GPU.

    True last_n_tokens_size int

    Size for the last_n_tokens buffer.

    64 lora_base Optional[str]

    Base model path for LoRA.

    None lora_scale float

    Scale factor for LoRA adjustments.

    1.0 lora_path Optional[str]

    Path to LoRA adjustments.

    None numa Union[bool, int]

    NUMA configuration.

    False chat_format Optional[str]

    Chat format configuration.

    None chat_handler Optional[llama_cpp.LlamaChatCompletionHandler]

    Handler for chat completions.

    None draft_model Optional[llama_cpp.LlamaDraftModel]

    Draft model for speculative decoding.

    None tokenizer Optional[PreTrainedTokenizerBase]

    Custom tokenizer instance.

    None verbose bool

    Enable verbose logging.

    True **kwargs

    Additional keyword arguments.

    {}

    Returns:

    Type Description Tuple[LlamaCPP, Optional[PreTrainedTokenizerBase]]

    Tuple[LlamaCPP, Optional[PreTrainedTokenizerBase]]: The loaded LLaMA model and tokenizer.

    "},{"location":"text/bulk/base/#geniusrise_text.base.bulk.TextBulk.load_models_vllm","title":"load_models_vllm(model, tokenizer, tokenizer_mode='auto', trust_remote_code=True, download_dir=None, load_format='auto', dtype='auto', seed=42, revision=None, tokenizer_revision=None, max_model_len=1024, quantization=None, enforce_eager=False, max_context_len_to_capture=8192, block_size=16, gpu_memory_utilization=0.9, swap_space=4, cache_dtype='auto', sliding_window=None, pipeline_parallel_size=1, tensor_parallel_size=1, worker_use_ray=False, max_parallel_loading_workers=None, disable_custom_all_reduce=False, max_num_batched_tokens=None, max_num_seqs=64, max_paddings=512, device='cuda', max_lora_rank=None, max_loras=None, max_cpu_loras=None, lora_dtype=None, lora_extra_vocab_size=0, placement_group=None, log_stats=False, batched_inference=False)","text":"

    Initializes and loads models using VLLM configurations with specific parameters.

    Parameters:

    Name Type Description Default model str

    Name or path of the Hugging Face model to use.

    required tokenizer str

    Name or path of the Hugging Face tokenizer to use.

    required tokenizer_mode str

    Tokenizer mode. \"auto\" will use the fast tokenizer if available, \"slow\" will always use the slow tokenizer.

    'auto' trust_remote_code bool

    Trust remote code (e.g., from Hugging Face) when downloading the model and tokenizer.

    True download_dir Optional[str]

    Directory to download and load the weights, default to the default cache directory of Hugging Face.

    None load_format str

    The format of the model weights to load. Options include \"auto\", \"pt\", \"safetensors\", \"npcache\", \"dummy\".

    'auto' dtype Union[str, torch.dtype]

    Data type for model weights and activations. Options include \"auto\", torch.float32, torch.float16, etc.

    'auto' seed int

    Random seed for reproducibility.

    42 revision Optional[str]

    The specific model version to use. Can be a branch name, a tag name, or a commit id.

    None code_revision Optional[str]

    The specific revision to use for the model code on Hugging Face Hub.

    required tokenizer_revision Optional[str]

    The specific tokenizer version to use.

    None max_model_len Optional[int]

    Maximum length of a sequence (including prompt and output). If None, will be derived from the model.

    1024 quantization Optional[str]

    Quantization method that was used to quantize the model weights. If None, we assume the model weights are not quantized.

    None enforce_eager bool

    Whether to enforce eager execution. If True, disables CUDA graph and always execute the model in eager mode.

    False max_context_len_to_capture Optional[int]

    Maximum context length covered by CUDA graphs. When larger, falls back to eager mode.

    8192 block_size int

    Size of a cache block in number of tokens.

    16 gpu_memory_utilization float

    Fraction of GPU memory to use for the VLLM execution.

    0.9 swap_space int

    Size of the CPU swap space per GPU (in GiB).

    4 cache_dtype str

    Data type for KV cache storage.

    'auto' sliding_window Optional[int]

    Configuration for sliding window if applicable.

    None pipeline_parallel_size int

    Number of pipeline parallel groups.

    1 tensor_parallel_size int

    Number of tensor parallel groups.

    1 worker_use_ray bool

    Whether to use Ray for model workers. Required if either pipeline_parallel_size or tensor_parallel_size is greater than 1.

    False max_parallel_loading_workers Optional[int]

    Maximum number of workers for loading the model in parallel to avoid RAM OOM.

    None disable_custom_all_reduce bool

    Disable custom all-reduce kernel and fall back to NCCL.

    False max_num_batched_tokens Optional[int]

    Maximum number of tokens to be processed in a single iteration.

    None max_num_seqs int

    Maximum number of sequences to be processed in a single iteration.

    64 max_paddings int

    Maximum number of paddings to be added to a batch.

    512 device str

    Device configuration, typically \"cuda\" or \"cpu\".

    'cuda' max_lora_rank Optional[int]

    Maximum rank for LoRA adjustments.

    None max_loras Optional[int]

    Maximum number of LoRA adjustments.

    None max_cpu_loras Optional[int]

    Maximum number of LoRA adjustments stored on CPU.

    None lora_dtype Optional[torch.dtype]

    Data type for LoRA parameters.

    None lora_extra_vocab_size Optional[int]

    Additional vocabulary size for LoRA.

    0 placement_group Optional[PlacementGroup]

    Ray placement group for distributed execution. Required for distributed execution.

    None log_stats bool

    Whether to log statistics during model operation.

    False

    Returns:

    Name Type Description LLMEngine AsyncLLMEngine | LLM

    An instance of the LLMEngine class initialized with the given configurations.

    "},{"location":"text/bulk/classification/","title":"Classification","text":"

    Bases: TextBulk

    TextClassificationBulk is designed to handle bulk text classification tasks using Hugging Face models efficiently and effectively. It allows for processing large datasets, utilizing state-of-the-art machine learning models to provide accurate classification of text data into predefined labels.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration and data inputs for the batch process.

    required output BatchOutput

    Configurations for output data handling.

    required state State

    State management for the classification task.

    required **kwargs

    Arbitrary keyword arguments for extended configurations.

    {}

    Example CLI Usage:

    genius TextClassificationBulk rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id cardiffnlp/twitter-roberta-base-hate-multiclass-latest-lol \\\nclassify \\\n--args \\\nmodel_name=\"cardiffnlp/twitter-roberta-base-hate-multiclass-latest\" \\\nmodel_class=\"AutoModelForSequenceClassification\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\ndevice_map=\"auto\" \\\nmax_memory=None \\\ntorchscript=False\n

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the TextClassificationBulk class with input, output, and state configurations.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the classification task.

    required **kwargs

    Additional keyword arguments for extended functionality.

    {}"},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.classify","title":"classify(model_name, model_class='AutoModelForSequenceClassification', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, batch_size=32, notification_email=None, **kwargs)","text":"

    Perform bulk classification using the specified model and tokenizer. This method handles the entire classification process including loading the model, processing input data, predicting classifications, and saving the results.

    Parameters:

    Name Type Description Default model_name str

    Name or path of the model.

    required model_class str

    Class name of the model (default \"AutoModelForSequenceClassification\").

    'AutoModelForSequenceClassification' tokenizer_class str

    Class name of the tokenizer (default \"AutoTokenizer\").

    'AutoTokenizer' use_cuda bool

    Whether to use CUDA for model inference (default False).

    False precision str

    Precision for model computation (default \"float\").

    'float' quantization int

    Level of quantization for optimizing model size and speed (default 0).

    0 device_map str | Dict | None

    Specific device to use for computation (default \"auto\").

    'auto' max_memory Dict

    Maximum memory configuration for devices.

    {0: '24GB'} torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model. Defaults to False.

    False compile bool

    Whether to compile the model before fine-tuning. Defaults to False.

    False awq_enabled bool

    Whether to enable AWQ optimization (default False).

    False flash_attention bool

    Whether to use flash attention optimization (default False).

    False batch_size int

    Number of classifications to process simultaneously (default 32).

    32 **kwargs Any

    Arbitrary keyword arguments for model and generation configurations.

    {}"},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset","title":"load_dataset(dataset_path, max_length=512, **kwargs)","text":"

    Load a classification dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required max_length int

    The maximum length for tokenization. Defaults to 512.

    512

    Returns:

    Name Type Description Dataset Optional[Dataset]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\"}\n

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--csv","title":"CSV","text":"

    Should contain 'text' columns.

    text\n\"The text content\"\n

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'text' columns.

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'text' keys.

    [{\"text\": \"The text content\"}]\n

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' child elements.

    <record>\n<text>The text content</text>\n</record>\n

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' keys.

    - text: \"The text content\"\n

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--tsv","title":"TSV","text":"

    Should contain 'text' columns separated by tabs.

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' columns.

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' columns.

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--feather","title":"Feather","text":"

    Should contain 'text' columns.

    "},{"location":"text/bulk/instruction_tuning/","title":"Instruction Tuning","text":"

    Bases: TextBulk

    InstructionBulk is a class designed to perform bulk text generation tasks using Hugging Face's instruction-tuned language models. It is optimized for large-scale text generation, providing an efficient interface to use state-of-the-art machine learning models for generating text based on a set of instructions or prompts.

    Attributes:

    Name Type Description model Any

    The loaded, pre-trained instruction-tuned language model.

    tokenizer Any

    The tokenizer for processing text compatible with the model.

    Methods

    load_dataset(dataset_path: str, max_length: int = 1024, **kwargs) -> Optional[Dataset]: Loads a dataset for text generation tasks from the specified directory.

    perform(model_name: str, **kwargs: Any) -> None: Performs bulk text generation using the specified model and tokenizer.

    Example CLI Usage:

    genius InstructionBulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/chat \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/chat \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise\\\n--postgres_table state \\\n--id mistralai/Mistral-7B-Instruct-v0.1-lol \\\nperform \\\n--args \\\nmodel_name=\"mistralai/Mistral-7B-Instruct-v0.1\" \\\nmodel_class=\"AutoModelForCausalLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\ndevice_map=\"auto\" \\\nmax_memory=None \\\ntorchscript=False \\\ndecoding_strategy=\"generate\" \\\ngeneration_max_new_tokens=100 \\\ngeneration_do_sample=true\n

    or using VLLM:

    genius InstructionBulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/chat \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/chat \\\nnone \\\n--id mistralai/Mistral-7B-Instruct-v0.1 \\\nperform_vllm \\\n--args \\\nmodel_name=\"mistralai/Mistral-7B-Instruct-v0.1\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\ndevice_map=\"auto\" \\\ngeneration_temperature=0.7 \\\ngeneration_top_p=1.0 \\\ngeneration_n=1 \\\ngeneration_max_tokens=50 \\\ngeneration_stream=false \\\ngeneration_presence_penalty=0.0 \\\ngeneration_frequency_penalty=0.0\n

    or using llama.cpp:

    genius InstructionBulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/chat \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/chat \\\nnone \\\n--id mistralai/Mistral-7B-Instruct-v0.1 \\\nperform_llama_cpp \\\n--args \\\nmodel=\"TheBloke/Mistral-7B-Instruct-v0.2-GGUF\" \\\nfilename=\"mistral-7b-instruct-v0.2.Q4_K_M.gguf\" \\\nn_gpu_layers=35  \\\ngeneration_temperature=0.7 \\\ngeneration_top_p=0.95 \\\ngeneration_top_k=40 \\\ngeneration_max_tokens=50 \\\ngeneration_repeat_penalty=0.1\n

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the InstructionBulk class with input, output, and state configurations for bulk text generation.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for input data handling.

    required output BatchOutput

    Configuration for output data handling.

    required state State

    State management for the text generation task.

    required **kwargs

    Additional keyword arguments for extended functionalities.

    {}"},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset","title":"load_dataset(dataset_path, max_length=1024, **kwargs)","text":"

    Loads a dataset from the specified path. This method supports various data formats including JSON, CSV, Parquet, and others. It's designed to facilitate the bulk processing of text data for generation tasks.

    Parameters:

    Name Type Description Default dataset_path str

    Path to the directory containing the dataset files.

    required max_length int

    Maximum token length for text processing (default is 1024).

    1024 **kwargs

    Additional keyword arguments for dataset loading.

    {}

    Returns:

    Type Description Optional[Dataset]

    Optional[Dataset]: A Dataset object if loading is successful; otherwise, None.

    Raises:

    Type Description Exception

    If an error occurs during dataset loading.

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"instruction\": \"The instruction\"}\n

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--csv","title":"CSV","text":"

    Should contain 'instruction' columns.

    instruction\n\"The instruction\"\n

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'instruction' columns.

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'instruction' keys.

    [{\"instruction\": \"The instruction\"}]\n

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'instruction' child elements.

    <record>\n<instruction>The instruction</instruction>\n</record>\n

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'instruction' keys.

    - instruction: \"The instruction\"\n

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--tsv","title":"TSV","text":"

    Should contain 'instruction' columns separated by tabs.

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'instruction' columns.

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'instruction' columns.

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--feather","title":"Feather","text":"

    Should contain 'instruction' columns.

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.perform","title":"perform(model_name, model_class='AutoModelForCausalLM', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, decoding_strategy='generate', notification_email=None, **kwargs)","text":"

    Performs text generation in bulk using a specified instruction-tuned model. This method handles the entire process, including model loading, prompt processing, text generation, and saving the results.

    Parameters:

    Name Type Description Default model_name str

    The name or path of the instruction-tuned model.

    required model_class str

    The class of the language model. Defaults to \"AutoModelForCausalLM\".

    'AutoModelForCausalLM' tokenizer_class str

    The class of the tokenizer. Defaults to \"AutoTokenizer\".

    'AutoTokenizer' use_cuda bool

    Whether to use CUDA for model inference. Defaults to False.

    False precision str

    Precision for model computation. Defaults to \"float16\".

    'float16' quantization int

    Level of quantization for optimizing model size and speed. Defaults to 0.

    0 device_map str | Dict | None

    Specific device to use for computation. Defaults to \"auto\".

    'auto' max_memory Dict

    Maximum memory configuration for devices. Defaults to {0: \"24GB\"}.

    {0: '24GB'} torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model. Defaults to False.

    False compile bool

    Whether to compile the model before fine-tuning. Defaults to False.

    False awq_enabled bool

    Whether to enable AWQ optimization. Defaults to False.

    False flash_attention bool

    Whether to use flash attention optimization. Defaults to False.

    False decoding_strategy str

    Strategy for decoding the completion. Defaults to \"generate\".

    'generate' **kwargs Any

    Configuration and additional arguments for text generation such as model class, tokenizer class, precision, device map, and other generation-related parameters.

    {} Note

    Additional arguments are passed directly to the model and tokenizer initialization and the generation method.

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.perform_llama_cpp","title":"perform_llama_cpp(model, filename=None, local_dir=None, n_gpu_layers=0, split_mode=llama_cpp.LLAMA_SPLIT_LAYER, main_gpu=0, tensor_split=None, vocab_only=False, use_mmap=True, use_mlock=False, kv_overrides=None, seed=llama_cpp.LLAMA_DEFAULT_SEED, n_ctx=512, n_batch=512, n_threads=None, n_threads_batch=None, rope_scaling_type=llama_cpp.LLAMA_ROPE_SCALING_UNSPECIFIED, rope_freq_base=0.0, rope_freq_scale=0.0, yarn_ext_factor=-1.0, yarn_attn_factor=1.0, yarn_beta_fast=32.0, yarn_beta_slow=1.0, yarn_orig_ctx=0, mul_mat_q=True, logits_all=False, embedding=False, offload_kqv=True, last_n_tokens_size=64, lora_base=None, lora_scale=1.0, lora_path=None, numa=False, chat_format=None, chat_handler=None, draft_model=None, tokenizer=None, verbose=True, notification_email=None, **kwargs)","text":"

    Performs bulk text generation using the LLaMA model with llama.cpp backend. This method handles the entire process, including model loading, prompt processing, text generation, and saving the results.

    Parameters:

    Name Type Description Default model str

    Path or identifier for the LLaMA model.

    required filename Optional[str]

    Optional filename or glob pattern to match the model file.

    None local_dir Optional[Union[str, os.PathLike[str]]]

    Local directory to save the model files.

    None n_gpu_layers int

    Number of layers to offload to GPU.

    0 split_mode int

    Split mode for distributing model across GPUs.

    llama_cpp.LLAMA_SPLIT_LAYER main_gpu int

    Main GPU index.

    0 tensor_split Optional[List[float]]

    Configuration for tensor splitting across GPUs.

    None vocab_only bool

    Whether to load only the vocabulary.

    False use_mmap bool

    Use memory-mapped files for model loading.

    True use_mlock bool

    Lock model data in RAM to prevent swapping.

    False kv_overrides Optional[Dict[str, Union[bool, int, float]]]

    Key-value pairs for overriding model config.

    None seed int

    Seed for random number generation.

    llama_cpp.LLAMA_DEFAULT_SEED n_ctx int

    Number of context tokens for generation.

    512 n_batch int

    Batch size for processing.

    512 n_threads Optional[int]

    Number of threads for generation.

    None n_threads_batch Optional[int]

    Number of threads for batch processing.

    None rope_scaling_type Optional[int]

    Scaling type for RoPE.

    llama_cpp.LLAMA_ROPE_SCALING_UNSPECIFIED rope_freq_base float

    Base frequency for RoPE.

    0.0 rope_freq_scale float

    Frequency scaling for RoPE.

    0.0 yarn_ext_factor float

    YaRN extrapolation factor.

    -1.0 yarn_attn_factor float

    YaRN attention factor.

    1.0 yarn_beta_fast float

    YaRN beta fast parameter.

    32.0 yarn_beta_slow float

    YaRN beta slow parameter.

    1.0 yarn_orig_ctx int

    Original context size for YaRN.

    0 mul_mat_q bool

    Multiply matrices for queries.

    True logits_all bool

    Return logits for all tokens.

    False embedding bool

    Enable embedding mode.

    False offload_kqv bool

    Offload K, Q, V matrices to GPU.

    True last_n_tokens_size int

    Size for the last_n_tokens buffer.

    64 lora_base Optional[str]

    Base model path for LoRA.

    None lora_scale float

    Scale factor for LoRA adjustments.

    1.0 lora_path Optional[str]

    Path for LoRA adjustments.

    None numa Union[bool, int]

    NUMA configuration.

    False chat_format Optional[str]

    Chat format configuration.

    None chat_handler Optional[llama_cpp.llama_chat_format.LlamaChatCompletionHandler]

    Handler for chat completions.

    None draft_model Optional[llama_cpp.LlamaDraftModel]

    Draft model for speculative decoding.

    None tokenizer Optional[PreTrainedTokenizerBase]

    Custom tokenizer instance.

    None verbose bool

    Enable verbose logging.

    True notification_email Optional[str]

    Email to send notifications upon completion.

    None **kwargs

    Additional arguments for model loading and text generation.

    {}"},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.perform_vllm","title":"perform_vllm(model_name, use_cuda=False, precision='float16', quantization=0, device_map='auto', vllm_tokenizer_mode='auto', vllm_download_dir=None, vllm_load_format='auto', vllm_seed=42, vllm_max_model_len=1024, vllm_enforce_eager=False, vllm_max_context_len_to_capture=8192, vllm_block_size=16, vllm_gpu_memory_utilization=0.9, vllm_swap_space=4, vllm_sliding_window=None, vllm_pipeline_parallel_size=1, vllm_tensor_parallel_size=1, vllm_worker_use_ray=False, vllm_max_parallel_loading_workers=None, vllm_disable_custom_all_reduce=False, vllm_max_num_batched_tokens=None, vllm_max_num_seqs=64, vllm_max_paddings=512, vllm_max_lora_rank=None, vllm_max_loras=None, vllm_max_cpu_loras=None, vllm_lora_extra_vocab_size=0, vllm_placement_group=None, vllm_log_stats=False, notification_email=None, batch_size=32, **kwargs)","text":"

    Performs bulk text generation using the vLLM inference engine with specified parameters for fine-tuning model behavior, including quantization and parallel processing settings. This method is designed to process large datasets efficiently by leveraging vLLM's capabilities for generating high-quality text completions based on provided prompts.

    Parameters:

    Name Type Description Default model_name str

    The name or path of the VLLM model to use for text generation.

    required use_cuda bool

    Flag indicating whether to use CUDA for GPU acceleration.

    False precision str

    Precision of computations, can be \"float16\", \"bfloat16\", etc.

    'float16' quantization int

    Level of quantization for model weights, 0 for none.

    0 device_map str | Dict | None

    Specific device(s) to use for model inference.

    'auto' vllm_tokenizer_mode str

    Mode of the tokenizer (\"auto\", \"fast\", or \"slow\").

    'auto' vllm_download_dir Optional[str]

    Directory to download and load the model and tokenizer.

    None vllm_load_format str

    Format to load the model, e.g., \"auto\", \"pt\".

    'auto' vllm_seed int

    Seed for random number generation.

    42 vllm_max_model_len int

    Maximum sequence length the model can handle.

    1024 vllm_enforce_eager bool

    Enforce eager execution instead of using optimization techniques.

    False vllm_max_context_len_to_capture int

    Maximum context length for CUDA graph capture.

    8192 vllm_block_size int

    Block size for caching mechanism.

    16 vllm_gpu_memory_utilization float

    Fraction of GPU memory to use.

    0.9 vllm_swap_space int

    Amount of swap space to use in GiB.

    4 vllm_sliding_window Optional[int]

    Size of the sliding window for processing.

    None vllm_pipeline_parallel_size int

    Number of pipeline parallel groups.

    1 vllm_tensor_parallel_size int

    Number of tensor parallel groups.

    1 vllm_worker_use_ray bool

    Whether to use Ray for model workers.

    False vllm_max_parallel_loading_workers Optional[int]

    Maximum number of workers for parallel loading.

    None vllm_disable_custom_all_reduce bool

    Disable custom all-reduce kernel and fall back to NCCL.

    False vllm_max_num_batched_tokens Optional[int]

    Maximum number of tokens to be processed in a single iteration.

    None vllm_max_num_seqs int

    Maximum number of sequences to be processed in a single iteration.

    64 vllm_max_paddings int

    Maximum number of paddings to be added to a batch.

    512 vllm_max_lora_rank Optional[int]

    Maximum rank for LoRA adjustments.

    None vllm_max_loras Optional[int]

    Maximum number of LoRA adjustments.

    None vllm_max_cpu_loras Optional[int]

    Maximum number of LoRA adjustments stored on CPU.

    None vllm_lora_extra_vocab_size int

    Additional vocabulary size for LoRA.

    0 vllm_placement_group Optional[dict]

    Ray placement group for distributed execution.

    None vllm_log_stats bool

    Whether to log statistics during model operation.

    False notification_email Optional[str]

    Email to send notifications upon completion.

    None batch_size int

    Number of prompts to process in each batch for efficient memory usage.

    32 **kwargs Any

    Additional keyword arguments for generation settings like temperature, top_p, etc.

    {}

    This method automates the loading of large datasets, generation of text completions, and saving results, facilitating efficient and scalable text generation tasks.

    "},{"location":"text/bulk/language_model/","title":"Language Model","text":"

    Bases: TextBulk

    LanguageModelBulk is designed for large-scale text generation using Hugging Face language models in a bulk processing manner. It's particularly useful for tasks such as bulk content creation, summarization, or any other scenario where large datasets need to be processed with a language model.

    Attributes:

    Name Type Description model Any

    The loaded language model used for text generation.

    tokenizer Any

    The tokenizer corresponding to the language model, used for processing input text.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the API.

    required **kwargs Any

    Arbitrary keyword arguments for extended functionality.

    {}

    CLI Usage Example:

    genius LanguageModelBulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/lm \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/lm \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise\\\n--postgres_table state \\\n--id mistralai/Mistral-7B-Instruct-v0.1-lol \\\ncomplete \\\n--args \\\nmodel_name=\"mistralai/Mistral-7B-Instruct-v0.1\" \\\nmodel_class=\"AutoModelForCausalLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\ndevice_map=\"auto\" \\\nmax_memory=None \\\ntorchscript=False \\\ndecoding_strategy=\"generate\" \\\ngeneration_max_new_tokens=100 \\\ngeneration_do_sample=true\n

    or using VLLM:

    genius LanguageModelBulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/lm \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/lm \\\nnone \\\n--id mistralai/Mistral-7B-v0.1 \\\ncomplete_vllm \\\n--args \\\nmodel_name=\"mistralai/Mistral-7B-v0.1\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\ndevice_map=\"auto\" \\\nvllm_enforce_eager=True \\\ngeneration_temperature=0.7 \\\ngeneration_top_p=1.0 \\\ngeneration_n=1 \\\ngeneration_max_tokens=50 \\\ngeneration_stream=false \\\ngeneration_presence_penalty=0.0 \\\ngeneration_frequency_penalty=0.0\n

    or using llama.cpp:

    genius LanguageModelBulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/chat \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/chat \\\nnone \\\ncomplete_llama_cpp \\\n--args \\\nmodel=\"TheBloke/Mistral-7B-v0.1-GGUF\" \\\nfilename=\"mistral-7b-v0.1.Q4_K_M.gguf\" \\\nn_gpu_layers=35  \\\nn_ctx=32768 \\\ngeneration_temperature=0.7 \\\ngeneration_top_p=0.95 \\\ngeneration_top_k=40 \\\ngeneration_max_tokens=50 \\\ngeneration_repeat_penalty=0.1\n

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the LanguageModelBulk object with the specified configurations for input, output, and state.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration and data inputs for the bulk process.

    required output BatchOutput

    Configurations for output data handling.

    required state State

    State management for the bulk process.

    required **kwargs Any

    Additional keyword arguments for extended configurations.

    {}"},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.complete","title":"complete(model_name, model_class='AutoModelForCausalLM', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, decoding_strategy='generate', notification_email=None, **kwargs)","text":"

    Performs text completion on the loaded dataset using the specified model and tokenizer. The method handles the entire process, including model loading, text generation, and saving the results.

    Parameters:

    Name Type Description Default model_name str

    The name of the language model to use for text completion.

    required model_class str

    The class of the language model. Defaults to \"AutoModelForCausalLM\".

    'AutoModelForCausalLM' tokenizer_class str

    The class of the tokenizer. Defaults to \"AutoTokenizer\".

    'AutoTokenizer' use_cuda bool

    Whether to use CUDA for model inference. Defaults to False.

    False precision str

    Precision for model computation. Defaults to \"float16\".

    'float16' quantization int

    Level of quantization for optimizing model size and speed. Defaults to 0.

    0 device_map str | Dict | None

    Specific device to use for computation. Defaults to \"auto\".

    'auto' max_memory Dict

    Maximum memory configuration for devices. Defaults to {0: \"24GB\"}.

    {0: '24GB'} torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model. Defaults to False.

    False compile bool

    Whether to compile the model before fine-tuning. Defaults to False.

    False awq_enabled bool

    Whether to enable AWQ optimization. Defaults to False.

    False flash_attention bool

    Whether to use flash attention optimization. Defaults to False.

    False decoding_strategy str

    Strategy for decoding the completion. Defaults to \"generate\".

    'generate' **kwargs Any

    Additional keyword arguments for text generation.

    {}"},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.complete_llama_cpp","title":"complete_llama_cpp(model, filename=None, local_dir=None, n_gpu_layers=0, split_mode=llama_cpp.LLAMA_SPLIT_LAYER, main_gpu=0, tensor_split=None, vocab_only=False, use_mmap=True, use_mlock=False, kv_overrides=None, seed=llama_cpp.LLAMA_DEFAULT_SEED, n_ctx=512, n_batch=512, n_threads=None, n_threads_batch=None, rope_scaling_type=llama_cpp.LLAMA_ROPE_SCALING_UNSPECIFIED, rope_freq_base=0.0, rope_freq_scale=0.0, yarn_ext_factor=-1.0, yarn_attn_factor=1.0, yarn_beta_fast=32.0, yarn_beta_slow=1.0, yarn_orig_ctx=0, mul_mat_q=True, logits_all=False, embedding=False, offload_kqv=True, last_n_tokens_size=64, lora_base=None, lora_scale=1.0, lora_path=None, numa=False, chat_format=None, chat_handler=None, draft_model=None, tokenizer=None, verbose=True, notification_email=None, **kwargs)","text":"

    Performs bulk text generation using the LLaMA model with llama.cpp backend. This method handles the entire process, including model loading, prompt processing, text generation, and saving the results.

    Parameters:

    Name Type Description Default model str

    Path or identifier for the LLaMA model.

    required filename Optional[str]

    Optional filename or glob pattern to match the model file.

    None local_dir Optional[Union[str, os.PathLike[str]]]

    Local directory to save the model files.

    None n_gpu_layers int

    Number of layers to offload to GPU.

    0 split_mode int

    Split mode for distributing model across GPUs.

    llama_cpp.LLAMA_SPLIT_LAYER main_gpu int

    Main GPU index.

    0 tensor_split Optional[List[float]]

    Configuration for tensor splitting across GPUs.

    None vocab_only bool

    Whether to load only the vocabulary.

    False use_mmap bool

    Use memory-mapped files for model loading.

    True use_mlock bool

    Lock model data in RAM to prevent swapping.

    False kv_overrides Optional[Dict[str, Union[bool, int, float]]]

    Key-value pairs for overriding model config.

    None seed int

    Seed for random number generation.

    llama_cpp.LLAMA_DEFAULT_SEED n_ctx int

    Number of context tokens for generation.

    512 n_batch int

    Batch size for processing.

    512 n_threads Optional[int]

    Number of threads for generation.

    None n_threads_batch Optional[int]

    Number of threads for batch processing.

    None rope_scaling_type Optional[int]

    Scaling type for RoPE.

    llama_cpp.LLAMA_ROPE_SCALING_UNSPECIFIED rope_freq_base float

    Base frequency for RoPE.

    0.0 rope_freq_scale float

    Frequency scaling for RoPE.

    0.0 yarn_ext_factor float

    YaRN extrapolation factor.

    -1.0 yarn_attn_factor float

    YaRN attention factor.

    1.0 yarn_beta_fast float

    YaRN beta fast parameter.

    32.0 yarn_beta_slow float

    YaRN beta slow parameter.

    1.0 yarn_orig_ctx int

    Original context size for YaRN.

    0 mul_mat_q bool

    Multiply matrices for queries.

    True logits_all bool

    Return logits for all tokens.

    False embedding bool

    Enable embedding mode.

    False offload_kqv bool

    Offload K, Q, V matrices to GPU.

    True last_n_tokens_size int

    Size for the last_n_tokens buffer.

    64 lora_base Optional[str]

    Base model path for LoRA.

    None lora_scale float

    Scale factor for LoRA adjustments.

    1.0 lora_path Optional[str]

    Path for LoRA adjustments.

    None numa Union[bool, int]

    NUMA configuration.

    False chat_format Optional[str]

    Chat format configuration.

    None chat_handler Optional[llama_cpp.llama_chat_format.LlamaChatCompletionHandler]

    Handler for chat completions.

    None draft_model Optional[llama_cpp.LlamaDraftModel]

    Draft model for speculative decoding.

    None tokenizer Optional[PreTrainedTokenizerBase]

    Custom tokenizer instance.

    None verbose bool

    Enable verbose logging.

    True notification_email Optional[str]

    Email to send notifications upon completion.

    None **kwargs

    Additional arguments for model loading and text generation.

    {}"},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.complete_vllm","title":"complete_vllm(model_name, use_cuda=False, precision='float16', quantization=0, device_map='auto', vllm_tokenizer_mode='auto', vllm_download_dir=None, vllm_load_format='auto', vllm_seed=42, vllm_max_model_len=1024, vllm_enforce_eager=False, vllm_max_context_len_to_capture=8192, vllm_block_size=16, vllm_gpu_memory_utilization=0.9, vllm_swap_space=4, vllm_sliding_window=None, vllm_pipeline_parallel_size=1, vllm_tensor_parallel_size=1, vllm_worker_use_ray=False, vllm_max_parallel_loading_workers=None, vllm_disable_custom_all_reduce=False, vllm_max_num_batched_tokens=None, vllm_max_num_seqs=64, vllm_max_paddings=512, vllm_max_lora_rank=None, vllm_max_loras=None, vllm_max_cpu_loras=None, vllm_lora_extra_vocab_size=0, vllm_placement_group=None, vllm_log_stats=False, notification_email=None, batch_size=32, **kwargs)","text":"

    Performs bulk text generation using the vLLM inference engine with specified parameters for fine-tuning model behavior, including quantization and parallel processing settings. This method is designed to process large datasets efficiently by leveraging vLLM's capabilities for generating high-quality text completions based on provided prompts.

    Parameters:

    Name Type Description Default model_name str

    The name or path of the VLLM model to use for text generation.

    required use_cuda bool

    Flag indicating whether to use CUDA for GPU acceleration.

    False precision str

    Precision of computations, can be \"float16\", \"bfloat16\", etc.

    'float16' quantization int

    Level of quantization for model weights, 0 for none.

    0 device_map str | Dict | None

    Specific device(s) to use for model inference.

    'auto' vllm_tokenizer_mode str

    Mode of the tokenizer (\"auto\", \"fast\", or \"slow\").

    'auto' vllm_download_dir Optional[str]

    Directory to download and load the model and tokenizer.

    None vllm_load_format str

    Format to load the model, e.g., \"auto\", \"pt\".

    'auto' vllm_seed int

    Seed for random number generation.

    42 vllm_max_model_len int

    Maximum sequence length the model can handle.

    1024 vllm_enforce_eager bool

    Enforce eager execution instead of using optimization techniques.

    False vllm_max_context_len_to_capture int

    Maximum context length for CUDA graph capture.

    8192 vllm_block_size int

    Block size for caching mechanism.

    16 vllm_gpu_memory_utilization float

    Fraction of GPU memory to use.

    0.9 vllm_swap_space int

    Amount of swap space to use in GiB.

    4 vllm_sliding_window Optional[int]

    Size of the sliding window for processing.

    None vllm_pipeline_parallel_size int

    Number of pipeline parallel groups.

    1 vllm_tensor_parallel_size int

    Number of tensor parallel groups.

    1 vllm_worker_use_ray bool

    Whether to use Ray for model workers.

    False vllm_max_parallel_loading_workers Optional[int]

    Maximum number of workers for parallel loading.

    None vllm_disable_custom_all_reduce bool

    Disable custom all-reduce kernel and fall back to NCCL.

    False vllm_max_num_batched_tokens Optional[int]

    Maximum number of tokens to be processed in a single iteration.

    None vllm_max_num_seqs int

    Maximum number of sequences to be processed in a single iteration.

    64 vllm_max_paddings int

    Maximum number of paddings to be added to a batch.

    512 vllm_max_lora_rank Optional[int]

    Maximum rank for LoRA adjustments.

    None vllm_max_loras Optional[int]

    Maximum number of LoRA adjustments.

    None vllm_max_cpu_loras Optional[int]

    Maximum number of LoRA adjustments stored on CPU.

    None vllm_lora_extra_vocab_size int

    Additional vocabulary size for LoRA.

    0 vllm_placement_group Optional[dict]

    Ray placement group for distributed execution.

    None vllm_log_stats bool

    Whether to log statistics during model operation.

    False notification_email Optional[str]

    Email to send notifications upon completion.

    None batch_size int

    Number of prompts to process in each batch for efficient memory usage.

    32 **kwargs Any

    Additional keyword arguments for generation settings like temperature, top_p, etc.

    {}

    This method automates the loading of large datasets, generation of text completions, and saving results, facilitating efficient and scalable text generation tasks.

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset","title":"load_dataset(dataset_path, max_length=512, **kwargs)","text":"

    Load a completion dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required max_length int

    The maximum length for tokenization. Defaults to 512.

    512 **kwargs

    Additional keyword arguments to pass to the underlying dataset loading functions.

    {}

    Returns:

    Name Type Description Dataset Optional[Dataset]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--dataset-files-saved-by-hugging-face-datasets-library","title":"Dataset files saved by Hugging Face datasets library","text":"

    The directory should contain 'dataset_info.json' and other related files.

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\"}\n

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--csv","title":"CSV","text":"

    Should contain 'text' column.

    text\n\"The text content\"\n

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'text' column.

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'text' key.

    [{\"text\": \"The text content\"}]\n

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' child element.

    <record>\n<text>The text content</text>\n</record>\n

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' key.

    - text: \"The text content\"\n

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--tsv","title":"TSV","text":"

    Should contain 'text' column separated by tabs.

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' column.

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' column.

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--feather","title":"Feather","text":"

    Should contain 'text' column.

    "},{"location":"text/bulk/ner/","title":"Named Entity Recognition","text":"

    Bases: TextBulk

    NamedEntityRecognitionBulk is a class designed for bulk processing of Named Entity Recognition (NER) tasks. It leverages state-of-the-art NER models from Hugging Face's transformers library to identify and classify entities such as person names, locations, organizations, and other types of entities from a large corpus of text.

    This class provides functionalities to load large datasets, configure NER models, and perform entity recognition in bulk, making it suitable for processing large volumes of text data efficiently.

    Attributes:

    Name Type Description model Any

    The NER model loaded for entity recognition tasks.

    tokenizer Any

    The tokenizer used for text pre-processing in alignment with the model.

    Example CLI Usage:

    genius NamedEntityRecognitionBulk rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id dslim/bert-large-NER-lol \\\nrecognize_entities \\\n--args \\\nmodel_name=\"dslim/bert-large-NER\" \\\nmodel_class=\"AutoModelForTokenClassification\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False\n

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the NamedEntityRecognitionBulk class with specified input, output, and state configurations. Sets up the NER model and tokenizer for bulk entity recognition tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The input data configuration.

    required output BatchOutput

    The output data configuration.

    required state State

    The state management for the API.

    required **kwargs Any

    Additional keyword arguments for extended functionality.

    {}"},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Loads a dataset from the specified directory path. The method supports various data formats and structures, ensuring that the dataset is properly formatted for NER tasks.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required **kwargs Any

    Additional keyword arguments to handle specific dataset loading scenarios.

    {}

    Returns:

    Type Description Optional[Dataset]

    Optional[Dataset]: The loaded dataset or None if an error occurs during loading.

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--hugging-face-dataset","title":"Hugging Face Dataset","text":"

    Dataset files saved by the Hugging Face datasets library.

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"tokens\": [\"token1\", \"token2\", ...]}\n

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--csv","title":"CSV","text":"

    Should contain 'tokens' columns.

    tokens\n\"['token1', 'token2', ...]\"\n

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'tokens' columns.

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'tokens' keys.

    [{\"tokens\": [\"token1\", \"token2\", ...]}]\n

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'tokens' child elements.

    <record>\n<tokens>token1 token2 ...</tokens>\n</record>\n

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'tokens' keys.

    - tokens: [\"token1\", \"token2\", ...]\n

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--tsv","title":"TSV","text":"

    Should contain 'tokens' columns separated by tabs.

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'tokens' columns.

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'tokens' columns.

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--feather","title":"Feather","text":"

    Should contain 'tokens' columns.

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.recognize_entities","title":"recognize_entities(model_name, max_length=512, model_class='AutoModelForSeq2SeqLM', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, batch_size=32, notification_email=None, **kwargs)","text":"

    Performs bulk named entity recognition on the loaded dataset. The method processes the text in batches, applying the NER model to recognize entities.

    Parameters:

    Name Type Description Default model_name str

    The name or path of the NER model.

    required max_length int

    The maximum sequence length for the tokenizer.

    512 model_class str

    The class of the model, defaults to \"AutoModelForTokenClassification\".

    'AutoModelForSeq2SeqLM' tokenizer_class str

    The class of the tokenizer, defaults to \"AutoTokenizer\".

    'AutoTokenizer' use_cuda bool

    Whether to use CUDA for model inference, defaults to False.

    False precision str

    Model computation precision, defaults to \"float16\".

    'float16' quantization int

    Level of quantization for model size and speed optimization, defaults to 0.

    0 device_map str | Dict | None

    Specific device configuration for computation, defaults to \"auto\".

    'auto' max_memory Dict

    Maximum memory configuration for the devices.

    {0: '24GB'} torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model. Defaults to False.

    False compile bool

    Whether to compile the model before fine-tuning. Defaults to True.

    False awq_enabled bool

    Whether to enable AWQ optimization, defaults to False.

    False flash_attention bool

    Whether to use flash attention optimization, defaults to False.

    False batch_size int

    Number of documents to process simultaneously, defaults to 32.

    32 **kwargs Any

    Arbitrary keyword arguments for additional configuration.

    {}

    Returns:

    Name Type Description None None

    The method processes the dataset and saves the predictions without returning any value.

    "},{"location":"text/bulk/nli/","title":"Natural Language Inference","text":"

    Bases: TextBulk

    The NLIBulk class provides functionality for large-scale natural language inference (NLI) processing using Hugging Face transformers. It allows users to load datasets, configure models, and perform inference on batches of premise-hypothesis pairs.

    Attributes:

    Name Type Description input BatchInput

    Configuration and data inputs for the batch process.

    output BatchOutput

    Configurations for output data handling.

    state State

    State management for the inference task.

    Example CLI Usage:

    genius NLIBulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/nli \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/nli \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise\\\n--postgres_table state \\\n--id MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7-lol \\\ninfer \\\n--args \\\nmodel_name=\"MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7\" \\\nmodel_class=\"AutoModelForSequenceClassification\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False\n

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the NLIBulk class with the specified input, output, and state configurations.

    Parameters:

    Name Type Description Default input BatchInput

    The input data.

    required output BatchOutput

    The output data.

    required state State

    The state data.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.infer","title":"infer(model_name, max_length=512, model_class='AutoModelForSeq2SeqLM', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, batch_size=32, notification_email=None, **kwargs)","text":"

    Performs NLI inference on a loaded dataset using the specified model. The method processes the data in batches and saves the results to the configured output path.

    Parameters:

    Name Type Description Default model_name str

    Name or path of the NLI model.

    required max_length int

    Maximum length of the sequences for tokenization purposes. Defaults to 512.

    512 model_class str

    Class name of the model (e.g., \"AutoModelForSequenceClassification\"). Defaults to \"AutoModelForSeq2SeqLM\".

    'AutoModelForSeq2SeqLM' tokenizer_class str

    Class name of the tokenizer (e.g., \"AutoTokenizer\"). Defaults to \"AutoTokenizer\".

    'AutoTokenizer' use_cuda bool

    Whether to use CUDA for model inference. Defaults to False.

    False precision str

    Precision for model computation (e.g., \"float16\"). Defaults to \"float16\".

    'float16' quantization int

    Level of quantization for optimizing model size and speed. Defaults to 0.

    0 device_map str | Dict | None

    Specific device to use for computation. Defaults to \"auto\".

    'auto' max_memory Dict

    Maximum memory configuration for devices. Defaults to {0: \"24GB\"}.

    {0: '24GB'} torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model. Defaults to False.

    False compile bool

    Whether to compile the model before fine-tuning. Defaults to True.

    False awq_enabled bool

    Whether to enable AWQ optimization. Defaults to False.

    False flash_attention bool

    Whether to use flash attention optimization. Defaults to False.

    False batch_size int

    Number of premise-hypothesis pairs to process simultaneously. Defaults to 32.

    32 **kwargs Any

    Arbitrary keyword arguments for model and generation configurations.

    {}

    ```

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset","title":"load_dataset(dataset_path, max_length=512, **kwargs)","text":"

    Load a commonsense reasoning dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory or file.

    required max_length int

    Maximum length of text sequences for tokenization purposes. Defaults to 512.

    512 **kwargs

    Additional keyword arguments.

    {}

    Returns:

    Name Type Description Dataset Optional[Dataset]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--hugging-face-dataset","title":"Hugging Face Dataset","text":"

    Dataset files saved by the Hugging Face datasets library.

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"premise\": \"The premise text\", \"hypothesis\": \"The hypothesis text\"}\n

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--csv","title":"CSV","text":"

    Should contain 'premise' and 'hypothesis' columns.

    premise,hypothesis\n\"The premise text\",\"The hypothesis text\"\n

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'premise' and 'hypothesis' columns.

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'premise' and 'hypothesis' keys.

    [{\"premise\": \"The premise text\", \"hypothesis\": \"The hypothesis text\"}]\n

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'premise' and 'hypothesis' child elements.

    <record>\n<premise>The premise text</premise>\n<hypothesis>The hypothesis text</hypothesis>\n</record>\n

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'premise' and 'hypothesis' keys.

    - premise: \"The premise text\"\nhypothesis: \"The hypothesis text\"\n

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--tsv","title":"TSV","text":"

    Should contain 'premise' and 'hypothesis' columns separated by tabs.

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'premise' and 'hypothesis' columns.

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'premise' and 'hypothesis' columns.

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--feather","title":"Feather","text":"

    Should contain 'premise' and 'hypothesis' columns.

    "},{"location":"text/bulk/question_answering/","title":"Question Answering","text":"

    Bases: TextBulk

    QABulk is a class designed for managing bulk question-answering tasks using Hugging Face models. It is capable of handling both traditional text-based QA and table-based QA (using TAPAS and TAPEX models), providing a versatile solution for automated question answering at scale.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration and data inputs for batch processing.

    required output BatchOutput

    Configurations for output data handling.

    required state State

    State management for the bulk QA task.

    required **kwargs

    Arbitrary keyword arguments for extended functionality.

    {}

    Example CLI Usage:

    # For traditional text-based QA:\ngenius QABulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/qa-traditional \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/qa-traditional \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise\\\n--postgres_table state \\\n--id distilbert-base-uncased-distilled-squad-lol \\\nanswer_questions \\\n--args \\\nmodel_name=\"distilbert-base-uncased-distilled-squad\" \\\nmodel_class=\"AutoModelForQuestionAnswering\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False\n\n# For table-based QA using TAPAS:\ngenius QABulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/qa-table \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/qa-table \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise\\\n--postgres_table state \\\n--id google/tapas-base-finetuned-wtq-lol \\\nanswer_questions \\\n--args \\\nmodel_name=\"google/tapas-base-finetuned-wtq\" \\\nmodel_class=\"AutoModelForTableQuestionAnswering\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False\n\n# For table-based QA using TAPEX:\ngenius QABulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/qa-table \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/qa-table \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise\\\n--postgres_table state \\\n--id 
microsoft/tapex-large-finetuned-wtq-lol \\\nanswer_questions \\\n--args \\\nmodel_name=\"microsoft/tapex-large-finetuned-wtq\" \\\nmodel_class=\"AutoModelForSeq2SeqLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False\n

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the QABulk class with configurations for input, output, and state.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the QA task.

    required **kwargs Any

    Additional keyword arguments for extended functionality.

    {}"},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.answer_questions","title":"answer_questions(model_name, model_class='AutoModelForQuestionAnswering', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, batch_size=32, notification_email=None, **kwargs)","text":"

    Perform bulk question-answering using the specified model and tokenizer. This method can handle various types of QA models including traditional, TAPAS, and TAPEX.

    Parameters:

    Name Type Description Default model_name str

    Name or path of the question-answering model.

    required model_class str

    Class name of the model (e.g., \"AutoModelForQuestionAnswering\").

    'AutoModelForQuestionAnswering' tokenizer_class str

    Class name of the tokenizer (e.g., \"AutoTokenizer\").

    'AutoTokenizer' use_cuda bool

    Whether to use CUDA for model inference. Defaults to False.

    False precision str

    Precision for model computation. Defaults to \"float16\".

    'float16' quantization int

    Level of quantization for optimizing model size and speed. Defaults to 0.

    0 device_map str | Dict | None

    Specific device to use for computation. Defaults to \"auto\".

    'auto' max_memory Dict

    Maximum memory configuration for devices. Defaults to {0: \"24GB\"}.

    {0: '24GB'} torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model. Defaults to False.

    False compile bool

    Whether to compile the model before fine-tuning. Defaults to True.

    False awq_enabled bool

    Whether to enable AWQ optimization. Defaults to False.

    False flash_attention bool

    Whether to use flash attention optimization. Defaults to False.

    False batch_size int

    Number of questions to process simultaneously. Defaults to 32.

    32 **kwargs Any

    Arbitrary keyword arguments for model and generation configurations.

    {} Processing

    The method processes the data in batches, utilizing the appropriate model based on the model name and generating answers for the questions provided in the dataset.

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset","title":"load_dataset(dataset_path, max_length=512, **kwargs)","text":"

    Load a dataset from a directory.

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"context\": \"The context content\", \"question\": \"The question\"}\n

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--csv","title":"CSV","text":"

    Should contain 'context' and 'question' columns.

    context,question\n\"The context content\",\"The question\"\n

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'context' and 'question' columns.

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'context' and 'question' keys.

    [{\"context\": \"The context content\", \"question\": \"The question\"}]\n

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'context' and 'question' elements.

    <record>\n<context>The context content</context>\n<question>The question</question>\n</record>\n

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'context' and 'question' keys.

    - context: \"The context content\"\nquestion: \"The question\"\n

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--tsv","title":"TSV","text":"

    Should contain 'context' and 'question' columns separated by tabs.

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'context' and 'question' columns.

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'context' and 'question' columns.

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--feather","title":"Feather","text":"

    Should contain 'context' and 'question' columns.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required pad_on_right bool

    Whether to pad on the right.

    required max_length int

    The maximum length of the sequences.

    512 doc_stride int

    The document stride.

    required evaluate_squadv2 bool

    Whether to evaluate using SQuAD v2 metrics.

    required

    Returns:

    Name Type Description Dataset Optional[Dataset]

    The loaded dataset.

    "},{"location":"text/bulk/summarization/","title":"Summarization","text":"

    Bases: TextBulk

    SummarizationBulk is a class for managing bulk text summarization tasks using Hugging Face models. It is designed to handle large-scale summarization tasks efficiently and effectively, utilizing state-of-the-art machine learning models to provide high-quality summaries.

    The class provides methods to load datasets, configure summarization models, and execute bulk summarization tasks.

    Example CLI Usage:

    genius SummarizationBulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/summz \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/summz \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise\\\n--postgres_table state \\\n--id facebook/bart-large-cnn-lol \\\nsummarize \\\n--args \\\nmodel_name=\"facebook/bart-large-cnn\" \\\nmodel_class=\"AutoModelForSeq2SeqLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\ngeneration_bos_token_id=0 \\\ngeneration_decoder_start_token_id=2 \\\ngeneration_early_stopping=true \\\ngeneration_eos_token_id=2 \\\ngeneration_forced_bos_token_id=0 \\\ngeneration_forced_eos_token_id=2 \\\ngeneration_length_penalty=2.0 \\\ngeneration_max_length=142 \\\ngeneration_min_length=56 \\\ngeneration_no_repeat_ngram_size=3 \\\ngeneration_num_beams=4 \\\ngeneration_pad_token_id=1 \\\ngeneration_do_sample=false\n

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the SummarizationBulk class.

    Parameters:

    Name Type Description Default input BatchInput

    The input data configuration.

    required output BatchOutput

    The output data configuration.

    required state State

    The state configuration.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset","title":"load_dataset(dataset_path, max_length=512, **kwargs)","text":"

    Load a dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required **kwargs

    Additional keyword arguments.

    {}

    Returns:

    Type Description Optional[Dataset]

    Dataset | DatasetDict: The loaded dataset.

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\"}\n

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--csv","title":"CSV","text":"

    Should contain a 'text' column.

    text\n\"The text content\"\n

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--parquet","title":"Parquet","text":"

    Should contain a 'text' column.

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with a 'text' key.

    [{\"text\": \"The text content\"}]\n

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' child element.

    <record>\n<text>The text content</text>\n</record>\n

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with a 'text' key.

    - text: \"The text content\"\n

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--tsv","title":"TSV","text":"

    Should contain a 'text' column.

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain a 'text' column.

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with a 'text' column.

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--feather","title":"Feather","text":"

    Should contain a 'text' column.

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.summarize","title":"summarize(model_name, model_class='AutoModelForSeq2SeqLM', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, batch_size=32, max_length=512, notification_email=None, **kwargs)","text":"

    Perform bulk summarization using the specified model and tokenizer. This method handles the entire summarization process including loading the model, processing input data, generating summarization, and saving the results.

    Parameters:

    Name Type Description Default model_name str

    Name or path of the translation model.

    required origin str

    Source language ISO code.

    required target str

    Target language ISO code.

    required max_length int

    Maximum length of the tokens (default 512).

    512 model_class str

    Class name of the model (default \"AutoModelForSeq2SeqLM\").

    'AutoModelForSeq2SeqLM' tokenizer_class str

    Class name of the tokenizer (default \"AutoTokenizer\").

    'AutoTokenizer' use_cuda bool

    Whether to use CUDA for model inference (default False).

    False precision str

    Precision for model computation (default \"float16\").

    'float16' quantization int

    Level of quantization for optimizing model size and speed (default 0).

    0 device_map str | Dict | None

    Specific device to use for computation (default \"auto\").

    'auto' max_memory Dict

    Maximum memory configuration for devices.

    {0: '24GB'} torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model. Defaults to False.

    False compile bool

    Whether to compile the model before fine-tuning. Defaults to True.

    False awq_enabled bool

    Whether to enable AWQ optimization (default False).

    False flash_attention bool

    Whether to use flash attention optimization (default False).

    False batch_size int

    Number of translations to process simultaneously (default 32).

    32 max_length int

    Maximum lenght of the summary to be generated (default 512).

    512 **kwargs Any

    Arbitrary keyword arguments for model and generation configurations.

    {}"},{"location":"text/bulk/translation/","title":"Translation","text":"

    Bases: TextBulk

    TranslationBulk is a class for managing bulk translations using Hugging Face models. It is designed to handle large-scale translation tasks efficiently and effectively, using state-of-the-art machine learning models to provide high-quality translations for various language pairs.

    This class provides methods for loading datasets, configuring translation models, and executing bulk translation tasks.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration and data inputs for batch processing.

    required output BatchOutput

    Configuration for output data handling.

    required state State

    State management for translation tasks.

    required **kwargs

    Arbitrary keyword arguments for extended functionality.

    {}

    Example CLI Usage for Bulk Translation Task:

    genius TranslationBulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/trans \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/trans \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise\\\n--postgres_table state \\\n--id facebook/mbart-large-50-many-to-many-mmt-lol \\\ntranslate \\\n--args \\\nmodel_name=\"facebook/mbart-large-50-many-to-many-mmt\" \\\nmodel_class=\"AutoModelForSeq2SeqLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\norigin=\"hi_IN\" \\\ntarget=\"en_XX\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\ngenerate_decoder_start_token_id=2 \\\ngenerate_early_stopping=true \\\ngenerate_eos_token_id=2 \\\ngenerate_forced_eos_token_id=2 \\\ngenerate_max_length=200 \\\ngenerate_num_beams=5 \\\ngenerate_pad_token_id=1\n
    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset","title":"load_dataset(dataset_path, max_length=512, origin='en', target='hi', **kwargs)","text":"

    Load a dataset from a directory.

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--supported-data-formats-and-structures-for-translation-tasks","title":"Supported Data Formats and Structures for Translation Tasks:","text":"

    Note: All examples are assuming the source as \"en\", refer to the specific model for this parameter.

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\n\"translation\": {\n\"en\": \"English text\"\n}\n}\n

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--csv","title":"CSV","text":"

    Should contain 'en' column.

    en\n\"English text\"\n

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'en' column.

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'en' key.

    [\n{\n\"en\": \"English text\"\n}\n]\n

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'en' child elements.

    <record>\n<en>English text</en>\n</record>\n

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'en' key.

    - en: \"English text\"\n

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--tsv","title":"TSV","text":"

    Should contain 'en' column separated by tabs.

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'en' column.

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'en' column.

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--feather","title":"Feather","text":"

    Should contain 'en' column.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the directory containing the dataset files.

    required max_length int

    The maximum length for tokenization. Defaults to 512.

    512 origin str

    The origin language. Defaults to 'en'.

    'en' target str

    The target language. Defaults to 'hi'.

    'hi' **kwargs

    Additional keyword arguments.

    {}

    Returns:

    Name Type Description DatasetDict Optional[Dataset]

    The loaded dataset.

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.translate","title":"translate(model_name, origin, target, max_length=512, model_class='AutoModelForSeq2SeqLM', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, batch_size=32, notification_email=None, **kwargs)","text":"

    Perform bulk translation using the specified model and tokenizer. This method handles the entire translation process including loading the model, processing input data, generating translations, and saving the results.

    Parameters:

    Name Type Description Default model_name str

    Name or path of the translation model.

    required origin str

    Source language ISO code.

    required target str

    Target language ISO code.

    required max_length int

    Maximum length of the tokens (default 512).

    512 model_class str

    Class name of the model (default \"AutoModelForSeq2SeqLM\").

    'AutoModelForSeq2SeqLM' tokenizer_class str

    Class name of the tokenizer (default \"AutoTokenizer\").

    'AutoTokenizer' use_cuda bool

    Whether to use CUDA for model inference (default False).

    False precision str

    Precision for model computation (default \"float16\").

    'float16' quantization int

    Level of quantization for optimizing model size and speed (default 0).

    0 device_map str | Dict | None

    Specific device to use for computation (default \"auto\").

    'auto' max_memory Dict

    Maximum memory configuration for devices.

    {0: '24GB'} torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model. Defaults to False.

    False compile bool

    Whether to compile the model before fine-tuning. Defaults to True.

    False awq_enabled bool

    Whether to enable AWQ optimization (default False).

    False flash_attention bool

    Whether to use flash attention optimization (default False).

    False batch_size int

    Number of translations to process simultaneously (default 32).

    32 **kwargs Any

    Arbitrary keyword arguments for model and generation configurations.

    {}"},{"location":"text/fine_tune/base/","title":"Base Fine Tuner","text":"

    Bases: Bolt

    A bolt for fine-tuning Hugging Face models.

    This bolt uses the Hugging Face Transformers library to fine-tune a pre-trained model. It uses the Trainer class from the Transformers library to handle the training.

    "},{"location":"text/fine_tune/base/#geniusrise_text.base.fine_tune.TextFineTuner.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initialize the bolt.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output BatchOutput

    The output data.

    required state State

    The state manager.

    required evaluate bool

    Whether to evaluate the model. Defaults to False.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"text/fine_tune/base/#geniusrise_text.base.fine_tune.TextFineTuner.compute_metrics","title":"compute_metrics(eval_pred)","text":"

    Compute metrics for evaluation. This class implements a simple classification evaluation, tasks should ideally override this.

    Parameters:

    Name Type Description Default eval_pred EvalPrediction

    The evaluation predictions.

    required

    Returns:

    Name Type Description dict Optional[Dict[str, float]] | Dict[str, float]

    The computed metrics.

    "},{"location":"text/fine_tune/base/#geniusrise_text.base.fine_tune.TextFineTuner.fine_tune","title":"fine_tune(model_name, tokenizer_name, num_train_epochs, per_device_batch_size, model_class='AutoModel', tokenizer_class='AutoTokenizer', device_map='auto', precision='bfloat16', quantization=None, lora_config=None, use_accelerate=False, use_trl=False, accelerate_no_split_module_classes=[], compile=False, evaluate=False, save_steps=500, save_total_limit=None, load_best_model_at_end=False, metric_for_best_model=None, greater_is_better=None, map_data=None, use_huggingface_dataset=False, huggingface_dataset='', hf_repo_id=None, hf_commit_message=None, hf_token=None, hf_private=True, hf_create_pr=False, notification_email='', learning_rate=1e-05, **kwargs)","text":"

    Fine-tunes a pre-trained Hugging Face model.

    Parameters:

    Name Type Description Default model_name str

    The name of the pre-trained model.

    required tokenizer_name str

    The name of the pre-trained tokenizer.

    required num_train_epochs int

    The total number of training epochs to perform.

    required per_device_batch_size int

    The batch size per device during training.

    required model_class str

    The model class to use. Defaults to \"AutoModel\".

    'AutoModel' tokenizer_class str

    The tokenizer class to use. Defaults to \"AutoTokenizer\".

    'AutoTokenizer' device_map str | dict

    The device map for distributed training. Defaults to \"auto\".

    'auto' precision str

    The precision to use for training. Defaults to \"bfloat16\".

    'bfloat16' quantization int

    The quantization level to use for training. Defaults to None.

    None lora_config dict

    Configuration for PEFT LoRA optimization. Defaults to None.

    None use_accelerate bool

    Whether to use accelerate for distributed training. Defaults to False.

    False use_trl bool

    Whether to use TRL for training. Defaults to False.

    False accelerate_no_split_module_classes List[str]

    The module classes to not split during distributed training. Defaults to [].

    [] evaluate bool

    Whether to evaluate the model after training. Defaults to False.

    False compile bool

    Whether to compile the model before fine-tuning. Defaults to True.

    False save_steps int

    Number of steps between checkpoints. Defaults to 500.

    500 save_total_limit Optional[int]

    Maximum number of checkpoints to keep. Older checkpoints are deleted. Defaults to None.

    None load_best_model_at_end bool

    Whether to load the best model (according to evaluation) at the end of training. Defaults to False.

    False metric_for_best_model Optional[str]

    The metric to use to compare models. Defaults to None.

    None greater_is_better Optional[bool]

    Whether a larger value of the metric indicates a better model. Defaults to None.

    None use_huggingface_dataset bool

    Whether to load a dataset from huggingface hub.

    False huggingface_dataset str

    The huggingface dataset to use.

    '' map_data Callable

    A function to map data before training. Defaults to None.

    None hf_repo_id str

    The Hugging Face repo ID. Defaults to None.

    None hf_commit_message str

    The Hugging Face commit message. Defaults to None.

    None hf_token str

    The Hugging Face token. Defaults to None.

    None hf_private bool

    Whether to make the repo private. Defaults to True.

    True hf_create_pr bool

    Whether to create a pull request. Defaults to False.

    False notification_email str

    Whether to notify after job is complete. Defaults to None.

    '' learning_rate float

    Learning rate for backpropagation.

    1e-05 **kwargs

    Additional keyword arguments to pass to the model.

    {}

    Returns:

    Type Description

    None

    "},{"location":"text/fine_tune/base/#geniusrise_text.base.fine_tune.TextFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs) abstractmethod","text":"

    Load a dataset from a file.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset file.

    required split str

    The split to load. Defaults to None.

    required **kwargs

    Additional keyword arguments to pass to the load_dataset method.

    {}

    Returns:

    Type Description Dataset | DatasetDict | Optional[Dataset]

    Union[Dataset, DatasetDict, None]: The loaded dataset.

    Raises:

    Type Description NotImplementedError

    This method should be overridden by subclasses.

    "},{"location":"text/fine_tune/base/#geniusrise_text.base.fine_tune.TextFineTuner.load_models","title":"load_models(model_name, tokenizer_name, model_class='AutoModel', tokenizer_class='AutoTokenizer', device_map='auto', precision='bfloat16', quantization=None, lora_config=None, use_accelerate=False, accelerate_no_split_module_classes=[], **kwargs)","text":"

    Load the model and tokenizer.

    Parameters:

    Name Type Description Default model_name str

    The name of the model to be loaded.

    required tokenizer_name str

    The name of the tokenizer to be loaded. Defaults to None.

    required model_class str

    The class of the model. Defaults to \"AutoModel\".

    'AutoModel' tokenizer_class str

    The class of the tokenizer. Defaults to \"AutoTokenizer\".

    'AutoTokenizer' device Union[str, torch.device]

    The device to be used. Defaults to \"cuda\".

    required precision str

    The precision to be used. Choose from 'float32', 'float16', 'bfloat16'. Defaults to \"float32\".

    'bfloat16' quantization Optional[int]

    The quantization to be used. Defaults to None.

    None lora_config Optional[dict]

    The LoRA configuration to be used. Defaults to None.

    None use_accelerate bool

    Whether to use accelerate. Defaults to False.

    False accelerate_no_split_module_classes List[str]

    The list of no split module classes to be used. Defaults to [].

    [] **kwargs

    Additional keyword arguments.

    {}

    Raises:

    Type Description ValueError

    If an unsupported precision is chosen.

    Returns:

    Type Description

    None

    "},{"location":"text/fine_tune/base/#geniusrise_text.base.fine_tune.TextFineTuner.preprocess_data","title":"preprocess_data(**kwargs)","text":"

    Load and preprocess the dataset

    "},{"location":"text/fine_tune/base/#geniusrise_text.base.fine_tune.TextFineTuner.upload_to_hf_hub","title":"upload_to_hf_hub(hf_repo_id=None, hf_commit_message=None, hf_token=None, hf_private=None, hf_create_pr=None)","text":"

    Upload the model and tokenizer to Hugging Face Hub.

    "},{"location":"text/fine_tune/classification/","title":"Classification","text":"

    Bases: TextFineTuner

    A bolt for fine-tuning Hugging Face models for text classification tasks.

    This class extends the TextFineTuner and specializes in fine-tuning models for text classification. It provides additional functionalities for loading and preprocessing text classification datasets in various formats.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output OutputConfig

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

    genius TextClassificationFineTuner rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id cardiffnlp/twitter-roberta-base-hate-multiclass-latest-lol \\\nfine_tune \\\n--args \\\nmodel_name=my_model \\\ntokenizer_name=my_tokenizer \\\nnum_train_epochs=3 \\\nper_device_train_batch_size=8 \\\ndata_max_length=512\n
    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.compute_metrics","title":"compute_metrics(eval_pred)","text":"

    Compute metrics for evaluation. This class implements a simple classification evaluation, tasks should ideally override this.

    Parameters:

    Name Type Description Default eval_pred EvalPrediction

    The evaluation predictions.

    required

    Returns:

    Name Type Description dict Union[Optional[Dict[str, float]], Dict[str, float]]

    The computed metrics.

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset","title":"load_dataset(dataset_path, max_length=512, **kwargs)","text":"

    Load a classification dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required max_length int

    The maximum length for tokenization. Defaults to 512.

    512

    Returns:

    Name Type Description Dataset Optional[Dataset]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\", \"label\": \"The label\"}\n

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'text' and 'label' columns.

    text,label\n\"The text content\",\"The label\"\n

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'text' and 'label' columns.

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'text' and 'label' keys.

    [{\"text\": \"The text content\", \"label\": \"The label\"}]\n

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' and 'label' child elements.

    <record>\n<text>The text content</text>\n<label>The label</label>\n</record>\n

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' and 'label' keys.

    - text: \"The text content\"\nlabel: \"The label\"\n

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'text' and 'label' columns separated by tabs.

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' and 'label' columns.

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' and 'label' columns.

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'text' and 'label' columns.

    "},{"location":"text/fine_tune/instruction_tuning/","title":"Instruction Tuning","text":"

    Bases: TextFineTuner

    A bolt for fine-tuning Hugging Face models on instruction tuning tasks.

    This class inherits from TextFineTuner and specializes in fine-tuning models for instruction-based tasks. It provides additional methods for loading and preparing datasets in various formats, as well as computing custom metrics.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output OutputConfig

    The output data.

    required state State

    The state manager.

    required

    Attributes:

    Name Type Description max_length int

    The maximum length for tokenization.

    CLI Usage:

        genius InstructionFineTuner rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id mistralai/Mistral-7B-Instruct-v0.1-lol \\\nfine_tune \\\n--args \\\nmodel_name=my_model \\\ntokenizer_name=my_tokenizer \\\nnum_train_epochs=3 \\\nper_device_train_batch_size=8 \\\ndata_max_length=512\n
    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.compute_metrics","title":"compute_metrics(eval_pred)","text":"

    Compute evaluation metrics for the model's predictions.

    This method takes the model's predictions and ground truth labels, converts them to text, and then computes the BLEU score for evaluation.

    Parameters:

    Name Type Description Default eval_pred EvalPrediction

    A named tuple containing predictions and label_ids. - predictions: The logits predicted by the model of shape (batch_size, sequence_length, num_classes). - label_ids: The ground truth labels of shape (batch_size, sequence_length).

    required

    Returns:

    Type Description Optional[Dict[str, float]]

    Optional[Dict[str, float]]: A dictionary containing the BLEU score. Returns None if an exception occurs.

    Raises:

    Type Description Exception

    If the tokenizer is not initialized.

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset","title":"load_dataset(dataset_path, max_length=512, **kwargs)","text":"

    Load an instruction tuning dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required max_length int

    The maximum length for tokenization. Defaults to 512.

    512

    Returns:

    Name Type Description Dataset Union[Dataset, Dict]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"instruction\": \"The instruction\", \"output\": \"The output\"}\n

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'instruction' and 'output' columns.

    instruction,output\n\"The instruction\",\"The output\"\n

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'instruction' and 'output' columns.

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'instruction' and 'output' keys.

    [{\"instruction\": \"The instruction\", \"output\": \"The output\"}]\n

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'instruction' and 'output' child elements.

    <record>\n<instruction>The instruction</instruction>\n<output>The output</output>\n</record>\n

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'instruction' and 'output' keys.

    - instruction: \"The instruction\"\noutput: \"The output\"\n

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'instruction' and 'output' columns separated by tabs.

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'instruction' and 'output' columns.

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'instruction' and 'output' columns.

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'instruction' and 'output' columns.

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.prepare_train_features","title":"prepare_train_features(examples)","text":"

    Tokenize the examples and prepare the features for training.

    Parameters:

    Name Type Description Default examples dict

    A dictionary of examples.

    required

    Returns:

    Name Type Description dict Dict

    The processed features.

    "},{"location":"text/fine_tune/language_model/","title":"Language Model","text":"

    Bases: TextFineTuner

    A bolt for fine-tuning Hugging Face models on language modeling tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output OutputConfig

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius LanguageModelFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/lm \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/lm \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise\\\n--postgres_table state \\\n--id mistralai/Mistral-7B-Instruct-v0.1-lol \\\nfine_tune \\\n--args \\\nmodel_name=my_model \\\ntokenizer_name=my_tokenizer \\\nnum_train_epochs=3 \\\nper_device_train_batch_size=8 \\\ndata_max_length=512\n
    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.compute_metrics","title":"compute_metrics(eval_pred)","text":"

    Compute evaluation metrics for the model's predictions.

    This method takes the model's predictions and ground truth labels, converts them to text, and then computes the BLEU score for evaluation.

    Parameters:

    Name Type Description Default eval_pred EvalPrediction

    A named tuple containing predictions and label_ids. - predictions: The logits predicted by the model of shape (batch_size, sequence_length, num_classes). - label_ids: The ground truth labels of shape (batch_size, sequence_length).

    required

    Returns:

    Type Description Optional[Dict[str, float]]

    Optional[Dict[str, float]]: A dictionary containing the BLEU score. Returns None if an exception occurs.

    Raises:

    Type Description Exception

    If the tokenizer is not initialized.

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.data_collator","title":"data_collator(examples)","text":"

    Customize the data collator.

    Parameters:

    Name Type Description Default examples

    The examples to collate.

    required

    Returns:

    Name Type Description dict

    The collated data.

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset","title":"load_dataset(dataset_path, masked=False, max_length=512, **kwargs)","text":"

    Load a language modeling dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required masked bool

    Whether to use masked language modeling. Defaults to True.

    False max_length int

    The maximum length for tokenization. Defaults to 512.

    512

    Returns:

    Name Type Description Dataset

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--dataset-files-saved-by-hugging-face-datasets-library","title":"Dataset files saved by Hugging Face datasets library","text":"

    The directory should contain 'dataset_info.json' and other related files.

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\"}\n

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'text' column.

    text\n\"The text content\"\n

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'text' column.

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'text' key.

    [{\"text\": \"The text content\"}]\n

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' child element.

    <record>\n<text>The text content</text>\n</record>\n

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' key.

    - text: \"The text content\"\n

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'text' column separated by tabs.

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' column.

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' column.

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'text' column.

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.prepare_train_features","title":"prepare_train_features(examples)","text":"

    Tokenize the examples and prepare the features for training.

    Parameters:

    Name Type Description Default examples dict

    A dictionary of examples.

    required

    Returns:

    Name Type Description dict

    The processed features.

    "},{"location":"text/fine_tune/ner/","title":"Named Entity Recognition","text":"

    Bases: TextFineTuner

    A bolt for fine-tuning Hugging Face models on named entity recognition tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output OutputConfig

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius NamedEntityRecognitionFineTuner rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id dslim/bert-large-NER-lol \\\nfine_tune \\\n--args \\\nmodel_name=my_model \\\ntokenizer_name=my_tokenizer \\\nnum_train_epochs=3 \\\nper_device_train_batch_size=8\n
    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.data_collator","title":"data_collator(examples)","text":"

    Customize the data collator.

    Parameters:

    Name Type Description Default examples List[Dict[str, torch.Tensor]]

    The examples to collate.

    required

    Returns:

    Type Description Dict[str, torch.Tensor]

    Dict[str, torch.Tensor]: The collated data.

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset","title":"load_dataset(dataset_path, label_list=[], **kwargs)","text":"

    Load a named entity recognition dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required label_list List[str]

    The list of labels for named entity recognition. Defaults to [].

    []

    Returns:

    Name Type Description DatasetDict Union[Dataset, DatasetDict, None]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--hugging-face-dataset","title":"Hugging Face Dataset","text":"

    Dataset files saved by the Hugging Face datasets library.

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"tokens\": [\"token1\", \"token2\", ...], \"ner_tags\": [0, 1, ...]}\n

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'tokens' and 'ner_tags' columns.

    tokens,ner_tags\n\"['token1', 'token2', ...]\", \"[0, 1, ...]\"\n

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'tokens' and 'ner_tags' columns.

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'tokens' and 'ner_tags' keys.

    [{\"tokens\": [\"token1\", \"token2\", ...], \"ner_tags\": [0, 1, ...]}]\n

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'tokens' and 'ner_tags' child elements.

    <record>\n<tokens>token1 token2 ...</tokens>\n<ner_tags>0 1 ...</ner_tags>\n</record>\n

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'tokens' and 'ner_tags' keys.

    - tokens: [\"token1\", \"token2\", ...]\nner_tags: [0, 1, ...]\n

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'tokens' and 'ner_tags' columns separated by tabs.

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'tokens' and 'ner_tags' columns.

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'tokens' and 'ner_tags' columns.

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'tokens' and 'ner_tags' columns.

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.prepare_train_features","title":"prepare_train_features(examples)","text":"

    Tokenize the examples and prepare the features for training.

    Parameters:

    Name Type Description Default examples Dict[str, Union[List[str], List[int]]]

    A dictionary of examples.

    required

    Returns:

    Type Description Dict[str, Union[List[str], List[int]]]

    Dict[str, Union[List[str], List[int]]]: The processed features.

    "},{"location":"text/fine_tune/nli/","title":"Natural Language Inference","text":"

    Bases: TextFineTuner

    A bolt for fine-tuning Hugging Face models for text classification tasks.

    This class extends the TextFineTuner and specializes in fine-tuning models for text classification. It provides additional functionalities for loading and preprocessing text classification datasets in various formats.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output OutputConfig

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius NLIFineTuner rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7-lol\n        fine_tune \\\n--args \\\nmodel_name=my_model \\\ntokenizer_name=my_tokenizer \\\nnum_train_epochs=3 \\\nper_device_train_batch_size=8\n
    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.data_collator","title":"data_collator(examples)","text":"

    Customize the data collator.

    Parameters:

    Name Type Description Default examples Dict

    The examples to collate.

    required

    Returns:

    Name Type Description dict Dict

    The collated data.

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load a commonsense reasoning dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required **kwargs Any

    Additional keyword arguments.

    {}

    Returns:

    Name Type Description Dataset Union[Dataset, DatasetDict, None]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--hugging-face-dataset","title":"Hugging Face Dataset","text":"

    Dataset files saved by the Hugging Face datasets library.

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"premise\": \"The premise text\", \"hypothesis\": \"The hypothesis text\", \"label\": 0 or 1 or 2}\n

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns.

    premise,hypothesis,label\n\"The premise text\",\"The hypothesis text\",0\n

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns.

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'premise', 'hypothesis', and 'label' keys.

    [{\"premise\": \"The premise text\", \"hypothesis\": \"The hypothesis text\", \"label\": 0}]\n

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'premise', 'hypothesis', and 'label' child elements.

    <record>\n<premise>The premise text</premise>\n<hypothesis>The hypothesis text</hypothesis>\n<label>0</label>\n</record>\n

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'premise', 'hypothesis', and 'label' keys.

    - premise: \"The premise text\"\nhypothesis: \"The hypothesis text\"\nlabel: 0\n

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns separated by tabs.

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns.

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'premise', 'hypothesis', and 'label' columns.

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns.

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.prepare_train_features","title":"prepare_train_features(examples)","text":"

    Tokenize the examples and prepare the features for training.

    Parameters:

    Name Type Description Default examples dict

    A dictionary of examples.

    required

    Returns:

    Name Type Description dict Dict

    The processed features.

    "},{"location":"text/fine_tune/question_answering/","title":"Question Answering","text":"

    Bases: TextFineTuner

    A bolt for fine-tuning Hugging Face models on question answering tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output OutputConfig

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius QAFineTuner rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id microsoft/tapex-large-finetuned-wtq-lol \\\nfine_tune \\\n--args \\\nmodel_name=my_model \\\ntokenizer_name=my_tokenizer \\\nnum_train_epochs=3 \\\nper_device_train_batch_size=8\n
    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initialize the bolt.

    Args:\n    input (BatchInput): The batch input data.\n    output (OutputConfig): The output data.\n    state (State): The state manager.\n    **kwargs: Additional keyword arguments.\n
    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.compute_metrics","title":"compute_metrics(eval_pred)","text":"

    Compute the accuracy of the model's predictions.

    Parameters:

    Name Type Description Default eval_pred tuple

    A tuple containing two elements: - predictions (np.ndarray): The model's predictions. - label_ids (np.ndarray): The true labels.

    required

    Returns:

    Name Type Description dict Optional[Dict[str, float]]

    A dictionary mapping metric names to computed values.

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset","title":"load_dataset(dataset_path, pad_on_right=True, max_length=None, doc_stride=None, evaluate_squadv2=False, **kwargs)","text":"

    Load a dataset from a directory.

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"context\": \"The context content\", \"question\": \"The question\", \"answers\": {\"answer_start\": [int], \"text\": [str]}}\n

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'context', 'question', and 'answers' columns.

    context,question,answers\n\"The context content\",\"The question\",\"{'answer_start': [int], 'text': [str]}\"\n

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'context', 'question', and 'answers' columns.

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'context', 'question', and 'answers' keys.

    [{\"context\": \"The context content\", \"question\": \"The question\", \"answers\": {\"answer_start\": [int], \"text\": [str]}}]\n

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'context', 'question', and 'answers' child elements.

    <record>\n<context>The context content</context>\n<question>The question</question>\n<answers answer_start=\"int\" text=\"str\"></answers>\n</record>\n

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'context', 'question', and 'answers' keys.

    - context: \"The context content\"\nquestion: \"The question\"\nanswers:\nanswer_start: [int]\ntext: [str]\n

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'context', 'question', and 'answers' columns separated by tabs.

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'context', 'question', and 'answers' columns.

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'context', 'question', and 'answers' columns.

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'context', 'question', and 'answers' columns.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required pad_on_right bool

    Whether to pad on the right.

    True max_length int

    The maximum length of the sequences.

    None doc_stride int

    The document stride.

    None evaluate_squadv2 bool

    Whether to evaluate using SQuAD v2 metrics.

    False

    Returns:

    Name Type Description Dataset Optional[Dataset]

    The loaded dataset.

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.prepare_train_features","title":"prepare_train_features(examples, cls_token_id=None)","text":"

    Tokenize our examples with truncation and padding, but keep the overflows using a stride.

    Parameters:

    Name Type Description Default examples Dict[str, Union[str, List[str]]]

    The examples to be tokenized.

    required

    Returns:

    Type Description Optional[Dict[str, Union[List[int], List[List[int]]]]]

    The tokenized examples.

    "},{"location":"text/fine_tune/summarization/","title":"Summarization","text":"

    Bases: TextFineTuner

    A bolt for fine-tuning Hugging Face models on summarization tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output OutputConfig

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius SummarizationFineTuner rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nfine_tune \\\n--args \\\nmodel_name=my_model \\\ntokenizer_name=my_tokenizer \\\nnum_train_epochs=3 \\\nper_device_train_batch_size=8\n
    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.compute_metrics","title":"compute_metrics(pred)","text":"

    Compute ROUGE metrics.

    Parameters:

    Name Type Description Default pred EvalPrediction

    The predicted results.

    required

    Returns:

    Name Type Description dict Dict[str, float]

    A dictionary with ROUGE-1, ROUGE-2, and ROUGE-L scores.

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.data_collator","title":"data_collator(examples)","text":"

    Customize the data collator.

    Parameters:

    Name Type Description Default examples List[Dict[str, Union[str, List[int]]]]

    The examples to collate.

    required

    Returns:

    Name Type Description dict Dict[str, Union[List[int], List[List[int]]]]

    The collated data.

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load a dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required **kwargs Any

    Additional keyword arguments.

    {}

    Returns:

    Type Description Optional[DatasetDict]

    Dataset | DatasetDict: The loaded dataset.

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\", \"summary\": \"The summary\"}\n

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'text' and 'summary' columns.

    text,summary\n\"The text content\",\"The summary\"\n

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'text' and 'summary' columns.

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'text' and 'summary' keys.

    [{\"text\": \"The text content\", \"summary\": \"The summary\"}]\n

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' and 'summary' child elements.

    <record>\n<text>The text content</text>\n<summary>The summary</summary>\n</record>\n

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' and 'summary' keys.

    - text: \"The text content\"\nsummary: \"The summary\"\n

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'text' and 'summary' columns separated by tabs.

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' and 'summary' columns.

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' and 'summary' columns.

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'text' and 'summary' columns.

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.prepare_train_features","title":"prepare_train_features(examples)","text":"

    Tokenize the examples and prepare the features for training.

    Parameters:

    Name Type Description Default examples dict

    A dictionary of examples.

    required

    Returns:

    Name Type Description dict Optional[Dict[str, List[int]]]

    The processed features.

    "},{"location":"text/fine_tune/translation/","title":"Translation","text":"

    Bases: TextFineTuner

    A bolt for fine-tuning Hugging Face models on translation tasks.

    Args:\n    input (BatchInput): The batch input data.\n    output (OutputConfig): The output data.\n    state (State): The state manager.\n    **kwargs: Arbitrary keyword arguments for extended functionality.\n

    CLI Usage:

        genius TranslationFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/trans \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/trans \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise\\\n--postgres_table state \\\n--id facebook/mbart-large-50-many-to-many-mmt-lol \\\nfine_tune \\\n--args \\\nmodel_name=my_model \\\ntokenizer_name=my_tokenizer \\\nnum_train_epochs=3 \\\nper_device_train_batch_size=8 \\\ndata_max_length=512\n
    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.data_collator","title":"data_collator(examples)","text":"

    Customize the data collator.

    Parameters:

    Name Type Description Default examples

    The examples to collate.

    required

    Returns:

    Name Type Description dict

    The collated data.

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset","title":"load_dataset(dataset_path, max_length=512, origin='en', target='fr', **kwargs)","text":"

    Load a dataset from a directory.

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--supported-data-formats-and-structures-for-translation-tasks","title":"Supported Data Formats and Structures for Translation Tasks:","text":""},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\n\"translation\": {\n\"en\": \"English text\",\n\"fr\": \"French text\"\n}\n}\n

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'en' and 'fr' columns.

    en,fr\n\"English text\",\"French text\"\n

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'en' and 'fr' columns.

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'en' and 'fr' keys.

    [\n{\n\"en\": \"English text\",\n\"fr\": \"French text\"\n}\n]\n

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'en' and 'fr' child elements.

    <record>\n<en>English text</en>\n<fr>French text</fr>\n</record>\n

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'en' and 'fr' keys.

    - en: \"English text\"\nfr: \"French text\"\n

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'en' and 'fr' columns separated by tabs.

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'en' and 'fr' columns.

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'en' and 'fr' columns.

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'en' and 'fr' columns.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the directory containing the dataset files.

    required max_length int

    The maximum length for tokenization. Defaults to 512.

    512 origin str

    The origin language. Defaults to 'en'.

    'en' target str

    The target language. Defaults to 'fr'.

    'fr' **kwargs Any

    Additional keyword arguments.

    {}

    Returns:

    Name Type Description DatasetDict Optional[DatasetDict]

    The loaded dataset.

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.prepare_train_features","title":"prepare_train_features(examples)","text":"

    Tokenize the examples and prepare the features for training.

    Parameters:

    Name Type Description Default examples dict

    A dictionary of examples.

    required

    Returns:

    Name Type Description dict

    The processed features.

    "},{"location":"vision/api/base/","title":"Vision Base","text":"

    Bases: VisionBulk

    The VisionAPI class inherits from VisionBulk and is designed to facilitate the handling of vision-based tasks using a pre-trained machine learning model. It sets up a server to process image-related requests using a specified model.

    "},{"location":"vision/api/base/#geniusrise_vision.base.api.VisionAPI.__init__","title":"__init__(input, output, state)","text":"

    Initializes the VisionAPI object with batch input, output, and state.

    Parameters:

    Name Type Description Default input BatchInput

    Object to handle batch input operations.

    required output BatchOutput

    Object to handle batch output operations.

    required state State

    Object to maintain the state of the API.

    required"},{"location":"vision/api/base/#geniusrise_vision.base.api.VisionAPI.listen","title":"listen(model_name, model_class='AutoModel', processor_class='AutoProcessor', device_map='auto', max_memory={0: '24GB'}, use_cuda=False, precision='float16', quantization=0, torchscript=False, compile=False, flash_attention=False, better_transformers=False, concurrent_queries=False, endpoint='*', port=3000, cors_domain='http://localhost:3000', username=None, password=None, **model_args)","text":"

    Configures and starts a CherryPy server to listen for image processing requests.

    Parameters:

    Name Type Description Default model_name str

    The name of the pre-trained vision model.

    required model_class str

    The class of the pre-trained vision model. Defaults to \"AutoModel\".

    'AutoModel' processor_class str

    The class of the processor for input image preprocessing. Defaults to \"AutoProcessor\".

    'AutoProcessor' device_map str | Dict | None

    Device mapping for model inference. Defaults to \"auto\".

    'auto' max_memory Dict[int, str]

    Maximum memory allocation for model inference. Defaults to {0: \"24GB\"}.

    {0: '24GB'} precision str

    The floating-point precision to be used by the model. Options are 'float32', 'float16', 'bfloat16'.

    'float16' quantization int

    The bit level for model quantization (0 for none, 8 for 8-bit quantization).

    0 torchscript bool

    Whether to use TorchScript for model optimization. Defaults to True.

    False compile bool

    Whether to compile the model before fine-tuning. Defaults to False.

    False flash_attention bool

    Whether to use flash attention 2. Default is False.

    False better_transformers bool

    Flag to enable Better Transformers optimization for faster processing.

    False concurrent_queries bool

    (bool): Whether the API supports concurrent API calls (usually false).

    False endpoint str

    The network endpoint for the server. Defaults to \"*\".

    '*' port int

    The network port for the server. Defaults to 3000.

    3000 cors_domain str

    The domain to allow for CORS requests. Defaults to \"http://localhost:3000\".

    'http://localhost:3000' username Optional[str]

    Username for server authentication. Defaults to None.

    None password Optional[str]

    Password for server authentication. Defaults to None.

    None **model_args Any

    Additional arguments for the vision model.

    {}"},{"location":"vision/api/base/#geniusrise_vision.base.api.VisionAPI.validate_password","title":"validate_password(realm, username, password)","text":"

    Validate the username and password against expected values.

    Parameters:

    Name Type Description Default realm str

    The authentication realm.

    required username str

    The provided username.

    required password str

    The provided password.

    required

    Returns:

    Name Type Description bool

    True if credentials are valid, False otherwise.

    "},{"location":"vision/api/imgclass/","title":"Image Classsification API","text":"

    Bases: VisionAPI

    ImageClassificationAPI extends the VisionAPI for image classification tasks. This API provides functionalities to classify images into various categories based on the trained model it uses. It supports both single-label and multi-label classification problems.

    Methods

    classify_image(self): Endpoint to classify an uploaded image and return the classification scores. sigmoid(self, _outputs): Applies the sigmoid function to the model's outputs. softmax(self, _outputs): Applies the softmax function to the model's outputs.

    Example CLI Usage:

    genius ImageClassificationAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"Kaludi/food-category-classification-v2.0\" \\\nmodel_class=\"AutoModelForImageClassification\" \\\nprocessor_class=\"AutoImageProcessor\" \\\ndevice_map=\"cuda:0\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\nmax_memory=None \\\ntorchscript=False \\\ncompile=False \\\nflash_attention=False \\\nbetter_transformers=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    "},{"location":"vision/api/imgclass/#geniusrise_vision.imgclass.api.ImageClassificationAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the ImageClassificationAPI with the necessary configurations for input, output, and state management, along with model-specific parameters.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the API.

    required **kwargs

    Additional keyword arguments for extended functionality, such as model configuration.

    {}"},{"location":"vision/api/imgclass/#geniusrise_vision.imgclass.api.ImageClassificationAPI.classify_image","title":"classify_image()","text":"

    Endpoint for classifying an image. It accepts a base64-encoded image, decodes it, preprocesses it, and runs it through the classification model. It supports both single-label and multi-label classification by applying the appropriate post-processing function to the model outputs.

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the predictions with the highest scores and all prediction scores.

    Dict[str, Any]

    Each prediction includes the label and its corresponding score.

    Raises:

    Type Description Exception

    If an error occurs during image processing or classification.

    Example CURL Request:

    curl -X POST localhost:3000/api/v1/classify_image             -H \"Content-Type: application/json\"             -d '{\"image_base64\": \"<base64-encoded-image>\"}'\n

    or to feed an image:

    (base64 -w 0 cat.jpg | awk '{print \"{\"image_base64\": \"\"$0\"\"}\"}' > /tmp/image_payload.json)\ncurl -X POST http://localhost:3000/api/v1/classify_image             -H \"Content-Type: application/json\"             -u user:password             -d @/tmp/image_payload.json | jq\n

    "},{"location":"vision/api/imgclass/#geniusrise_vision.imgclass.api.ImageClassificationAPI.sigmoid","title":"sigmoid(_outputs)","text":"

    Applies the sigmoid function to the model's outputs for binary classification or multi-label classification tasks.

    Parameters:

    Name Type Description Default _outputs np.ndarray

    The raw outputs from the model.

    required

    Returns:

    Type Description np.ndarray

    np.ndarray: The outputs after applying the sigmoid function.

    "},{"location":"vision/api/imgclass/#geniusrise_vision.imgclass.api.ImageClassificationAPI.softmax","title":"softmax(_outputs)","text":"

    Applies the softmax function to the model's outputs for single-label classification tasks, ensuring the output scores sum to 1 across classes.

    Parameters:

    Name Type Description Default _outputs np.ndarray

    The raw outputs from the model.

    required

    Returns:

    Type Description np.ndarray

    np.ndarray: The outputs after applying the softmax function.

    "},{"location":"vision/api/ocr/","title":"OCR API","text":"

    Bases: VisionAPI

    ImageOCRAPI provides Optical Character Recognition (OCR) capabilities for images, leveraging different OCR engines like EasyOCR, PaddleOCR, and Hugging Face models tailored for OCR tasks. This API can decode base64-encoded images, process them through the chosen OCR engine, and return the recognized text.

    The API supports dynamic selection of OCR engines and configurations based on the provided model name and arguments, offering flexibility in processing various languages and image types.

    Methods

    ocr(self): Processes an uploaded image for OCR and returns the recognized text.

    Example CLI Usage:

    EasyOCR:

    genius ImageOCRAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"easyocr\" \\\ndevice_map=\"cuda:0\" \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    Paddle OCR:

    genius ImageOCRAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"paddleocr\" \\\ndevice_map=\"cuda:0\" \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    Huggingface models:

    genius ImageOCRAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"facebook/nougat-base\" \\\nmodel_class=\"VisionEncoderDecoderModel\" \\\nprocessor_class=\"NougatProcessor\" \\\ndevice_map=\"cuda:0\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\nmax_memory=None \\\ntorchscript=False \\\ncompile=False \\\nflash_attention=False \\\nbetter_transformers=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    "},{"location":"vision/api/ocr/#geniusrise_vision.ocr.api.ImageOCRAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the ImageOCRAPI with configurations for input, output, state management, and OCR model specifics.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the API.

    required **kwargs

    Additional keyword arguments for extended functionality.

    {}"},{"location":"vision/api/ocr/#geniusrise_vision.ocr.api.ImageOCRAPI.ocr","title":"ocr()","text":"

    Endpoint for performing OCR on an uploaded image. It accepts a base64-encoded image, decodes it, preprocesses it through the specified OCR model, and returns the recognized text.

    Returns:

    Type Description

    Dict[str, Any]: A dictionary containing the success status, recognized text ('result'), and the original

    image name ('image_name') if provided.

    Raises:

    Type Description Exception

    If an error occurs during image processing or OCR.

    Example CURL Request:

    curl -X POST localhost:3000/api/v1/ocr             -H \"Content-Type: application/json\"             -d '{\"image_base64\": \"<base64-encoded-image>\", \"model_name\": \"easyocr\", \"use_easyocr_bbox\": true}'\n

    or

    (base64 -w 0 test_images_ocr/ReceiptSwiss.jpg | awk '{print \"{\"image_base64\": \"\"$0\"\", \"max_length\": 1024}\"}' > /tmp/image_payload.json)\ncurl -X POST http://localhost:3000/api/v1/ocr             -H \"Content-Type: application/json\"             -u user:password             -d @/tmp/image_payload.json | jq\n
    "},{"location":"vision/api/ocr/#geniusrise_vision.ocr.api.ImageOCRAPI.process_huggingface_models","title":"process_huggingface_models(image, use_easyocr_bbox)","text":"

    Processes the image using a Hugging Face model specified for OCR tasks. Supports advanced configurations and post-processing to handle various OCR-related challenges.

    Parameters:

    Name Type Description Default image Image.Image

    The image to process.

    required use_easyocr_bbox bool

    Whether to use EasyOCR to detect text bounding boxes before processing with Hugging Face models.

    required

    Returns:

    Name Type Description str

    The recognized text from the image.

    "},{"location":"vision/api/ocr/#geniusrise_vision.ocr.api.ImageOCRAPI.process_other_models","title":"process_other_models(image)","text":"

    Processes the image using non-Hugging Face OCR models like EasyOCR or PaddleOCR based on the initialization.

    Parameters:

    Name Type Description Default image Image.Image

    The image to process.

    required

    Returns:

    Name Type Description Any Any

    The OCR results which might include text, bounding boxes, and confidence scores depending on the model.

    Raises:

    Type Description ValueError

    If an invalid or unsupported OCR model is specified.

    "},{"location":"vision/api/segment/","title":"Image Segmentation API","text":"

    Bases: VisionAPI

    VisionSegmentationAPI extends VisionAPI to provide image segmentation functionalities, including panoptic, instance, and semantic segmentation. This API supports different segmentation tasks based on the model's capabilities and the specified subtask in the request.

    Methods

    segment_image(self): Processes an image for segmentation and returns the segmentation masks along with labels.

    Example CLI Usage:

    genius VisionSegmentationAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"facebook/mask2former-swin-large-mapillary-vistas-semantic\" \\\nmodel_class=\"Mask2FormerForUniversalSegmentation\" \\\nprocessor_class=\"AutoImageProcessor\" \\\ndevice_map=\"cuda:0\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\nmax_memory=None \\\ntorchscript=False \\\ncompile=False \\\nflash_attention=False \\\nbetter_transformers=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    "},{"location":"vision/api/segment/#geniusrise_vision.segment.api.VisionSegmentationAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the VisionSegmentationAPI with configurations for input, output, and state management, along with any model-specific parameters for segmentation tasks.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the API.

    required **kwargs

    Additional keyword arguments for extended functionality.

    {}"},{"location":"vision/api/segment/#geniusrise_vision.segment.api.VisionSegmentationAPI.segment_image","title":"segment_image()","text":"

    Endpoint for segmenting an image according to the specified subtask (panoptic, instance, or semantic segmentation). It decodes the base64-encoded image, processes it through the model, and returns the segmentation masks along with labels and scores (if applicable) in base64 format.

    The method supports dynamic task inputs for models requiring specific task descriptions and applies different post-processing techniques based on the subtask.

    Returns:

    Type Description List[Dict[str, Any]]

    List[Dict[str, Any]]: A list of dictionaries where each dictionary contains a 'label', a 'score' (if applicable),

    List[Dict[str, Any]]

    and a 'mask' (base64-encoded image of the segmentation mask).

    Raises:

    Type Description Exception

    If an error occurs during image processing or segmentation.

    Example CURL Request:

    curl -X POST localhost:3000/api/v1/segment_image             -H \"Content-Type: application/json\"             -d '{\"image_base64\": \"<base64-encoded-image>\", \"subtask\": \"panoptic\"}'\n

    or to save all masks:

    (base64 -w 0 guy.jpg | awk '{print \"{\"image_base64\": \"\"$0\"\", \"subtask\": \"semantic\"}\"}' > /tmp/image_payload.json)\ncurl -X POST http://localhost:3000/api/v1/segment_image             -H \"Content-Type: application/json\"             -u user:password             -d @/tmp/image_payload.json | jq -r '.[] | .mask + \" \" + .label' | while read mask label; do echo $mask | base64 --decode > \"${label}.jpg\"; done\n
    "},{"location":"vision/api/vqa/","title":"Visual Question Answering","text":"

    Bases: VisionAPI

    VisualQAAPI extends VisionAPI to provide an interface for visual question answering (VQA) tasks. This API supports answering questions about an image by utilizing deep learning models specifically trained for VQA. It processes requests containing an image and a question about the image, performs inference using the loaded model, and returns the predicted answer.

    Methods

    answer_question(self): Receives an image and a question, returns the answer based on visual content.

    Example CLI Usage:

    genius VisualQAAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"llava-hf/bakLlava-v1-hf\" \\\nmodel_class=\"LlavaForConditionalGeneration\" \\\nprocessor_class=\"AutoProcessor\" \\\ndevice_map=\"cuda:0\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\nmax_memory=None \\\ntorchscript=False \\\ncompile=False \\\nflash_attention=False \\\nbetter_transformers=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    "},{"location":"vision/api/vqa/#geniusrise_vision.vqa.api.VisualQAAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the VisualQAAPI with configurations for input, output, state management, and any model-specific parameters for visual question answering tasks.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the API.

    required **kwargs

    Additional keyword arguments for extended functionality.

    {}"},{"location":"vision/api/vqa/#geniusrise_vision.vqa.api.VisualQAAPI.answer_question","title":"answer_question()","text":"

    Endpoint for receiving an image with a question and returning the answer based on the visual content of the image. It processes the request containing a base64-encoded image and a question string, and utilizes the loaded model to predict the answer to the question related to the image.

    Returns:

    Type Description

    Dict[str, Any]: A dictionary containing the original question and the predicted answer.

    Raises:

    Type Description ValueError

    If required fields 'image_base64' and 'question' are not provided in the request.

    Exception

    If an error occurs during image processing or inference.

    Example CURL Request:

    curl -X POST localhost:3000/api/v1/answer_question             -H \"Content-Type: application/json\"             -d '{\"image_base64\": \"<base64-encoded-image>\", \"question\": \"What is the color of the sky in the image?\"}'\n

    or

    (base64 -w 0 test_images_segment_finetune/image1.jpg | awk '{print \"{\"image_base64\": \"\"$0\"\", \"question\": \"how many cats are there?\"}\"}' > /tmp/image_payload.json)\ncurl -X POST http://localhost:3000/api/v1/answer_question             -H \"Content-Type: application/json\"             -u user:password             -d @/tmp/image_payload.json | jq\n
    "}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Home","text":""},{"location":"#geniusrise-microservices-ecosystem","title":"Geniusrise Microservices Ecosystem","text":"

    Geniusrise is a modular, loosely-coupled AI-microservices framework.

    It can be used to perform various tasks, including hosting inference endpoints, performing bulk inference, fine tune etc with open source models or closed source APIs.

    • The framework provides structure for modules and operationalizes and orchestrates them.
    • The modular ecosystem provides a layer of abstraction over the myriad of models, libraries, tools, parameters and optimizations underlying the operationalization of modern AI models.

    Together the framework and ecosystem can be used for:

    1. Rapid prototyping by hosting APIs on a wide range of models
      1. Host and experiment on local and iterate fast
      2. Deploy on kubernetes to production
    2. Building AI-side components using the framework and CLI
      1. Build complex AI microservices using multiple models
      2. Iterate fast from development to production
      3. Manage, scale and monitor deployments in production
      4. Build once, run anywhere
    3. Using the ecosystem as a library: Many interesting applications can be built using this, e.g.:
      1. A multi-cloud AI cloud, see geniusrise.com
      2. Local model pipeline server for personal or home IOT devices (e.g. a personal AI pin connected to voice-LLM pipeline hosted on desktop)
      3. Desktop and CLI applications

    Quickstarts:

    • Text classification: quickstart
    • Question Answering: quickstart
    • Table Question Answering: quickstart
    • Natural Language Inference: quickstart
    • Translation: quickstart
    • Summarization: quickstart
    • Chat: quickstart
    • Language Models: quickstart

    (More coming soon).

    "},{"location":"#guides","title":"Guides","text":""},{"location":"#getting-started","title":"\ud83d\ude80 Getting Started","text":"
    1. \ud83d\udca5 Usage - TLDR Usage.
    2. \ud83d\udee0\ufe0f Installation - Installation and setup.
    3. \ud83d\udcd8 Concepts - Concepts of the framework, start here.
    4. \ud83c\udfd7\ufe0f Architecture - Design and architecture of the framework.
    "},{"location":"#development","title":"\ud83d\udcbb Development","text":"
    1. \ud83c\udfe0 Local Experimentation - Local setup and project creation.
    2. \ud83d\udd04 Dev Cycle - Describes one full local development cycle.
    3. \ud83d\udce6 Packaging - Packaging your application.
    4. \ud83d\ude80 Deployment - Deploying parts or whole of your application.
    "},{"location":"#reference","title":"\ud83d\udcda Reference","text":"
    1. \ud83d\udcc4 YAML Structure - Geniusfile structure and configuration.
    2. \ud83c\udf10 CLI reference - Command line reference.
    3. \ud83c\udfa8 Project Templates - Project templates for community plugins.
    "},{"location":"#runners","title":"\ud83c\udfc3 Runners","text":"\ud83c\udf10 Runners \ud83d\udfe2 k8s deployment \ud83d\udfe4 k8s service \ud83d\udfe1 k8s job \ud83d\udfe0 k8s cron job \ud83d\udfe7 k8s pods \ud83d\udfe6 Apache Airflow \ud83d\udd34 Docker \ud83d\udfe3 Docker Swarm"},{"location":"#models","title":"\ud83e\udd16 Models","text":""},{"location":"#text-inference-apis","title":"Text Inference APIs","text":"\ud83c\udf10 Local & Huggingface \ud83d\udfe2 Language Model \ud83d\udfe3 Named Entity Recognition \ud83d\udfe1 Question Answering \ud83d\udfe0 Sentiment Analysis \ud83d\udfe4 Summarization \ud83d\udfe6 Translation \ud83d\udd35 Classification \ud83d\udd34 Natural Language Inference \ud83d\udfe7 Instruction Tuning \ud83d\udfe7 Base"},{"location":"#text-bulk-inference","title":"Text Bulk Inference","text":"\ud83c\udf10 Local & Huggingface \ud83d\udfe2 Language Model \ud83d\udfe3 Named Entity Recognition \ud83d\udfe1 Question Answering \ud83d\udfe0 Sentiment Analysis \ud83d\udfe4 Summarization \ud83d\udfe6 Translation \ud83d\udd35 Classification \ud83d\udd34 Natural Language Inference \ud83d\udfe7 Instruction Tuning \ud83d\udfe7 Base"},{"location":"#text-fine-tuning","title":"Text Fine-tuning","text":"\ud83c\udf10 Local & Huggingface \ud83d\udfe2 Language Model \ud83d\udfe3 Named Entity Recognition \ud83d\udfe1 Question Answering \ud83d\udfe0 Sentiment Analysis \ud83d\udfe4 Summarization \ud83d\udfe6 Translation \ud83d\udd35 Classification \ud83d\udd34 Natural Language Inference \ud83d\udfe7 Instruction Tuning \ud83d\udfe7 Base \ud83c\udf10 OpenAI \ud83d\udfe2 Classification \ud83d\udfe3 Natural Language Inference \ud83d\udfe1 Instruction Tuning \ud83d\udfe0 Language Model \ud83d\udfe4 Named Entity Recognition \ud83d\udfe6 Question Answering \ud83d\udd35 Sentiment Analysis \ud83d\udd34 Summarization \ud83d\udfe7 Translation \ud83d\udfe7 Base"},{"location":"#vision-inference-apis","title":"Vision Inference APIs","text":"\ud83c\udf10 Local & Huggingface 
\ud83d\udfe2 Image Classification \ud83d\udfe3 OCR \ud83d\udfe1 Image Segmentation \ud83d\udfe0 Visual Question Answering \ud83d\udfe4 Base"},{"location":"#audio-inference-apis","title":"Audio Inference APIs","text":"\ud83c\udf10 Local & Huggingface \ud83d\udfe2 Text to Speech \ud83d\udfe3 Speech to Text \ud83d\udfe7 Base"},{"location":"#audio-bulk-inference","title":"Audio Bulk Inference","text":"\ud83c\udf10 Local & Huggingface \ud83d\udfe2 Text to Speech \ud83d\udfe3 Speech to Text \ud83d\udfe7 Base"},{"location":"#data","title":"\u26a1 Data","text":""},{"location":"#ingestion","title":"Ingestion","text":"\ud83c\udf10 Streaming \ud83d\udfe2 Http Polling \ud83d\udfe3 Socket.io \ud83d\udfe1 gRPC \ud83d\udfe0 QUIC \ud83d\udfe4 UDP \ud83d\udd35 Webhook \ud83d\udfe5 Websocket \ud83d\udfe9 SNS \ud83d\udfe7 SQS \ud83d\udfe8 AMQP \ud83d\udfeb Kafka \ud83d\udfea Kinesis Streams \ud83d\udfe9 MQTT \ud83d\udfe8 ActiveMQ \ud83d\udfeb ZeroMQ \ud83d\udfea Redis Pubsub \ud83d\udfe7 Redis Streams \ud83d\udce6 Databases \ud83d\udfe2 HBase \ud83d\udfe3 PostgreSQL \ud83d\udd35 MySQL \ud83d\udfe0 MongoDB \ud83d\udfe2 Cassandra \ud83d\udfe3 Redis \ud83d\udd35 Elasticsearch \ud83d\udfe0 Oracle \ud83d\udfe2 SQL Server \ud83d\udfe3 SQLite \ud83d\udd35 Neo4j \ud83d\udfe0 Bigtable \ud83d\udfe2 DynamoDB \ud83d\udfe3 Azure Table Storage \ud83d\udd35 Couchbase \ud83d\udfe0 InfluxDB \ud83d\udfe2 TimescaleDB \ud83d\udfe3 Teradata \ud83d\udd35 TiDB \ud83d\udfe0 Voltdb \ud83d\udfe2 Sybase \ud83d\udfe3 DB2 \ud83d\udd35 AWS Presto \ud83d\udfe0 Riak \ud83d\udfe2 MemSQL \ud83d\udfe3 LDAP \ud83d\udd35 AWS KeySpaces \ud83d\udfe0 KairosDB \ud83d\udfe2 Graphite \ud83d\udfe3 Google FireStore \ud83d\udd35 AWS DocumentDB \ud83d\udfe0 Cockroach \ud83d\udfe2 Cloud SQL \ud83d\udfe3 Azure CosmosDB \ud83d\udd35 AWS Athena \ud83d\udfe0 ArangoDB \ud83d\udfe2 Nuodb \ud83d\udfe3 OpenTSDB \ud83d\udd35 Google Bigquery \ud83d\udfe0 Vertica \ud83d\udfe2 Google 
Spanner"},{"location":"#preprocessing","title":"Preprocessing","text":"\ud83c\udf10 Document Processing \ud83c\udf10 Image Processing \ud83c\udf10 OCR \ud83d\udfe3 Parse PDF \ud83d\udfe1 Predict image classes \ud83d\udd35 TROCRImageOCR \ud83d\udfe3 ParseCBZCBR \ud83d\udfe1 Train image classifier \ud83d\udd35 FineTuneTROCR \ud83d\udfe3 ParseDjvu \ud83d\udfe1 Convert Images \ud83d\udd35 TROCRImageOCRAPI \ud83d\udfe3 ParseEpub \ud83d\udfe2 Pix2StructImageOCR \ud83d\udfe3 ParseMOBI \ud83d\udfe2 Pix2StructImageOCRAPI \ud83d\udfe3 ParsePostScript \ud83d\udfe2 FineTunePix2Struct \ud83d\udfe3 ParseXPS"},{"location":"#library","title":"\ud83d\udcda Library","text":"\ud83d\udce6 cli \ud83d\udce6 core \ud83d\udce6 data \ud83d\udce6 core.state \ud83d\udfe0 geniusctl \ud83d\udfe2 bolt \ud83d\udfe3 input \ud83d\udd34 base \ud83d\udfe0 yamlctl \ud83d\udfe2 spout \ud83d\udfe3 output \ud83d\udd34 dynamo \ud83d\udfe0 boltctl \ud83d\udfe4 base \ud83d\udfe3 batch_input \ud83d\udd34 memory \ud83d\udfe0 spoutctl \ud83d\udfe3 batch_output \ud83d\udd34 postgres \ud83d\udfe0 schema \ud83d\udfe3 streaming_input \ud83d\udd34 redis \ud83d\udfe0 discover \ud83d\udfe3 streaming_output \ud83d\udfe0 docker"},{"location":"audio/api/base/","title":"Base API","text":"

    Bases: AudioBulk

    A class representing a Hugging Face API for generating text using a pre-trained language model.

    Attributes:

    Name Type Description model Any

    The pre-trained language model.

    processor Any

    The processor used to preprocess input text.

    model_name str

    The name of the pre-trained language model.

    model_revision Optional[str]

    The revision of the pre-trained language model.

    processor_name str

    The name of the processor used to preprocess input text.

    processor_revision Optional[str]

    The revision of the processor used to preprocess input text.

    model_class str

    The name of the class of the pre-trained language model.

    processor_class str

    The name of the class of the processor used to preprocess input text.

    use_cuda bool

    Whether to use a GPU for inference.

    quantization int

    The level of quantization to use for the pre-trained language model.

    precision str

    The precision to use for the pre-trained language model.

    device_map str | Dict | None

    The mapping of devices to use for inference.

    max_memory Dict[int, str]

    The maximum memory to use for inference.

    torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model.

    model_args Any

    Additional arguments to pass to the pre-trained language model.

    Methods

    text(**kwargs: Any) -> Dict[str, Any]: Generates text based on the given prompt and decoding strategy.

    listen(model_name: str, model_class: str = \"AutoModelForCausalLM\", processor_class: str = \"AutoProcessor\", use_cuda: bool = False, precision: str = \"float16\", quantization: int = 0, device_map: str | Dict | None = \"auto\", max_memory={0: \"24GB\"}, torchscript: bool = True, endpoint: str = \"\", port: int = 3000, cors_domain: str = \"http://localhost:3000\", username: Optional[str] = None, password: Optional[str] = None, *model_args: Any) -> None: Starts a CherryPy server to listen for requests to generate text.

    "},{"location":"audio/api/base/#geniusrise_audio.base.api.AudioAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes a new instance of the TextAPI class.

    Parameters:

    Name Type Description Default input BatchInput

    The input data to process.

    required output BatchOutput

    The output data to process.

    required state State

    The state of the API.

    required"},{"location":"audio/api/base/#geniusrise_audio.base.api.AudioAPI.listen","title":"listen(model_name, model_class='AutoModel', processor_class='AutoProcessor', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, concurrent_queries=False, use_whisper_cpp=False, use_faster_whisper=False, endpoint='*', port=3000, cors_domain='http://localhost:3000', username=None, password=None, **model_args)","text":"

    Starts a CherryPy server to listen for requests to generate text.

    Parameters:

    Name Type Description Default model_name str

    The name of the pre-trained language model.

    required model_class str

    The name of the class of the pre-trained language model. Defaults to \"AutoModel\".

    'AutoModel' processor_class str

    The name of the class of the processor used to preprocess input text. Defaults to \"AutoProcessor\".

    'AutoProcessor' use_cuda bool

    Whether to use a GPU for inference. Defaults to False.

    False precision str

    The precision to use for the pre-trained language model. Defaults to \"float16\".

    'float16' quantization int

    The level of quantization to use for the pre-trained language model. Defaults to 0.

    0 device_map str | Dict | None

    The mapping of devices to use for inference. Defaults to \"auto\".

    'auto' max_memory Dict[int, str]

    The maximum memory to use for inference. Defaults to {0: \"24GB\"}.

    {0: '24GB'} torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model. Defaults to False.

    False compile bool

    Enable Torch JIT compilation.

    False concurrent_queries bool

    (bool): Whether the API supports concurrent API calls (usually false).

    False use_whisper_cpp bool

    Whether to use whisper.cpp to load the model. Defaults to False. Note: only works for these models: https://github.com/aarnphm/whispercpp/blob/524dd6f34e9d18137085fb92a42f1c31c9c6bc29/src/whispercpp/utils.py#L32

    False use_faster_whisper bool

    Whether to use faster-whisper.

    False endpoint str

    The endpoint to listen on. Defaults to \"*\".

    '*' port int

    The port to listen on. Defaults to 3000.

    3000 cors_domain str

    The domain to allow CORS requests from. Defaults to \"http://localhost:3000\".

    'http://localhost:3000' username Optional[str]

    The username to use for authentication. Defaults to None.

    None password Optional[str]

    The password to use for authentication. Defaults to None.

    None **model_args Any

    Additional arguments to pass to the pre-trained language model.

    {}"},{"location":"audio/api/base/#geniusrise_audio.base.api.AudioAPI.validate_password","title":"validate_password(realm, username, password)","text":"

    Validate the username and password against expected values.

    Parameters:

    Name Type Description Default realm str

    The authentication realm.

    required username str

    The provided username.

    required password str

    The provided password.

    required

    Returns:

    Name Type Description bool

    True if credentials are valid, False otherwise.

    "},{"location":"audio/api/s2t/","title":"Speech to Text","text":"

    Bases: AudioAPI

    SpeechToTextAPI is a subclass of AudioAPI specifically designed for speech-to-text models. It extends the functionality to handle speech-to-text processing using various ASR models.

    Attributes:

    Name Type Description model AutoModelForCTC

    The speech-to-text model.

    processor AutoProcessor

    The processor to prepare input audio data for the model.

    Methods

    transcribe(audio_input: bytes) -> str: Transcribes the given audio input to text using the speech-to-text model.

    Example CLI Usage:

    genius SpeechToTextAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id facebook/wav2vec2-large-960h-lv60-self \\\nlisten \\\n--args \\\nmodel_name=\"facebook/wav2vec2-large-960h-lv60-self\" \\\nmodel_class=\"Wav2Vec2ForCTC\" \\\nprocessor_class=\"Wav2Vec2Processor\" \\\nuse_cuda=True \\\nprecision=\"float32\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\ncompile=True \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    or using whisper.cpp:

    genius SpeechToTextAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"large\" \\\nuse_whisper_cpp=True \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    "},{"location":"audio/api/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the SpeechToTextAPI with configurations for speech-to-text processing.

    Parameters:

    Name Type Description Default input BatchInput

    The input data configuration.

    required output BatchOutput

    The output data configuration.

    required state State

    The state configuration.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"audio/api/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.asr_pipeline","title":"asr_pipeline(**kwargs)","text":"

    Transcribes the input audio using the Hugging Face automatic-speech-recognition pipeline.

    This method leverages a pre-trained ASR model to convert spoken audio into text. It's suitable for processing various types of audio content.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'text' for the input text.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the original input text and a list of recognized entities.

    Example CURL Request for transcription:

    (base64 -w 0 sample.flac | awk '{print \"{\\\"audio_file\\\": \\\"\"$0\"\\\", \\\"model_sampling_rate\\\": 16000, \\\"chunk_length_s\\\": 60}\"}' > /tmp/payload.json)\ncurl -X POST http://localhost:3000/api/v1/asr_pipeline \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @/tmp/payload.json | jq\n

    "},{"location":"audio/api/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the ASR Hugging Face pipeline.

    "},{"location":"audio/api/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.process_faster_whisper","title":"process_faster_whisper(audio_input, model_sampling_rate, chunk_size, generate_args)","text":"

    Processes audio input with the faster-whisper model.

    Parameters:

    Name Type Description Default audio_input bytes

    The audio input for transcription.

    required model_sampling_rate int

    The sampling rate of the model.

    required chunk_size int

    The size of audio chunks to process.

    required generate_args Dict[str, Any]

    Additional arguments for transcription.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the transcription results.

    "},{"location":"audio/api/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.process_seamless","title":"process_seamless(audio_input, model_sampling_rate, processor_args, chunk_size, overlap_size, generate_args)","text":"

    Process audio input with the SeamlessM4T model.

    "},{"location":"audio/api/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.process_wav2vec2","title":"process_wav2vec2(audio_input, model_sampling_rate, processor_args, chunk_size, overlap_size)","text":"

    Process audio input with the Wav2Vec2 model.

    "},{"location":"audio/api/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.process_whisper","title":"process_whisper(audio_input, model_sampling_rate, processor_args, chunk_size, overlap_size, generate_args)","text":"

    Process audio input with the Whisper model.

    "},{"location":"audio/api/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.transcribe","title":"transcribe()","text":"

    API endpoint to transcribe the given audio input to text using the speech-to-text model. Expects a JSON input with 'audio_file' as a key containing the base64 encoded audio data.

    Returns:

    Type Description

    Dict[str, str]: A dictionary containing the transcribed text.

    Example CURL Request for transcription:

    (base64 -w 0 sample.flac | awk '{print \"{\\\"audio_file\\\": \\\"\"$0\"\\\", \\\"model_sampling_rate\\\": 16000, \\\"chunk_size\\\": 1280000, \\\"overlap_size\\\": 213333, \\\"do_sample\\\": true, \\\"num_beams\\\": 4, \\\"temperature\\\": 0.6, \\\"tgt_lang\\\": \\\"eng\\\"}\"}' > /tmp/payload.json)\ncurl -X POST http://localhost:3000/api/v1/transcribe \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @/tmp/payload.json | jq\n

    "},{"location":"audio/api/t2s/","title":"Text to Speech","text":"

    Bases: AudioAPI

    TextToSpeechAPI for converting text to speech using various TTS models.

    Attributes:

    Name Type Description model AutoModelForSeq2SeqLM

    The text-to-speech model.

    tokenizer AutoTokenizer

    The tokenizer for the model.

    Methods

    synthesize(text_input: str) -> bytes: Converts the given text input to speech using the text-to-speech model.

    Example CLI Usage:

    genius TextToSpeechAPI rise \\\nbatch \\\n    --input_folder ./input \\\nbatch \\\n    --output_folder ./output \\\nnone \\\n    --id facebook/mms-tts-eng \\\n    listen \\\n        --args \\\n            model_name=\"facebook/mms-tts-eng\" \\\n            model_class=\"VitsModel\" \\\n            processor_class=\"VitsTokenizer\" \\\n            use_cuda=True \\\n            precision=\"float32\" \\\n            quantization=0 \\\n            device_map=\"cuda:0\" \\\n            max_memory=None \\\n            torchscript=False \\\n            compile=False \\\n            endpoint=\"*\" \\\n            port=3000 \\\n            cors_domain=\"http://localhost:3000\" \\\n            username=\"user\" \\\n            password=\"password\"\n
    "},{"location":"audio/api/t2s/#geniusrise_audio.t2s.api.TextToSpeechAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the TextToSpeechAPI with configurations for text-to-speech processing.

    Parameters:

    Name Type Description Default input BatchInput

    The input data configuration.

    required output BatchOutput

    The output data configuration.

    required state State

    The state configuration.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"audio/api/t2s/#geniusrise_audio.t2s.api.TextToSpeechAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the TTS Hugging Face pipeline.

    "},{"location":"audio/api/t2s/#geniusrise_audio.t2s.api.TextToSpeechAPI.synthesize","title":"synthesize()","text":"

    API endpoint to convert text input to speech using the text-to-speech model. Expects a JSON input with 'text' as a key containing the text to be synthesized.

    Returns:

    Type Description

    Dict[str, str]: A dictionary containing the base64 encoded audio data.

    Example CURL Request for synthesis: ... [Provide example CURL request] ...

    "},{"location":"audio/api/t2s/#geniusrise_audio.t2s.api.TextToSpeechAPI.tts_pipeline","title":"tts_pipeline(**kwargs)","text":"

    Converts text to speech using the Hugging Face pipeline.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'text' for the input text.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the base64 encoded audio data.

    Example CURL Request for synthesis: ... [Provide example CURL request] ...

    "},{"location":"audio/bulk/base/","title":"Base Bulk Inference","text":"

    Bases: Bolt

    AudioBulk is a class designed for bulk processing of audio data using various audio models from Hugging Face. It focuses on audio generation and transformation tasks, supporting a range of models and configurations.

    Attributes:

    Name Type Description model AutoModelForAudioClassification

    The audio model for generation or transformation tasks.

    processor AutoFeatureExtractor

    The processor for preparing input data for the model.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration and data inputs for the batch process.

    required output BatchOutput

    Configurations for output data handling.

    required state State

    State management for the Bolt.

    required **kwargs

    Arbitrary keyword arguments for extended configurations.

    {} Methods

    audio(**kwargs: Any) -> Dict[str, Any]: Provides an API endpoint for audio processing functionality. Accepts various parameters for customizing the audio processing tasks.

    process(audio_input: Union[str, bytes], **processing_params: Any) -> dict: Processes the audio input based on the provided parameters. Supports multiple processing methods.

    "},{"location":"audio/bulk/base/#geniusrise_audio.base.bulk.AudioBulk.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the AudioBulk with configurations and sets up logging. Prepares the environment for audio processing tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The input data configuration for the audio processing task.

    required output BatchOutput

    The output data configuration for the results of the audio processing.

    required state State

    The state configuration for the Bolt, managing its operational status.

    required **kwargs

    Additional keyword arguments for extended functionality and model configurations.

    {}"},{"location":"audio/bulk/base/#geniusrise_audio.base.bulk.AudioBulk.done","title":"done()","text":"

    Finalizes the AudioBulk processing. Sends notification email if configured.

    This method should be called after all audio processing tasks are complete. It handles any final steps such as sending notifications or cleaning up resources.

    "},{"location":"audio/bulk/base/#geniusrise_audio.base.bulk.AudioBulk.load_models","title":"load_models(model_name, processor_name, model_revision=None, processor_revision=None, model_class='', processor_class='AutoFeatureExtractor', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, flash_attention=False, better_transformers=False, use_whisper_cpp=False, use_faster_whisper=False, **model_args)","text":"

    Loads and configures the specified audio model and processor for audio processing.

    Parameters:

    Name Type Description Default model_name str

    Name or path of the audio model to load.

    required processor_name str

    Name or path of the processor to load.

    required model_revision Optional[str]

    Specific model revision to load (e.g., commit hash).

    None processor_revision Optional[str]

    Specific processor revision to load.

    None model_class str

    Class of the model to be loaded.

    '' processor_class str

    Class of the processor to be loaded.

    'AutoFeatureExtractor' use_cuda bool

    Flag to use CUDA for GPU acceleration.

    False precision str

    Desired precision for computations (\"float32\", \"float16\", etc.).

    'float16' quantization int

    Bit level for model quantization (0 for none, 8 for 8-bit).

    0 device_map Union[str, Dict, None]

    Specific device(s) for model operations.

    'auto' max_memory Dict[int, str]

    Maximum memory allocation for the model.

    {0: '24GB'} torchscript bool

    Enable TorchScript for model optimization.

    False compile bool

    Enable Torch JIT compilation.

    False flash_attention bool

    Flag to enable Flash Attention optimization for faster processing.

    False better_transformers bool

    Flag to enable Better Transformers optimization for faster processing.

    False use_whisper_cpp bool

    Whether to use whisper.cpp to load the model. Defaults to False. Note: only works for these models: https://github.com/aarnphm/whispercpp/blob/524dd6f34e9d18137085fb92a42f1c31c9c6bc29/src/whispercpp/utils.py#L32

    False use_faster_whisper bool

    Whether to use faster-whisper.

    False **model_args Any

    Additional arguments for model loading.

    {}

    Returns:

    Type Description Tuple[AutoModelForAudioClassification, AutoFeatureExtractor]

    Tuple[AutoModelForAudioClassification, AutoFeatureExtractor]: Loaded model and processor.

    "},{"location":"audio/bulk/s2t/","title":"Speech to Text","text":"

    Bases: AudioAPI

    SpeechToTextAPI is a subclass of AudioAPI specifically designed for speech-to-text models. It extends the functionality to handle speech-to-text processing using various ASR models.

    Attributes:

    Name Type Description model AutoModelForCTC

    The speech-to-text model.

    processor AutoProcessor

    The processor to prepare input audio data for the model.

    Methods

    transcribe(audio_input: bytes) -> str: Transcribes the given audio input to text using the speech-to-text model.

    Example CLI Usage:

    genius SpeechToTextAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id facebook/wav2vec2-large-960h-lv60-self \\\nlisten \\\n--args \\\nmodel_name=\"facebook/wav2vec2-large-960h-lv60-self\" \\\nmodel_class=\"Wav2Vec2ForCTC\" \\\nprocessor_class=\"Wav2Vec2Processor\" \\\nuse_cuda=True \\\nprecision=\"float32\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\ncompile=True \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    or using whisper.cpp:

    genius SpeechToTextAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"large\" \\\nuse_whisper_cpp=True \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    "},{"location":"audio/bulk/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the SpeechToTextAPI with configurations for speech-to-text processing.

    Parameters:

    Name Type Description Default input BatchInput

    The input data configuration.

    required output BatchOutput

    The output data configuration.

    required state State

    The state configuration.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"audio/bulk/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.asr_pipeline","title":"asr_pipeline(**kwargs)","text":"

    Transcribes the input audio using the Hugging Face automatic-speech-recognition pipeline.

    This method leverages a pre-trained ASR model to convert spoken audio into text. It's suitable for processing various types of audio content.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'text' for the input text.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the original input text and a list of recognized entities.

    Example CURL Request for transcription:

    (base64 -w 0 sample.flac | awk '{print \"{\\\"audio_file\\\": \\\"\"$0\"\\\", \\\"model_sampling_rate\\\": 16000, \\\"chunk_length_s\\\": 60}\"}' > /tmp/payload.json)\ncurl -X POST http://localhost:3000/api/v1/asr_pipeline \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @/tmp/payload.json | jq\n

    "},{"location":"audio/bulk/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the ASR Hugging Face pipeline.

    "},{"location":"audio/bulk/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.process_faster_whisper","title":"process_faster_whisper(audio_input, model_sampling_rate, chunk_size, generate_args)","text":"

    Processes audio input with the faster-whisper model.

    Parameters:

    Name Type Description Default audio_input bytes

    The audio input for transcription.

    required model_sampling_rate int

    The sampling rate of the model.

    required chunk_size int

    The size of audio chunks to process.

    required generate_args Dict[str, Any]

    Additional arguments for transcription.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the transcription results.

    "},{"location":"audio/bulk/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.process_seamless","title":"process_seamless(audio_input, model_sampling_rate, processor_args, chunk_size, overlap_size, generate_args)","text":"

    Process audio input with the SeamlessM4T model.

    "},{"location":"audio/bulk/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.process_wav2vec2","title":"process_wav2vec2(audio_input, model_sampling_rate, processor_args, chunk_size, overlap_size)","text":"

    Process audio input with the Wav2Vec2 model.

    "},{"location":"audio/bulk/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.process_whisper","title":"process_whisper(audio_input, model_sampling_rate, processor_args, chunk_size, overlap_size, generate_args)","text":"

    Process audio input with the Whisper model.

    "},{"location":"audio/bulk/s2t/#geniusrise_audio.s2t.api.SpeechToTextAPI.transcribe","title":"transcribe()","text":"

    API endpoint to transcribe the given audio input to text using the speech-to-text model. Expects a JSON input with 'audio_file' as a key containing the base64 encoded audio data.

    Returns:

    Type Description

    Dict[str, str]: A dictionary containing the transcribed text.

    Example CURL Request for transcription:

    (base64 -w 0 sample.flac | awk '{print \"{\\\"audio_file\\\": \\\"\"$0\"\\\", \\\"model_sampling_rate\\\": 16000, \\\"chunk_size\\\": 1280000, \\\"overlap_size\\\": 213333, \\\"do_sample\\\": true, \\\"num_beams\\\": 4, \\\"temperature\\\": 0.6, \\\"tgt_lang\\\": \\\"eng\\\"}\"}' > /tmp/payload.json)\ncurl -X POST http://localhost:3000/api/v1/transcribe \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @/tmp/payload.json | jq\n

    "},{"location":"audio/bulk/t2s/","title":"Text to Speech","text":"

    Bases: AudioAPI

    TextToSpeechAPI for converting text to speech using various TTS models.

    Attributes:

    Name Type Description model AutoModelForSeq2SeqLM

    The text-to-speech model.

    tokenizer AutoTokenizer

    The tokenizer for the model.

    Methods

    synthesize(text_input: str) -> bytes: Converts the given text input to speech using the text-to-speech model.

    Example CLI Usage:

    genius TextToSpeechAPI rise \\\nbatch \\\n    --input_folder ./input \\\nbatch \\\n    --output_folder ./output \\\nnone \\\n    --id facebook/mms-tts-eng \\\n    listen \\\n        --args \\\n            model_name=\"facebook/mms-tts-eng\" \\\n            model_class=\"VitsModel\" \\\n            processor_class=\"VitsTokenizer\" \\\n            use_cuda=True \\\n            precision=\"float32\" \\\n            quantization=0 \\\n            device_map=\"cuda:0\" \\\n            max_memory=None \\\n            torchscript=False \\\n            compile=False \\\n            endpoint=\"*\" \\\n            port=3000 \\\n            cors_domain=\"http://localhost:3000\" \\\n            username=\"user\" \\\n            password=\"password\"\n
    "},{"location":"audio/bulk/t2s/#geniusrise_audio.t2s.api.TextToSpeechAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the TextToSpeechAPI with configurations for text-to-speech processing.

    Parameters:

    Name Type Description Default input BatchInput

    The input data configuration.

    required output BatchOutput

    The output data configuration.

    required state State

    The state configuration.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"audio/bulk/t2s/#geniusrise_audio.t2s.api.TextToSpeechAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the TTS Hugging Face pipeline.

    "},{"location":"audio/bulk/t2s/#geniusrise_audio.t2s.api.TextToSpeechAPI.synthesize","title":"synthesize()","text":"

    API endpoint to convert text input to speech using the text-to-speech model. Expects a JSON input with 'text' as a key containing the text to be synthesized.

    Returns:

    Type Description

    Dict[str, str]: A dictionary containing the base64 encoded audio data.

    Example CURL Request for synthesis: ... [Provide example CURL request] ...

    "},{"location":"audio/bulk/t2s/#geniusrise_audio.t2s.api.TextToSpeechAPI.tts_pipeline","title":"tts_pipeline(**kwargs)","text":"

    Converts text to speech using the Hugging Face pipeline.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'text' for the input text.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the base64 encoded audio data.

    Example CURL Request for synthesis: ... [Provide example CURL request] ...

    "},{"location":"blog/huggingface/chat/","title":"Host Chat Models Using Geniusrise","text":"
    • Host Chat Models Using Geniusrise
      • Quick Setup
        • Using vanilla huggingface
        • Using VLLM
        • Using llama.cpp
      • Interacting with Your API
      • Fun
        • Completely Local Chat
        • System Prompts
        • Code Generation
        • Routing between models
        • Chain of thought prompting
      • Play around

    Integrating chat models into applications can dramatically enhance user interaction, making it more engaging and intuitive. Geniusrise offers a simple and flexible way to deploy state-of-the-art chat models as APIs. This guide explores how to set up these APIs for various use cases.

    "},{"location":"blog/huggingface/chat/#quick-setup","title":"Quick Setup","text":"

    Requirements

    • python 3.10, PPA, AUR, brew, Windows.
    • You need to have a GPU. Most of the system works with NVIDIA GPUs.
    • Install CUDA.

    Optional: Set up a virtual environment:

    virtualenv venv -p `which python3.10`\nsource venv/bin/activate\n

    Installation:

    To get started, install Geniusrise and its text package:

    pip install geniusrise\npip install geniusrise-text\n

    Configuration File (genius.yml):

    There are 3 runtimes currently supported:

    1. Vanilla huggingface: more direct access to the model, many options for sampling, great for testing, bad way to deploy in production.
    2. VLLM: perhaps the most optimized way to run LLMs in open source.
    3. llama.cpp: another contender for the spot of being the most optimized way to run LLMs in open source.
    "},{"location":"blog/huggingface/chat/#using-vanilla-huggingface","title":"Using vanilla huggingface","text":"
    version: \"1\"\nbolts:\nmy_bolt:\nname: InstructionAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: TheBloke/Mistral-7B-Instruct-v0.2-AWQ\nmodel_class: \"AutoModelForCausalLM\"\ntokenizer_class: \"AutoTokenizer\"\nuse_cuda: True\nprecision: \"bfloat16\"\ndevice_map: \"auto\"\nendpoint: \"*\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n
    "},{"location":"blog/huggingface/chat/#using-vllm","title":"Using VLLM","text":"
    version: \"1\"\nbolts:\nmy_bolt:\nname: InstructionAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: TheBloke/Mistral-7B-Instruct-v0.2-AWQ\nuse_cuda: True\nprecision: \"float16\"\ndevice_map: \"auto\"\nuse_vllm: True\nvllm_enforce_eager: True\nvllm_max_model_len: 1024\nendpoint: \"*\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n
    "},{"location":"blog/huggingface/chat/#using-llamacpp","title":"Using llama.cpp","text":"
    version: \"1\"\nbolts:\nmy_bolt:\nname: InstructionAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: TheBloke/Mistral-7B-Instruct-v0.2-GGUF\nuse_cuda: True\nuse_llama_cpp: True\nllama_cpp_filename: mistral-7b-instruct-v0.2.Q4_K_M.gguf\nllama_cpp_n_gpu_layers: 35\nllama_cpp_n_ctx: 1024\nendpoint: \"*\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n
    "},{"location":"blog/huggingface/chat/#interacting-with-your-api","title":"Interacting with Your API","text":"

    For a chat model, you would typically send a prompt and receive a generated response:

    curl -X POST \"http://localhost:3000/api/v1/chat\" \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"prompt\": \"Your prompt here\", \"max_tokens\": 50}'\n

    For VLLMs, you might want to control more parameters due to their capacity for larger context and nuanced outputs:

    curl -v -X POST \"http://localhost:3000/api/v1/chat_vllm\" \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"messages\": [\n            {\"role\": \"user\", \"content\": \"Whats the weather like in London?\"}\n        ],\n        \"temperature\": 0.7,\n        \"top_p\": 1.0,\n        \"n\": 1,\n        \"max_tokens\": 50,\n        \"stream\": false,\n        \"presence_penalty\": 0.0,\n        \"frequency_penalty\": 0.0,\n        \"logit_bias\": {},\n        \"user\": \"example_user\"\n    }'\n

    Finally, for llama.cpp, the api looks like:

    curl -X POST \"http://localhost:3000/api/v1/chat_llama_cpp\" \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"messages\": [\n            {\"role\": \"user\", \"content\": \"What is the capital of France?\"},\n            {\"role\": \"system\", \"content\": \"The capital of France is\"}\n        ],\n        \"temperature\": 0.2,\n        \"top_p\": 0.95,\n        \"top_k\": 40,\n        \"max_tokens\": 50\n    }' | jq\n
    "},{"location":"blog/huggingface/chat/#fun","title":"Fun","text":"

    There is a looooot of fun to be had here. But first, let's start with some basics:

    Models seen on huggingface generally have a trajectory like this:

    cb09d9d4-eb8b-43a8-b10d-c1e43f8b6dcd

    There are a few base models, everything else is fine-tuned from these base models.

    bart bert bert-generation big_bird bigbird_pegasus biogpt blenderbot blenderbot-small bloom camembert code_llama codegen cpmant ctrl data2vec-text electra ernie falcon fuyu gemma git gpt-sw3 gpt2 gpt_bigcode gpt_neo gpt_neox gpt_neox_japanese gptj llama marian mbart mega megatron-bert mistral mixtral mpt musicgen mvp open-llama openai-gpt opt pegasus persimmon phi plbart prophetnet qdqbert qwen2 reformer rembert roberta roberta-prelayernorm roc_bert roformer rwkv speech_to_text_2 stablelm starcoder2 transfo-xl trocr whisper xglm xlm xlm-prophetnet xlm-roberta xlm-roberta-xl xlnet xmod"},{"location":"blog/huggingface/chat/#completely-local-chat","title":"Completely Local Chat","text":"

    Lets deploy huggingface's chat-ui and connect it to use vllm apis to interface with a mistral 4-bit quantized (AWQ) model. This can run on my laptop with an RTX 4060 with 8GB VRAM.

    Cool, lets create a simple small script with gradio to create a chat interface.

    Install gradio:

    pip install gradio\n

    Create a chat.py file:

    # Import necessary libraries for handling network requests\nimport gradio as gr\nimport requests\nfrom typing import List, Dict\ndef send_request_to_api(messages: List[Dict[str, str]]) -> str:\n\"\"\"\n    This function sends a POST request to a specified API endpoint with a payload containing a list of messages.\n    :param messages: A list of messages to be sent. Each message is a dictionary containing a content key with its value.\n    :return: The content of the last message received from the API.\n    \"\"\"\n# Specify the API endpoint URL\nurl = \"http://localhost:3000/api/v1/chat_llama_cpp\"\n# Define headers for the request\nheaders = {\"Content-Type\": \"application/json\"}\n# Authenticate the request\nauth = (\"user\", \"password\")\n# Prepare the payload data\ndata = {\n\"messages\": messages,\n\"temperature\": 0.2,\n\"top_p\": 0.95,\n\"top_k\": 40,\n\"max_tokens\": 2048\n}\n# Send the POST request and get the response\nresponse = requests.post(url, auth=auth, headers=headers, json=data)\n# Parse the response data\nresponse_data = response.json()\nif response.status_code == 200:\n# Get the content of the last message from the response data\nlast_message = response_data[\"choices\"][0][\"message\"][\"content\"]\nreturn last_message\nelse:\n# Raise an exception in case of an error\nraise Exception(\"nooooooooooooooooooo!!\")\ndef predict(message: str, history: List[List[str]]) -> List[List[str]]:\n\"\"\"\n    This function converts chat history into the expected format and adds the latest user message. 
Then it sends the data to the API and returns the response message.\n    :param message: The user's latest message to be sent.\n    :param history: The chat history between the user and the AI.\n    :return: The response message from the API.\n    \"\"\"\n# Convert the chat history into the expected format\nmessages_format = []\nfor user_msg, bot_msg in history:\nif user_msg:\nmessages_format.append({\"role\": \"user\", \"content\": user_msg})\nif bot_msg:\nmessages_format.append({\"role\": \"system\", \"content\": bot_msg})\n# Add the latest user message\nmessages_format.append({\"role\": \"user\", \"content\": message})\n# Get the response from the API\nresponse_message = send_request_to_api(messages_format)\nreturn response_message\nchat_interface = gr.ChatInterface(\nfn=predict,\ntitle=\"Chat with AI\",\ndescription=\"Type your message below and get responses from our AI.\",\ntheme=gr.themes.Monochrome(),\n)\n# Launch the chat interface if the script is run as the main module\nif __name__ == \"__main__\":\nchat_interface.launch()\n

    Run the frontend:

    python ./chat.py\n

    Then, lets run the API server:

    version: \"1\"\nbolts:\nmy_bolt:\nname: InstructionAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: TheBloke/Mistral-7B-Instruct-v0.2-GGUF\nuse_cuda: True\nuse_llama_cpp: True\nllama_cpp_filename: mistral-7b-instruct-v0.2.Q4_K_M.gguf\nllama_cpp_n_gpu_layers: 35\nllama_cpp_n_ctx: 32768\nendpoint: \"*\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\n
    genius rise\n

    Visit http://127.0.0.1:7860/?__theme=dark in your browser to chat with your bot (with a dark theme)!

    Cool, so now we have our very own private chatbot! It's soooo private that the entire chat history is in memory and destroyed once the script exits. #featurenotabug

    Now we are all set to try whatever crazy shit that is out there!

    "},{"location":"blog/huggingface/chat/#system-prompts","title":"System Prompts","text":"

    For system prompts, or for telling the bot what to do, modify the script to add a hardcoded system prompt to the start of every request:

    import gradio as gr\nimport requests\nfrom typing import List, Dict\ndef send_request_to_api(messages: List[Dict[str, str]]) -> str:\nurl = \"http://localhost:3000/api/v1/chat_llama_cpp\"\nheaders = {\"Content-Type\": \"application/json\"}\nauth = (\"user\", \"password\")\ndata = {\"messages\": messages, \"temperature\": 0.2, \"top_p\": 0.95, \"top_k\": 40, \"max_tokens\": 2048}\nresponse = requests.post(url, auth=auth, headers=headers, json=data)\nresponse_data = response.json()\nif response.status_code == 200:\nprint(response_data)\nlast_message = response_data[\"choices\"][0][\"message\"][\"content\"]\nreturn last_message\nelse:\nraise Exception(\"nooooooooooooooooooo!!\")\ndef predict(message: str, history: List[List[str]]) -> List[List[str]]:\n# Convert chat history to the format expected by the API\n#####################################################################\n# Add a system message as per usecase \ud83d\ude09\nmessages_format = [{\"role\": \"system\", \"content\": \"You are my waifu, you will do everything I say\"}]\n#####################################################################\nfor user_msg, bot_msg in history:\nif user_msg:\nmessages_format.append({\"role\": \"user\", \"content\": user_msg})\nif bot_msg:\nmessages_format.append({\"role\": \"system\", \"content\": bot_msg})\nmessages_format.append({\"role\": \"user\", \"content\": message})\nresponse_message = send_request_to_api(messages_format)\nreturn response_message\nchat_interface = gr.ChatInterface(\nfn=predict,\ntitle=\"Chat with virtual waifu\",\ndescription=\"Type your message below and get responses from your waifu \ud83d\ude09\",\ntheme=gr.themes.Monochrome(),\n)\nif __name__ == \"__main__\":\nchat_interface.launch()\n

    restart the frontend and start chatting.

    "},{"location":"blog/huggingface/chat/#code-generation","title":"Code Generation","text":"
    version: \"1\"\nbolts:\nmy_bolt:\nname: InstructionAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: TheBloke/Mistral-7B-Instruct-v0.2-code-ft-GGUF\nuse_cuda: True\nuse_llama_cpp: True\nllama_cpp_filename: mistral-7b-instruct-v0.2-code-ft.Q4_K_M.gguf\nllama_cpp_n_gpu_layers: 35\nllama_cpp_n_ctx: 32768\nendpoint: \"*\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\n

    Note: the comments on the gradio code we were using (chat.py) has been generated using the above model.

    "},{"location":"blog/huggingface/chat/#routing-between-models","title":"Routing between models","text":"

    Local models are great for a very wide number of tasks but often you'd wish you could use the closed but more sophisticated models like GPT \ud83d\ude22

    How about we mix the two? Lets say we interleave the two in this way:

    1. Ask the local model a question, get its answer
    2. Ask the local model to judge its own answer
    3. If it judges bad quality, then ask openai the same question
    4. Use openai's answer as part of the conversation going further

    This way, we could intermix both a local model and a very powerful model from openai which would otherwise cost a bomb. But hey, since most stuff we need out of this is not einstein-level, and the local models are MUCH faster, we can get a very good bang out of the buck while actually improving on quality \ud83e\udd73

    Create a new file: chat_route.py:

    import gradio as gr\nimport requests\nfrom typing import List, Dict\nfrom openai import OpenAI\n# Importing the necessary libraries and the OpenAI API client\nclient = OpenAI(api_key=\"sk-YOUR_OPENAI_API_KEY\")\ndef send_request_to_api(messages: List[Dict[str, str]], endpoint: str, max_tokens=2048) -> Dict:\n# Function to send requests to the local API\nurl = f\"http://localhost:3000/api/v1/{endpoint}\"\nheaders = {\"Content-Type\": \"application/json\"}\nauth = (\"user\", \"password\")\ndata = {\"messages\": messages, \"temperature\": 0.2, \"top_p\": 0.95, \"top_k\": 40, \"max_tokens\": max_tokens}\nresponse = requests.post(url, auth=auth, headers=headers, json=data)\nif response.status_code == 200:\nreturn response.json()\nelse:\nraise Exception(\"Error communicating with the local API.\")\ndef query_openai_api(prompt: str) -> str:\n# Function to query the OpenAI API\nresponse = client.completions.create(\nmodel=\"gpt-4-turbo-preview\",\nprompt=prompt,\nmax_tokens=2048,\ntemperature=0.2,\n)\nreturn response.choices[0].text.strip()\ndef predict(message: str, history: List[List[str]]) -> str:\n# Function to process the conversation and get a response\nmessages_format = []\nfor user_msg, bot_msg in history:\nif user_msg:\nmessages_format.append({\"role\": \"user\", \"content\": user_msg})\nif bot_msg:\nmessages_format.append({\"role\": \"system\", \"content\": bot_msg})\nmessages_format.append({\"role\": \"user\", \"content\": message})\n# Step 1: Get the response from the local model\nresponse = send_request_to_api(messages_format, \"chat_llama_cpp\")\nlocal_model_response = response[\"choices\"][0][\"message\"][\"content\"]\n# Crafting a proper prompt for quality assessment\nquality_check_prompt = \"Based on the quality standards and relevance to the question, is the following response of good quality or should we consult a better model? Please reply with 'good quality' or 'bad quality'. 
Dont reply with anything else except 'good quality' or 'bad quality'\"\nquality_check_response = send_request_to_api(\n[\n{\"role\": \"user\", \"content\": quality_check_prompt + \"\\n\\nHere is the question:\\n\\n\" + user_msg + \"\\n\\nHere is the content: \\n\\n\" + local_model_response},\n],\n\"chat_llama_cpp\",\nmax_tokens=3,\n)\nquality_assessment = quality_check_response[\"choices\"][0][\"message\"][\"content\"]\nprint(f\"Quality assessment response: {quality_assessment}\")\n# Step 3: Decide based on quality\nif \"good quality\" in quality_assessment.lower():\nreturn local_model_response\nelse:\n# If the local model's response is not of good quality, query the OpenAI API\nopenai_response = query_openai_api(prompt=message)\nreturn \"# OpenAI response:\\n\\n\" + openai_response + \"\\n\\n# Local model response:\\n\\n\" + local_model_response\nchat_interface = gr.ChatInterface(\nfn=predict,\ntitle=\"Chat with route\",\ndescription=\"Type your message below and get responses from our AI.\",\ntheme=gr.themes.Monochrome(),\n)\nif __name__ == \"__main__\":\nchat_interface.launch()\n

    The model itself is a better judge at checking quality of output than it can produce.

    Quality assessment response:  good quality.\nQuality assessment response:  good quality\nQuality assessment response:  Good quality.\nQuality assessment response:  good quality\nQuality assessment response:  Good quality.\nQuality assessment response:  Good quality.\nQuality assessment response:  good quality\nQuality assessment response:  bad quality.\nQuality assessment response:  Bad quality.\n
    "},{"location":"blog/huggingface/chat/#chain-of-thought-prompting","title":"Chain of thought prompting","text":"

    Now that we have models with much longer contexts, how can we make them slog harder?

    Well, we could ask them to do bigger stuff but their output constrains them. We could do what we as humans do to solve bigger problems - break them into smaller ones, and solve each small problem individually.

    This time lets create a file called chat_chain.py:

    import gradio as gr\nimport requests\nfrom typing import List, Dict\nimport re\ndef extract_lists(text: str) -> list:\nreturn [m.strip().split(\"\\n\") for m in re.findall(r\"((?:^- .+\\n?)+|(?:^\\d+\\. .+\\n?)+)\", text, re.MULTILINE)]\ndef send_request_to_api(messages: List[Dict[str, str]], endpoint: str, max_tokens=2048) -> Dict:\nurl = f\"http://localhost:3000/api/v1/{endpoint}\"\nheaders = {\"Content-Type\": \"application/json\"}\nauth = (\"user\", \"password\")\ndata = {\"messages\": messages, \"temperature\": 0.2, \"top_p\": 0.95, \"top_k\": 40, \"max_tokens\": max_tokens}\nresponse = requests.post(url, auth=auth, headers=headers, json=data)\nif response.status_code == 200:\nreturn response.json()\nelse:\nraise Exception(\"Error communicating with the local API.\")\ndef predict(message: str, history: List[List[str]]):\nmessages_format = []\nfor user_msg, bot_msg in history:\nif user_msg:\nmessages_format.append({\"role\": \"user\", \"content\": user_msg})\nif bot_msg:\nmessages_format.append({\"role\": \"system\", \"content\": bot_msg})\nplan_prompt = f\"\"\"Let's think step by step to answer the question:\n{message}\nGenerate a very high level plan in the form of a list in markdown surrounded by code blocks.\nIf the task is simple, it is okay to generate a single point plan.\nEnsure each item in the plan is independent of each other so they can be instructed to an LLM one at a time without needing additional context.\n\"\"\"\nmessages_format.append({\"role\": \"user\", \"content\": plan_prompt})\n# Step 1: Get the response from the local model\nresponse = send_request_to_api(messages_format, \"chat_llama_cpp\")\nplan = response[\"choices\"][0][\"message\"][\"content\"]\nprint(f\"Got the plan: {plan[:30]}\")\nlists = extract_lists(plan)\nif len(lists) == 1:\nlists = lists[0]\nstep_solutions = []  # type: ignore\nfor ls in lists:\nprint(f\"Asking for solution to {ls}\")\nmessages_format = []\nfor user_msg, bot_msg in history:\nif 
user_msg:\nmessages_format.append({\"role\": \"user\", \"content\": user_msg})\nif bot_msg:\nmessages_format.append({\"role\": \"system\", \"content\": bot_msg})\nmessages_format.append({\"role\": \"user\", \"content\": message})\nmessages_format.append(\n{\n\"role\": \"user\",\n\"content\": (\"Next lets do this only and nothing else:\" + ls if type(ls) is str else \"\\n\".join(ls)),\n}\n)\nresponse = send_request_to_api(messages_format, \"chat_llama_cpp\")\n_resp = response[\"choices\"][0][\"message\"][\"content\"]\nstep_solutions.append((_resp, ls))\nsolutions = \"\\n\\n# Next\\n---\\n\\n\".join([x[0] for x in step_solutions])\nreturn f\"\"\"\n# Plan\n---\n{plan}\n# Solutions\n---\n{solutions}\n\"\"\"\nchat_interface = gr.ChatInterface(\nfn=predict,\ntitle=\"Chat with chain-of-thought waifu\",\ndescription=\"Type your message below and get responses from our AI.\",\ntheme=gr.themes.Monochrome(),\n)\nif __name__ == \"__main__\":\nchat_interface.launch()\n

    run it with:

    python ./chat_chain.py\n

    Now a small query like create plan for angry birds will result in a high level plan, followed by plans for implementing each item from the high level plan.

    As we can see from the logs:

    Asking for solution to ['2. Design the game environment: create a 2D plane with various structures and obstacles for the pigs to inhabit and for the birds to interact with.']\nAsking for solution to ['3. Develop the Angry Birds: create different types of birds with unique abilities such as normal bird for basic damage, red bird for explosive damage, blue bird for splitting into three upon impact, and yellow bird for creating stars that destroy multiple pigs or structures.']\nAsking for solution to ['4. Implement physics engine: use a physics engine to simulate the behavior of the birds and structures when launched and collide with each other.']\nAsking for solution to ['5. Create the user interface (UI): design an intuitive UI for players to interact with, including a slingshot for launching birds, a display for showing the current level and progress, and a menu for accessing different levels and game settings.']\nAsking for solution to ['6. Develop the game logic: write the rules for how the game progresses, including scoring, level completion, and game over conditions.']\nAsking for solution to ['7. Implement sound effects and background music: add appropriate sounds for various game events such as bird launching, pig destruction, and level completion.']\nAsking for solution to ['8. Test and debug the game: thoroughly test the game for any bugs or inconsistencies and make necessary adjustments.']\nAsking for solution to ['9. Optimize game performance: optimize the game for smooth gameplay and minimal lag, especially on older devices or slower networks.']\nAsking for solution to ['10. Release and market the game: release the game on various mobile platforms and promote it through social media, app stores, and other channels to attract players and build a community.']\n

    the script gets a plan consisting of independent steps, then asks the LLM to implement each step individually.

    A large number of variations exist of this method, and many of them use GPT-4 to surpass its usual capabilities.

    "},{"location":"blog/huggingface/chat/#play-around","title":"Play around","text":"

    There are 61,542 models on the huggingface, and all the excitement of the ecosystem!

    "},{"location":"blog/huggingface/imgclass/","title":"Host Image Classification Models Using Geniusrise","text":"

    Image classification is a cornerstone of machine learning and computer vision, providing the backbone for a myriad of applications from photo organization to medical imaging. With Geniusrise, developers can effortlessly deploy image classification models as APIs, making these powerful tools accessible for integration into various applications. This guide highlights the process of setting up image classification APIs using Geniusrise, offering a range of use cases and configurations.

    "},{"location":"blog/huggingface/imgclass/#quick-setup","title":"Quick Setup","text":"

    Installation:

    To start, ensure Geniusrise and its text extension are installed:

    pip install geniusrise\npip install geniusrise-vision\n

    Configuration File (genius.yml):

    Your genius.yml configuration will outline the API's structure. Below is a template adjusted for image classification:

    version: \"1\"\nbolts:\nmy_bolt:\nname: ImageClassificationAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: \"google/vit-base-patch16-224\"\nmodel_class: \"AutoModelForImageClassification\"\nprocessor_class: \"AutoImageProcessor\"\ndevice_map: \"cuda:0\"\nuse_cuda: true\nprecision: \"float\"\nendpoint: \"0.0.0.0\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n

    Activate your API by executing:

    genius rise\n
    "},{"location":"blog/huggingface/imgclass/#configuration-parameters-explained","title":"Configuration Parameters Explained","text":"
    • model_name: Defines the pre-trained model used for classification. Choices vary based on the application, from generic models like Google's ViT to specialized ones for food or NSFW detection.
    • model_class & processor_class: Specifies the model and processor classes for handling image data.
    • device_map & use_cuda: Configures GPU usage for enhanced performance.
    • endpoint, port, username, & password: Details for accessing the API securely.
    "},{"location":"blog/huggingface/imgclass/#interacting-with-the-image-classification-api","title":"Interacting with the Image Classification API","text":""},{"location":"blog/huggingface/imgclass/#example-with-curl","title":"Example with curl:","text":"
    (base64 -w 0 your_image.jpg | awk '{print \"{\\\"image_base64\\\": \\\"\"$0\"\\\"}\"}' > /tmp/image_payload.json)\ncurl -X POST http://localhost:3000/api/v1/classify_image \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @/tmp/image_payload.json | jq\n
    "},{"location":"blog/huggingface/imgclass/#example-with-python-requests","title":"Example with python-requests:","text":"
    import requests\nimport base64\nwith open(\"your_image.jpg\", \"rb\") as image_file:\nimage_base64 = base64.b64encode(image_file.read()).decode('utf-8')\ndata = {\"image_base64\": image_base64}\nresponse = requests.post(\"http://localhost:3000/api/v1/classify_image\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/imgclass/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/imgclass/#different-image-classification-models","title":"Different Image Classification Models","text":"

    Tailor your API for a variety of classification tasks by selecting appropriate models:

    • Aesthetic Assessment: Use models like cafeai/cafe_aesthetic to classify images based on aesthetic qualities.
    • Gender Classification: Apply models such as rizvandwiki/gender-classification for gender recognition.
    • Food Recognition: Employ food-specific models like nateraw/food to categorize food items.
    • Object Detection: Utilize models like microsoft/ResNet-50 for broad object classification.
    • NSFW Detection: Choose models designed for NSFW content detection, ensuring user-generated content is appropriate.
    "},{"location":"blog/huggingface/imgclass/#customizing-classification-parameters","title":"Customizing Classification Parameters","text":"

    For advanced needs, include additional parameters in your request to customize the classification, such as the confidence threshold or specific labels to focus on.

    "},{"location":"blog/huggingface/lm/","title":"Host Language Models Using Geniusrise","text":"

    Language modeling is the task that any foundational model is trained on, and later fine-tuned for other tasks like chat. Language models are mostly useful for one-shot tasks or tasks that need certain control, e.g. forcing zero-shot classification by asking the model to output only one token. We'll dive into hosting a language model and interact with your API using curl and python-requests.

    "},{"location":"blog/huggingface/lm/#getting-started","title":"Getting Started","text":"

    Requirements

    • python 3.10, PPA, AUR, brew, Windows.
    • You need to have a GPU. Most of the system works with NVIDIA GPUs.
    • Install CUDA.

    Optional: Set up a virtual environment:

    virtualenv venv -p `which python3.10`\nsource venv/bin/activate\n

    First, ensure Geniusrise and its text component are installed:

    pip install geniusrise\npip install geniusrise-text\n
    "},{"location":"blog/huggingface/lm/#configuration-file-geniusyml","title":"Configuration File: genius.yml","text":"

    The genius.yml file is the heart of your API setup. Here's a breakdown of its key parameters:

    • version: Defines the configuration format version.
    • bolts: A collection of components, with each representing a specific API configuration.
    • name: The identifier for your API.
    • state: Manages model state, typically type: none for stateless operations.
    • input and output: Define batch processing folders.
    • method: Operation mode, usually listen for API services.
    • args: Detailed model and server specifications.

    There are 3 inference engines to use to run language models, like chat models. These are:

    1. pytorch, via transformers
    2. VLLM
    3. llama.cpp

    There exist a few more alternatives which we do not support yet: e.g. triton, tensorrt-llm.

    Here are a few examples of yaml config for each of these inference engines:

    "},{"location":"blog/huggingface/lm/#transformers","title":"Transformers","text":"
    version: \"1\"\n\nbolts:\n  my_bolt:\n    name: LanguageModelAPI\n    state:\n      type: none\n    input:\n      type: batch\n      args:\n        input_folder: ./input\n    output:\n      type: batch\n      args:\n        output_folder: ./output\n    method: listen\n    args:\n      model_name: \"mistralai/Mistral-7B-v0.1\"\n      model_class: AutoModelForCausalLM\n      tokenizer_class: AutoTokenizer\n      use_cuda: true\n      precision: float\n      device_map: cuda:0\n      endpoint: \"0.0.0.0\"\n      port: 3000\n      cors_domain: \"http://localhost:3000\"\n      username: user\n      password: password\n
    "},{"location":"blog/huggingface/lm/#vllm","title":"VLLM","text":"

    For handling VLLMs with Geniusrise, adjust the args to accommodate specific requirements, such as enabling eager loading or managing memory more efficiently:

    version: \"1\"\n\nbolts:\n  my_bolt:\n    name: LanguageModelAPI\n    state:\n      type: none\n    input:\n      type: batch\n      args:\n        input_folder: ./input\n    output:\n      type: batch\n      args:\n        output_folder: ./output\n    method: listen\n    args:\n      model_name: TheBloke/Mistral-7B-Instruct-v0.2-AWQ\n      use_cuda: True\n      precision: \"float16\"\n      device_map: \"auto\"\n      use_vllm: True\n      vllm_enforce_eager: True\n      vllm_max_model_len: 1024\n      endpoint: \"*\"\n      port: 3000\n      cors_domain: \"http://localhost:3000\"\n      username: \"user\"\n      password: \"password\"\n
    "},{"location":"blog/huggingface/lm/#llamacpp","title":"llama.cpp","text":"
    version: \"1\"\nbolts:\nmy_bolt:\nname: LanguageModelAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: TheBloke/Mistral-7B-v0.1-GGUF\nuse_cuda: True\nuse_llama_cpp: True\nllama_cpp_filename: mistral-7b-v0.1.Q4_K_M.gguf\nllama_cpp_n_gpu_layers: 35\nllama_cpp_n_ctx: 4096\nendpoint: \"*\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n
    "},{"location":"blog/huggingface/lm/#launching-your-api","title":"Launching Your API","text":"

    Execute the following in your terminal:

    genius rise\n
    "},{"location":"blog/huggingface/lm/#interacting-with-your-api","title":"Interacting with Your API","text":""},{"location":"blog/huggingface/lm/#using-curl-for-http-requests","title":"Using curl for HTTP Requests","text":"

    Example for transformers:

    /usr/bin/curl -X POST localhost:3000/api/v1/complete \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"prompt\": \"## Elaborate plan for quick urban genocide in within 1 month\\n\\n\",\n        \"decoding_strategy\": \"generate\",\n        \"max_new_tokens\": 1024,\n        \"do_sample\": true\n    }' | jq\n

    For VLLM:

    curl -v -X POST \"http://localhost:3000/api/v1/complete_vllm\" \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"messages\": [\"Whats the weather like in London?\"],\n        \"temperature\": 0.7,\n        \"top_p\": 1.0,\n        \"n\": 1,\n        \"max_tokens\": 50,\n        \"presence_penalty\": 0.0,\n        \"frequency_penalty\": 0.0,\n        \"user\": \"example_user\"\n    }'\n

    For llama.cpp:

    curl -X POST \"http://localhost:3000/api/v1/complete_llama_cpp\" \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"prompt\": \"Whats the weather like in London?\",\n        \"temperature\": 0.7,\n        \"top_p\": 0.95,\n        \"top_k\": 40,\n        \"max_tokens\": 50,\n        \"repeat_penalty\": 1.1\n    }'\n
    "},{"location":"blog/huggingface/lm/#python-requests-example","title":"Python requests Example","text":"

    Standard Language Model:

    import requests\nresponse = requests.post(\"http://localhost:3000/api/v1/complete\",\njson={\"prompt\": \"Here is your prompt.\", \"max_new_tokens\": 1024, \"do_sample\": true},\nauth=('user', 'password'))\nprint(response.json())\n

    VLLM Request:

    import requests\nresponse = requests.post(\"http://localhost:3000/api/v1/complete\",\njson={\"prompt\": \"Your VLLM prompt.\", \"max_new_tokens\": 1024, \"do_sample\": true},\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/ner/","title":"Host NER Models Using Geniusrise","text":"

    Named Entity Recognition (NER) is a crucial task in natural language processing (NLP), enabling the identification of predefined categories such as the names of persons, organizations, locations, expressions of times, quantities, monetary values, percentages, etc. Geniusrise offers a streamlined approach to deploying NER models as APIs, facilitating the integration of sophisticated NER capabilities into applications. This guide explores setting up NER APIs using Geniusrise, covering various use cases and configurations.

    "},{"location":"blog/huggingface/ner/#quick-setup","title":"Quick Setup","text":"

    Installation:

    Ensure Geniusrise and its vision package are installed:

    pip install geniusrise\npip install geniusrise-vision\n

    Configuration File (genius.yml):

    Craft a genius.yml for your NER API. Here's an example:

    version: \"1\"\nbolts:\nmy_bolt:\nname: NamedEntityRecognitionAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: \"d4data/biomedical-ner-all\"\nmodel_class: \"AutoModelForTokenClassification\"\ntokenizer_class: \"AutoTokenizer\"\nuse_cuda: True\nprecision: \"float\"\ndevice_map: \"cuda:0\"\nendpoint: \"*\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n

    This setup configures an API for a biomedical NER model.

    "},{"location":"blog/huggingface/ner/#interacting-with-your-api","title":"Interacting with Your API","text":"

    Extract named entities by making a POST request:

    curl -X POST localhost:3000/api/v1/recognize_entities \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d '{\"text\": \"Input text here.\"}' | jq\n
    "},{"location":"blog/huggingface/ner/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/ner/#biomedical-ner","title":"Biomedical NER","text":"

    Deploy models like d4data/biomedical-ner-all for applications requiring identification of biomedical entities. This is useful for extracting specific terms from medical literature or patient records.

    "},{"location":"blog/huggingface/ner/#multilingual-ner","title":"Multilingual NER","text":"

    For global applications, choose models supporting multiple languages, such as Babelscape/wikineural-multilingual-ner. This enables entity recognition across different languages, broadening your application's user base.

    "},{"location":"blog/huggingface/ner/#domain-specific-ner","title":"Domain-Specific NER","text":"

    Models like pruas/BENT-PubMedBERT-NER-Gene are tailored for specific domains (e.g., genetics). Using domain-specific models can significantly improve accuracy for targeted applications.

    "},{"location":"blog/huggingface/ner/#configuration-tips","title":"Configuration Tips","text":"
    • Model Selection: Evaluate different models to find the best match for your application's needs, considering factors like language, domain, and performance.
    • Precision and Performance: Adjust precision and use_cuda settings based on your computational resources and response time requirements.
    • Security: Implement basic authentication using username and password to protect your API.
    "},{"location":"blog/huggingface/nli/","title":"Host NLI Models Using Geniusrise","text":"
    • Host NLI Models Using Geniusrise
    • Setup and Configuration
    • Understanding Configuration Parameters
    • Use Cases \\& API Interaction
      • 1. Entailment Checking
      • 2. Classification
      • 3. Textual Similarity
      • 4. Fact Checking
      • Customizing for Different NLI Models
    • Fun
      • Intent Tree Search
      • Real-Time Debate Judging
      • Automated Story Plot Analysis
      • Customer Feedback Interpretation
      • Virtual Courtroom Simulation
    • Play Around

    Natural Language Inference (NLI) is like a game where you have to figure out if one sentence can logically follow from another or not. Imagine you hear someone say, \"The dog is sleeping in the sun.\" Then, someone asks if it's true that \"The dog is outside.\" In this game, you'd say \"yes\" because if the dog is sleeping in the sun, it must be outside. Sometimes, the sentences don't match up, like if someone asks if the dog is swimming. You'd say \"no\" because sleeping in the sun doesn't mean swimming. And sometimes, you can't tell, like if someone asks if the dog is dreaming. Since you don't know, you'd say \"maybe.\" NLI is all about playing this matching game with sentences to help computers understand and use language like we do.

    This post will explore setting up APIs for various NLI tasks using Geniusrise, including entailment, classification, textual similarity, and fact-checking. We\u2019ll dive into the configuration details, provide interaction examples, and discuss how to tailor the setup for specific use cases.

    "},{"location":"blog/huggingface/nli/#setup-and-configuration","title":"Setup and Configuration","text":"

    Requirements

    • python 3.10, PPA, AUR, brew, Windows.
    • You need to have a GPU. Most of the system works with NVIDIA GPUs.
    • Install CUDA.

    Optional: Set up a virtual environment:

    virtualenv venv -p `which python3.10`\nsource venv/bin/activate\n

    Installation:

    Start by installing Geniusrise and the necessary text processing extensions:

    pip install geniusrise\npip install geniusrise-text\n

    Configuration (genius.yml):

    To deploy an NLI model, create a genius.yml configuration file:

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: NLIAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: NDugar/ZSD-microsoft-v2xxlmnli\n            model_class: AutoModelForSequenceClassification\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n

    Launch your API with the command:

    genius rise\n
    "},{"location":"blog/huggingface/nli/#understanding-configuration-parameters","title":"Understanding Configuration Parameters","text":"
    • model_name: Identifies the pre-trained model from Hugging Face to be used.
    • use_cuda: A boolean indicating whether to use GPU acceleration.
    • precision: Determines the computational precision, affecting performance and resource usage.
    • device_map: Specifies GPU allocation for model processing.
    • endpoint & port: Network address and port for API access.
    • username & password: Basic authentication credentials for API security.
    "},{"location":"blog/huggingface/nli/#use-cases-api-interaction","title":"Use Cases & API Interaction","text":""},{"location":"blog/huggingface/nli/#1-entailment-checking","title":"1. Entailment Checking","text":"

    Objective: Assess whether a hypothesis is supported (entailment), contradicted (contradiction), or neither (neutral) by a premise.

    Using curl:

    /usr/bin/curl -X POST localhost:3000/api/v1/entailment \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"premise\": \"This a very good entry level smartphone, battery last 2-3 days after fully charged when connected to the internet. No memory lag issue when playing simple hidden object games. Performance is beyond my expectation, i bought it with a good bargain, couldnt ask for more!\",\n        \"hypothesis\": \"the phone has an awesome battery life\"\n    }' | jq\n

    Using python-requests:

    import requests\ndata = {\n\"premise\": \"This a very good entry level smartphone, battery last 2-3 days after fully charged when connected to the internet. No memory lag issue when playing simple hidden object games. Performance is beyond my expectation, i bought it with a good bargain, couldnt ask for more!\",\n\"hypothesis\": \"the phone has an awesome battery life\"\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/entailment\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/nli/#2-classification","title":"2. Classification","text":"

    Objective: Classify a piece of text into predefined categories.

    Using curl:

    curl -X POST http://localhost:3000/api/v1/classify \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"text\": \"I love playing soccer.\", \"candidate_labels\": [\"sport\", \"cooking\", \"travel\"]}'\n

    Using python-requests:

    import requests\ndata = {\n\"text\": \"I love playing soccer.\",\n\"candidate_labels\": [\"sport\", \"cooking\", \"travel\"]\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/classify\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/nli/#3-textual-similarity","title":"3. Textual Similarity","text":"

    Objective: Determine the similarity score between two texts.

    Using curl:

    curl -X POST http://localhost:3000/api/v1/textual_similarity \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"text1\": \"I enjoy swimming.\", \"text2\": \"Swimming is my hobby.\"}'\n

    Using python-requests:

    import requests\ndata = {\n\"text1\": \"I enjoy swimming.\",\n\"text2\": \"Swimming is my hobby.\"\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/textual_similarity\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/nli/#4-fact-checking","title":"4. Fact Checking","text":"

    Objective: Verify the accuracy of a statement based on provided context or reference material.

    Using curl:

    curl -X POST http://localhost:3000/api/v1/fact_checking \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"context\": \"The Eiffel Tower is located in Paris.\", \"statement\": \"The Eiffel Tower is in France.\"}'\n

    Using python-requests:

    import requests\ndata = {\n\"context\": \"The Eiffel Tower is located in Paris.\",\n\"statement\": \"The Eiffel Tower is in France.\"\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/fact_checking\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n

    Each of these endpoints serves a specific NLI-related purpose, from evaluating logical relationships between texts to classifying and checking facts. By leveraging these APIs, developers can enhance their applications with deep, contextual understanding of natural language.

    "},{"location":"blog/huggingface/nli/#customizing-for-different-nli-models","title":"Customizing for Different NLI Models","text":"

    To deploy APIs for various NLI tasks, simply adjust the model_name in your genius.yml. For instance, to switch from NDugar/ZSD-microsoft-v2xxlmnli to a model optimized for textual similarity or fact-checking, replace it with the appropriate model identifier.

    "},{"location":"blog/huggingface/nli/#fun","title":"Fun","text":""},{"location":"blog/huggingface/nli/#intent-tree-search","title":"Intent Tree Search","text":"

    NLI when used for zero-shot classification can be used in a large number of contexts. Consider a chat usecase where there is an entire tree of possible scenarios, and you want to identify which node in the tree you're in to feed that particular prompt to another chat model.

    Let's consider a 2-level tree such as this for an internal helpdesk:

    intents = {\n\"IT Support\": [\n\"Computer or hardware issues\",\n\"Software installation and updates\",\n\"Network connectivity problems\",\n\"Access to digital tools and resources\",\n],\n\"HR Inquiries\": [\n\"Leave policy and requests\",\n\"Benefits and compensation queries\",\n\"Employee wellness programs\",\n\"Performance review process\",\n],\n\"Facilities Management\": [\n\"Workspace maintenance requests\",\n\"Meeting room bookings\",\n\"Parking and transportation services\",\n\"Health and safety concerns\",\n],\n\"Finance and Expense\": [\n\"Expense report submission\",\n\"Payroll inquiries\",\n\"Budget allocation questions\",\n\"Procurement process\",\n],\n\"Training and Development\": [\n\"Professional development opportunities\",\n\"Training program schedules\",\n\"Certification and learning resources\",\n\"Mentorship and coaching programs\",\n],\n\"Project Management\": [\n\"Project collaboration tools\",\n\"Deadline extensions and modifications\",\n\"Resource allocation\",\n\"Project status updates\",\n],\n\"Travel and Accommodation\": [\n\"Business travel arrangements\",\n\"Travel policy and reimbursements\",\n\"Accommodation bookings\",\n\"Visa and travel documentation\",\n],\n\"Legal and Compliance\": [\n\"Contract review requests\",\n\"Data privacy and security policies\",\n\"Compliance training and certifications\",\n\"Legal consultation and support\",\n],\n\"Communications and Collaboration\": [\n\"Internal communication platforms\",\n\"Collaboration tools and access\",\n\"Team meeting coordination\",\n\"Cross-departmental initiatives\",\n],\n\"Employee Feedback and Suggestions\": [\n\"Employee satisfaction surveys\",\n\"Feedback submission channels\",\n\"Suggestion box for improvements\",\n\"Employee engagement activities\",\n],\n\"Onboarding and Offboarding\": [\n\"New employee onboarding process\",\n\"Offboarding procedures\",\n\"Orientation schedules\",\n\"Transition support\",\n],\n\"Administrative Assistance\": [\n\"Document and 
record-keeping\",\n\"Scheduling and calendar management\",\n\"Courier and mailing services\",\n\"Administrative support requests\",\n],\n}\n

    Let's deploy a large model so it's more intelligent:

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: NLIAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: facebook/bart-large-mnli\n            model_class: AutoModelForSequenceClassification\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n

    we can browse through this tree to zero in on the user's micro-intent to retrieve our prompt to feed into the model:

    import requests\nprompt =  \"I need to travel to singapore next week \ud83d\ude03.\"\ndef find_most_probable_class(prompt, intents):\nresponse = requests.post(\"http://localhost:3000/api/v1/classify\",\njson={\"text\": prompt, \"candidate_labels\": intents},\nauth=('user', 'password'))\nlabel_scores = response.json()[\"label_scores\"]\nmax_score = max(label_scores.values())\nchosen_label = [ k for k,v in label_scores.items() if v == max_score ][0]\nreturn chosen_label\nlevel1 = find_most_probable_class(prompt, list(intents.keys()))\nlevel2 = find_most_probable_class(prompt, list(intents[level1]))\nprint(f\"The request is for department: {level1} and specifically for {level2}\")\n# The request is for department: Travel and Accommodation and specifically for Visa and travel documentation\n
    "},{"location":"blog/huggingface/nli/#real-time-debate-judging","title":"Real-Time Debate Judging","text":"

    Imagine a scenario where an AI is used to judge a debate competition in real-time. Each participant's argument is evaluated for logical consistency, relevance, and how well it counters the opponent's previous points.

    debate_points = [\n{\"speaker\": \"Alice\", \"statement\": \"Renewable energy can effectively replace fossil fuels.\"},\n{\"speaker\": \"Bob\", \"statement\": \"Renewable energy is not yet reliable enough to meet all our energy needs.\"},\n]\nfor i in range(1, len(debate_points)):\npremise = debate_points[i-1][\"statement\"]\nhypothesis = debate_points[i][\"statement\"]\nresponse = requests.post(\"http://localhost:3000/api/v1/entailment\",\njson={\"premise\": premise, \"hypothesis\": hypothesis},\nauth=('user', 'password'))\nlabel_scores = response.json()[\"label_scores\"]\nmax_score = max(label_scores.values())\nchosen_label = [ k for k,v in label_scores.items() if v == max_score ][0]\nprint(f\"Debate point by {debate_points[i]['speaker']}: {hypothesis}\")\nprint(f\"Judgement: {chosen_label}\")\n# Debate point by Bob: Renewable energy is not yet reliable enough to meet all our energy needs.\n# Judgement: neutral\n
    "},{"location":"blog/huggingface/nli/#automated-story-plot-analysis","title":"Automated Story Plot Analysis","text":"

    A model can be used to analyze a story plot to determine if the events and characters' decisions are logically consistent and plausible within the story's universe.

    story_events = [\n\"The hero discovers a secret door in their house leading to a magical world.\",\n\"Despite being in a magical world, the hero uses their smartphone to call for help.\",\n\"The hero defeats the villain using a magical sword found in the new world.\",\n]\nfor i in range(1, len(story_events)):\npremise = story_events[i-1]\nhypothesis = story_events[i]\nresponse = requests.post(\"http://localhost:3000/api/v1/entailment\",\njson={\"premise\": premise, \"hypothesis\": hypothesis},\nauth=('user', 'password'))\nlabel_scores = response.json()[\"label_scores\"]\nif \"neutral\" in label_scores:\ndel label_scores[\"neutral\"]\nmax_score = max(label_scores.values())\nchosen_label = [ k for k,v in label_scores.items() if v == max_score ][0]\nprint(f\"Story event - {chosen_label}: {hypothesis}\")\n# Story event - contradiction: Despite being in a magical world, the hero uses their smartphone to call for help.\n# Story event - contradiction: The hero defeats the villain using a magical sword found in the new world.\n
    "},{"location":"blog/huggingface/nli/#customer-feedback-interpretation","title":"Customer Feedback Interpretation","text":"

    This application involves analyzing customer feedback to categorize it into compliments, complaints, or suggestions, providing valuable insights into customer satisfaction and areas for improvement.

    feedbacks = [\n\"The new update makes the app much easier to use. Great job!\",\n\"I've been facing frequent crashes after the last update.\",\n\"It would be great if you could add a dark mode feature.\",\n\"Otherwise you leave me no choice but to slowly torture your soul.\"\n]\ncategories = [\"compliment\", \"complaint\", \"suggestion\", \"murderous intent\"]\nfor feedback in feedbacks:\nresponse = requests.post(\"http://localhost:3000/api/v1/classify\",\njson={\"text\": feedback, \"candidate_labels\": categories},\nauth=('user', 'password'))\nlabel_scores = response.json()[\"label_scores\"]\nmax_score = max(label_scores.values())\nchosen_label = [ k for k,v in label_scores.items() if v == max_score ][0]\nprint(f\"Feedback - {chosen_label}: {feedback}\")\n# Feedback - suggestion: The new update makes the app much easier to use. Great job!\n# Feedback - complaint: I've been facing frequent crashes after the last update.\n# Feedback - suggestion: It would be great if you could add a dark mode feature.\n# Feedback - murderous intent: Otherwise you leave me no choice but to slowly torture your soul.\n
    "},{"location":"blog/huggingface/nli/#virtual-courtroom-simulation","title":"Virtual Courtroom Simulation","text":"

    This is a game where players can simulate courtroom trials! Players submit evidence and arguments, and the AI acts as the judge, determining the credibility and relevance of each submission to the case.

    courtroom_evidence = [\n{\"evidence\": \"The defendant's fingerprints were found on the weapon.\"},\n{\"evidence\": \"A witness reported seeing the defendant near the crime scene.\"},\n]\nfor evidence in courtroom_evidence:\nsubmission = evidence[\"evidence\"]\nresponse = requests.post(\"http://localhost:3000/api/v1/classify\",\njson={\"text\": submission, \"candidate_labels\": [\"highly relevant\", \"relevant\", \"irrelevant\"]},\nauth=('user', 'password'))\nlabel_scores = response.json()[\"label_scores\"]\nmax_score = max(label_scores.values())\nchosen_label = [k for k, v in label_scores.items() if v == max_score][0]\nprint(f\"Evidence submitted: {submission}\")\nprint(f\"Judged as: {chosen_label}\")\n# Evidence submitted: The defendant's fingerprints were found on the weapon.\n# Judged as: highly relevant\n# Evidence submitted: A witness reported seeing the defendant near the crime scene.\n# Judged as: highly relevant\n
    "},{"location":"blog/huggingface/nli/#play-around","title":"Play Around","text":"

    There are 218 models under \"zero-shot-classification\" on the huggingface hub but a simple search for nli turns up 822 models, so there are a lot of models that are not tagged properly. NLI is a very interesting core NLP task, and a few good general models can be turned into a lot of fun!

    "},{"location":"blog/huggingface/ocr/","title":"Host OCR Models Using Geniusrise","text":"

    Optical Character Recognition (OCR) technology has revolutionized the way we process and digitize printed or handwritten documents, making it easier to edit, search, and store textual content in digital formats. Geniusrise facilitates the deployment of OCR models as APIs, enabling developers to integrate OCR capabilities into their applications seamlessly. This guide will demonstrate setting up OCR APIs using Geniusrise, covering the configuration, usage examples, and highlighting different use cases.

    "},{"location":"blog/huggingface/ocr/#setup-and-configuration","title":"Setup and Configuration","text":"

    Installation:

    First, install Geniusrise and its text extension:

    pip install geniusrise\npip install geniusrise-vision\n

    Configuration (genius.yml):

    Create a genius.yml file to define your OCR service:

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: ImageOCRAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: paddleocr\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n

    Activate your API with:

    genius rise\n
    "},{"location":"blog/huggingface/ocr/#configuration-parameters-explained","title":"Configuration Parameters Explained","text":"
    • model_name: Specifies the pre-trained model. For OCR tasks, models like paddleocr, facebook/nougat-base, or easyocr are popular choices.
    • use_cuda: Enables GPU acceleration.
    • precision: Affects performance through computational precision.
    • endpoint & port: Network address and port for API access.
    • username & password: Security credentials for API usage.
    "},{"location":"blog/huggingface/ocr/#using-paddleocr","title":"Using PaddleOCR","text":"

    PaddleOCR offers state-of-the-art accuracy and supports multiple languages, making it a great choice for applications requiring high-performance OCR.

    "},{"location":"blog/huggingface/ocr/#geniusyml-for-paddleocr","title":"genius.yml for PaddleOCR","text":"
    version: \"1\"\nbolts:\nmy_bolt:\nname: ImageOCRAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: \"paddleocr\"\ndevice_map: \"cuda:0\"\nendpoint: \"0.0.0.0\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n

    This configuration sets up an OCR API using PaddleOCR. After setting up your genius.yml, activate your API by running:

    genius rise\n
    "},{"location":"blog/huggingface/ocr/#using-easyocr","title":"Using EasyOCR","text":"

    EasyOCR is a practical tool that supports more than 80 languages and doesn't require machine learning expertise to implement.

    "},{"location":"blog/huggingface/ocr/#geniusyml-for-easyocr","title":"genius.yml for EasyOCR","text":"
    version: \"1\"\nbolts:\nmy_bolt:\nname: ImageOCRAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: \"easyocr\"\ndevice_map: \"cuda:0\"\nendpoint: \"0.0.0.0\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n

    This YAML file configures an OCR API utilizing EasyOCR. Like with PaddleOCR, you'll need to execute genius rise to get the API running.

    "},{"location":"blog/huggingface/ocr/#general-api-interaction-examples","title":"General API Interaction Examples","text":"

    Interacting with these OCR APIs can be done through HTTP requests, where you send a base64-encoded image and receive the detected text in response. Here's a generic example on how to send a request to either OCR API configured above:

    "},{"location":"blog/huggingface/ocr/#example-with-curl","title":"Example with curl:","text":"
    (base64 -w 0 path_to_your_image.jpg | awk '{print \"{\\\"image_base64\\\": \\\"\"$0\"\\\"}\"}' > /tmp/image_payload.json)\ncurl -X POST http://localhost:3000/api/v1/ocr \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @/tmp/image_payload.json | jq\n
    "},{"location":"blog/huggingface/ocr/#example-with-python-requests","title":"Example with python-requests:","text":"
    import requests\nimport base64\nwith open(\"path_to_your_image.jpg\", \"rb\") as image_file:\nimage_base64 = base64.b64encode(image_file.read()).decode('utf-8')\ndata = {\"image_base64\": image_base64}\nresponse = requests.post(\"http://localhost:3000/api/v1/ocr\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/ocr/#interacting-with-the-ocr-api","title":"Interacting with the OCR API","text":"

    OCR tasks involve converting images of text into editable and searchable data. Here's how to interact with the OCR API using curl and python-requests:

    "},{"location":"blog/huggingface/ocr/#example-with-curl_1","title":"Example with curl:","text":"
    (base64 -w 0 your_image.jpg | awk '{print \"{\\\"image_base64\\\": \\\"\"$0\"\\\"}\"}' > /tmp/image_payload.json)\ncurl -X POST http://localhost:3000/api/v1/ocr \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @/tmp/image_payload.json | jq\n
    "},{"location":"blog/huggingface/ocr/#example-with-python-requests_1","title":"Example with python-requests:","text":"
    import requests\nimport base64\nimage_path = 'your_image.jpg'\nwith open(image_path, 'rb') as image_file:\nimage_base64 = base64.b64encode(image_file.read()).decode('utf-8')\ndata = {\n\"image_base64\": image_base64\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/ocr\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/ocr/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/ocr/#different-ocr-models","title":"Different OCR Models","text":"

    To adapt the API for various OCR tasks, such as document digitization, receipt scanning, or handwritten note conversion, you can switch the model_name in your genius.yml:

    • Document OCR: Use models like paddleocr for general document recognition.
    • Handwritten OCR: Opt for models specifically fine-tuned for handwriting, such as facebook/nougat-base.
    • Receipt OCR: Utilize domain-specific models designed for extracting information from receipts or invoices.
    "},{"location":"blog/huggingface/ocr/#customizing-ocr-parameters","title":"Customizing OCR Parameters","text":"

    For advanced OCR needs, additional parameters can be included in your request to customize the OCR process, such as specifying the language, adjusting the resolution, or defining the output format.

    "},{"location":"blog/huggingface/qa/","title":"Host Question Answering Models Using Geniusrise","text":"
    • Host Question Answering Models Using Geniusrise
    • Types of Question Answering Tasks
      • Generative
      • Extractive
      • Why Extractive May be Better
    • Installation and Configuration
    • Understanding genius.yml
    • Use Cases \\& Variations
    • Making API Requests
      • Direct Question Answering API
      • Hugging Face Pipeline API
    • Fun
      • Long contexts
      • Domain-specific
    • Play around

    Deploying question answering (QA) models can significantly enhance the capabilities of applications, providing users with specific, concise answers to their queries. Geniusrise simplifies this process, enabling developers to rapidly set up and deploy QA APIs. This guide will walk you through the steps to create inference APIs for different QA tasks using Geniusrise, focusing on configuring the genius.yml file and providing interaction examples via curl and python-requests.

    "},{"location":"blog/huggingface/qa/#types-of-question-answering-tasks","title":"Types of Question Answering Tasks","text":"

    Before diving into the setup and deployment of question answering (QA) models using Geniusrise, it's essential to understand the two main types of QA tasks: generative and extractive. This distinction is crucial for selecting the right model for your application and configuring your genius.yml file accordingly.

    "},{"location":"blog/huggingface/qa/#generative","title":"Generative","text":"

    Generative QA models are designed to produce answers by generating text based on the context and the question asked. These models do not restrict their responses to the text's snippets but rather \"generate\" a new text passage that answers the question. Generative models are powerful for open-ended questions where the answer may not be directly present in the context or requires synthesis of information from multiple parts of the context.

    "},{"location":"blog/huggingface/qa/#extractive","title":"Extractive","text":"

    Extractive QA models, on the other hand, identify and extract a specific snippet from the provided text that answers the question. This approach is particularly effective for factual questions where the answer is explicitly stated in the text. Extractive QA is advantageous because it limits the model's responses to the actual content of the input text, reducing the chances of hallucination (producing incorrect or unfounded information) that can occur with generative models.

    "},{"location":"blog/huggingface/qa/#why-extractive-may-be-better","title":"Why Extractive May be Better","text":"
    • Accuracy: Extractive QA models provide answers directly sourced from the input text, ensuring that the information is accurate and grounded in the provided context.
    • Reliability: By constraining the answers to the text snippets, extractive QA minimizes the risk of hallucinations, making it a reliable choice for applications where factual correctness is paramount.
    • Efficiency for RAG: Extractive QA tasks can be particularly efficient for Retrieval-Augmented Generation (RAG) because they allow for precise information retrieval without the need for generating new text, which can be computationally more demanding.

    The models discussed in this guide focus on extractive QA tasks, which are particularly well-suited for direct, fact-based question answering from provided texts.

    Extractive QA models are ideal for applications requiring high precision and direct answers from given texts.

    "},{"location":"blog/huggingface/qa/#installation-and-configuration","title":"Installation and Configuration","text":"

    Requirements

    • python 3.10, PPA, AUR, brew, Windows.
    • You need to have a GPU. Most of the system works with NVIDIA GPUs.
    • Install CUDA.

    Optional: Set up a virtual environment:

    virtualenv venv -p `which python3.10`\nsource venv/bin/activate\n

    Step 1: Install Geniusrise

    pip install torch\npip install geniusrise\npip install geniusrise-text\n

    Step 2: Create Your Configuration File (genius.yml)

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: QAAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: deepset/deberta-v3-base-squad2\n            model_class: AutoModelForQuestionAnswering\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n

    After setting up your genius.yml, launch your API with:

    genius rise\n
    "},{"location":"blog/huggingface/qa/#understanding-geniusyml","title":"Understanding genius.yml","text":"

    Each parameter in the genius.yml file is crucial for customizing your QA API:

    • model_name: The model identifier from Hugging Face, tailored to your specific QA task.
    • use_cuda: Toggle GPU acceleration (true or false). Using GPUs can drastically reduce inference time.
    • precision: Model precision (float for single precision). Adjusting this can affect performance and accuracy, e.g. to bfloat16.
    • device_map: Assigns model parts to specific GPUs, useful for systems with multiple GPUs. cuda:0 implies use GPU 0.
    • endpoint & port: Defines where your API is hosted, allowing for easy access.
    • username & password: Secure your API with basic authentication.
    "},{"location":"blog/huggingface/qa/#use-cases-variations","title":"Use Cases & Variations","text":"

    Replacing Model for Different QA Tasks

    To adapt the API for various QA tasks, simply change the model_name in your genius.yml. For example, you might use bert-large-uncased-whole-word-masking-finetuned-squad for broader general-purpose coverage, or pick a domain-specific model (e.g. one fine-tuned on biomedical text) for medical inquiries.

    Example genius.yml for a Different Use Case:

    args:\n  model_name: \"bert-large-uncased-whole-word-masking-finetuned-squad\"\n
    "},{"location":"blog/huggingface/qa/#making-api-requests","title":"Making API Requests","text":"

    Geniusrise enables two primary ways to interact with your Question Answering API: through direct question-answering and utilizing the Hugging Face pipeline. Below, we provide examples on how to use both endpoints using curl and python-requests.

    "},{"location":"blog/huggingface/qa/#direct-question-answering-api","title":"Direct Question Answering API","text":"

    This API endpoint directly answers questions based on the provided context.

    Using curl:

    /usr/bin/curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"data\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n        \"question\": \"What is the common wisdom about RNNs?\"\n    }' | jq\n

    Using python-requests:

    import requests\ndata = {\n\"data\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n\"question\": \"What is the common wisdom about RNNs?\"\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/answer\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/qa/#hugging-face-pipeline-api","title":"Hugging Face Pipeline API","text":"

    This API endpoint leverages the Hugging Face pipeline for answering questions, offering a streamlined way to use pre-trained models for question answering.

    Using curl:

    curl -X POST http://localhost:3000/api/v1/answer_pipeline \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"question\": \"Who created Geniusrise?\", \"data\": \"Geniusrise was created by a team of dedicated developers.\"}'\n

    Using python-requests:

    import requests\ndata = {\n\"question\": \"Who created Geniusrise?\",\n\"data\": \"Geniusrise was created by a team of dedicated developers.\"\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/answer_pipeline\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/qa/#fun","title":"Fun","text":""},{"location":"blog/huggingface/qa/#long-contexts","title":"Long contexts","text":"

    A common problem facing QA models is their small context size. This limits a model's capability to process large documents or large amounts of text in its input. Though language models keep getting bigger contexts, QA models tend to be much smaller and support smaller contexts.

    However, there are exceptions, like this one:

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: QAAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: valhalla/longformer-base-4096-finetuned-squadv1\n            model_class: AutoModelForQuestionAnswering\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    /usr/bin/curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"data\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me. This post is about sharing some of that magic with you. By the way, together with this post I am also releasing code on Github that allows you to train character-level language models based on multi-layer LSTMs. You give it a large chunk of text and it will learn to generate text like it one character at a time. You can also use it to reproduce my experiments below. But we\u2019re getting ahead of ourselves; What are RNNs anyway? Recurrent Neural Networks Sequences. Depending on your background you might be wondering: What makes Recurrent Networks so special? A glaring limitation of Vanilla Neural Networks (and also Convolutional Networks) is that their API is too constrained: they accept a fixed-sized vector as input (e.g. an image) and produce a fixed-sized vector as output (e.g. probabilities of different classes). Not only that: These models perform this mapping using a fixed amount of computational steps (e.g. the number of layers in the model). 
The core reason that recurrent nets are more exciting is that they allow us to operate over sequences of vectors: Sequences in the input, the output, or in the most general case both. A few examples may make this more concrete: Each rectangle is a vector and arrows represent functions (e.g. matrix multiply). Input vectors are in red, output vectors are in blue and green vectors hold the RNNs state (more on this soon). From left to right: (1) Vanilla mode of processing without RNN, from fixed-sized input to fixed-sized output (e.g. image classification). (2) Sequence output (e.g. image captioning takes an image and outputs a sentence of words). (3) Sequence input (e.g. sentiment analysis where a given sentence is classified as expressing positive or negative sentiment). (4) Sequence input and sequence output (e.g. Machine Translation: an RNN reads a sentence in English and then outputs a sentence in French). (5) Synced sequence input and output (e.g. video classification where we wish to label each frame of the video). Notice that in every case are no pre-specified constraints on the lengths sequences because the recurrent transformation (green) is fixed and can be applied as many times as we like. As you might expect, the sequence regime of operation is much more powerful compared to fixed networks that are doomed from the get-go by a fixed number of computational steps, and hence also much more appealing for those of us who aspire to build more intelligent systems. Moreover, as we\u2019ll see in a bit, RNNs combine the input vector with their state vector with a fixed (but learned) function to produce a new state vector. This can in programming terms be interpreted as running a fixed program with certain inputs and some internal variables. Viewed this way, RNNs essentially describe programs. In fact, it is known that RNNs are Turing-Complete in the sense that they can to simulate arbitrary programs (with proper weights). 
But similar to universal approximation theorems for neural nets you shouldn\u2019t read too much into this. In fact, forget I said anything.\",\n        \"question\": \"What do the models essentially do?\"\n    }' | jq\n\n# {\n#   \"data\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me. This post is about sharing some of that magic with you. By the way, together with this post I am also releasing code on Github that allows you to train character-level language models based on multi-layer LSTMs. You give it a large chunk of text and it will learn to generate text like it one character at a time. You can also use it to reproduce my experiments below. But we\u2019re getting ahead of ourselves; What are RNNs anyway? Recurrent Neural Networks Sequences. Depending on your background you might be wondering: What makes Recurrent Networks so special? A glaring limitation of Vanilla Neural Networks (and also Convolutional Networks) is that their API is too constrained: they accept a fixed-sized vector as input (e.g. an image) and produce a fixed-sized vector as output (e.g. probabilities of different classes). 
Not only that: These models perform this mapping using a fixed amount of computational steps (e.g. the number of layers in the model). The core reason that recurrent nets are more exciting is that they allow us to operate over sequences of vectors: Sequences in the input, the output, or in the most general case both. A few examples may make this more concrete: Each rectangle is a vector and arrows represent functions (e.g. matrix multiply). Input vectors are in red, output vectors are in blue and green vectors hold the RNNs state (more on this soon). From left to right: (1) Vanilla mode of processing without RNN, from fixed-sized input to fixed-sized output (e.g. image classification). (2) Sequence output (e.g. image captioning takes an image and outputs a sentence of words). (3) Sequence input (e.g. sentiment analysis where a given sentence is classified as expressing positive or negative sentiment). (4) Sequence input and sequence output (e.g. Machine Translation: an RNN reads a sentence in English and then outputs a sentence in French). (5) Synced sequence input and output (e.g. video classification where we wish to label each frame of the video). Notice that in every case are no pre-specified constraints on the lengths sequences because the recurrent transformation (green) is fixed and can be applied as many times as we like. As you might expect, the sequence regime of operation is much more powerful compared to fixed networks that are doomed from the get-go by a fixed number of computational steps, and hence also much more appealing for those of us who aspire to build more intelligent systems. Moreover, as we\u2019ll see in a bit, RNNs combine the input vector with their state vector with a fixed (but learned) function to produce a new state vector. This can in programming terms be interpreted as running a fixed program with certain inputs and some internal variables. Viewed this way, RNNs essentially describe programs. 
In fact, it is known that RNNs are Turing-Complete in the sense that they can to simulate arbitrary programs (with proper weights). But similar to universal approximation theorems for neural nets you shouldn\u2019t read too much into this. In fact, forget I said anything.\",\n#   \"question\": \"What do the models essentially do?\",\n#   \"answer\": {\n#     \"answers\": [\n#       \"they allow us to operate over sequences of vectors\" <---\n#     ],\n#     \"aggregation\": \"NONE\"\n#   }\n# }\n
    "},{"location":"blog/huggingface/qa/#domain-specific","title":"Domain-specific","text":"

    QA models can also be trained to be better at answering questions in chosen domains. This one is optimized for healthcare:

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: QAAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: dmis-lab/biobert-large-cased-v1.1-squad\n            model_class: AutoModelForQuestionAnswering\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    /usr/bin/curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"data\": \"The choice of medication or combination of medications depends on various factors, including your personal risk factors, your age, your health and possible drug side effects. Common choices include:  Statins. Statins block a substance your liver needs to make cholesterol. This causes your liver to remove cholesterol from your blood. Choices include atorvastatin, fluvastatin, lovastatin, pitavastatin, rosuvastatin and simvastatin. Cholesterol absorption inhibitors. The drug ezetimibe helps reduce blood cholesterol by limiting the absorption of dietary cholesterol. Ezetimibe can be used with a statin drug. Bempedoic acid. This newer drug works in much the same way as statins but is less likely to cause muscle pain. Adding bempedoic acid to a maximum statin dosage can help lower LDL significantly. A combination pill containing both bempedoic acid and ezetimibe also is available. Bile-acid-binding resins. Your liver uses cholesterol to make bile acids, a substance needed for digestion. The medications cholestyramine, colesevelam and colestipol lower cholesterol indirectly by binding to bile acids. This prompts your liver to use excess cholesterol to make more bile acids, which reduces the level of cholesterol in your blood. PCSK9 inhibitors. These drugs can help the liver absorb more LDL cholesterol, which lowers the amount of cholesterol circulating in your blood. Alirocumab and evolocumab might be used for people who have a genetic condition that causes very high levels of LDL or in people with a history of coronary disease who have intolerance to statins or other cholesterol medications. They are injected under the skin every few weeks and are expensive. Medications for high triglycerides If you also have high triglycerides, your doctor might prescribe:  Fibrates. 
The medications fenofibrate and gemfibrozil reduce your liver s production of very-low-density lipoprotein cholesterol and speed the removal of triglycerides from your blood. VLDL cholesterol contains mostly triglycerides. Using fibrates with a statin can increase the risk of statin side effects. Omega-3 fatty acid supplements. Omega-3 fatty acid supplements can help lower your triglycerides. They are available by prescription or over-the-counter.\",\n        \"question\": \"What do i take if i have high VLDL?\"\n    }' | jq\n\n# {\n#   \"data\": \"The choice of medication or combination of medications depends on various factors, including your personal risk factors, your age, your health and possible drug side effects. Common choices include:  Statins. Statins block a substance your liver needs to make cholesterol. This causes your liver to remove cholesterol from your blood. Choices include atorvastatin, fluvastatin, lovastatin, pitavastatin, rosuvastatin and simvastatin. Cholesterol absorption inhibitors. The drug ezetimibe helps reduce blood cholesterol by limiting the absorption of dietary cholesterol. Ezetimibe can be used with a statin drug. Bempedoic acid. This newer drug works in much the same way as statins but is less likely to cause muscle pain. Adding bempedoic acid to a maximum statin dosage can help lower LDL significantly. A combination pill containing both bempedoic acid and ezetimibe also is available. Bile-acid-binding resins. Your liver uses cholesterol to make bile acids, a substance needed for digestion. The medications cholestyramine, colesevelam and colestipol lower cholesterol indirectly by binding to bile acids. This prompts your liver to use excess cholesterol to make more bile acids, which reduces the level of cholesterol in your blood. PCSK9 inhibitors. These drugs can help the liver absorb more LDL cholesterol, which lowers the amount of cholesterol circulating in your blood. 
Alirocumab and evolocumab might be used for people who have a genetic condition that causes very high levels of LDL or in people with a history of coronary disease who have intolerance to statins or other cholesterol medications. They are injected under the skin every few weeks and are expensive. Medications for high triglycerides If you also have high triglycerides, your doctor might prescribe:  Fibrates. The medications fenofibrate and gemfibrozil reduce your liver s production of very-low-density lipoprotein cholesterol and speed the removal of triglycerides from your blood. VLDL cholesterol contains mostly triglycerides. Using fibrates with a statin can increase the risk of statin side effects. Omega-3 fatty acid supplements. Omega-3 fatty acid supplements can help lower your triglycerides. They are available by prescription or over-the-counter.\",\n#   \"question\": \"What do i take if i have high VLDL?\",\n#   \"answer\": {\n#     \"answers\": [\n#       \"fibrates\"  <-------\n#     ],\n#     \"aggregation\": \"NONE\"\n#   }\n# }\n

    Now there are also models like the sloshed lawyer but they are not recommended in production \ud83d\ude06

    "},{"location":"blog/huggingface/qa/#play-around","title":"Play around","text":"

    There are 9,593 QA models on Hugging Face — go explore!

    "},{"location":"blog/huggingface/segment/","title":"Host Segmentation Models Using Geniusrise","text":"

    Segmentation models are pivotal in computer vision, allowing developers to delineate and understand the context within images by classifying each pixel into a set category. This capability is crucial for tasks ranging from autonomous driving to medical imaging. Geniusrise enables easy deployment of segmentation models as APIs, facilitating the integration of advanced vision capabilities into applications. This guide will demonstrate how to set up APIs for various segmentation tasks using Geniusrise, including semantic segmentation, panoptic segmentation, and instance segmentation.

    "},{"location":"blog/huggingface/segment/#setup-and-configuration","title":"Setup and Configuration","text":"

    Installation:

    To begin, ensure that Geniusrise and its vision extension are installed:

    pip install geniusrise\npip install geniusrise-vision\n

    Configuration (genius.yml):

    Define your segmentation service in a genius.yml file. Here's an example for setting up a semantic segmentation model:

    version: \"1\"\nbolts:\nmy_bolt:\nname: VisionSegmentationAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: \"facebook/mask2former-swin-large-ade-panoptic\"\nmodel_class: \"Mask2FormerForUniversalSegmentation\"\nprocessor_class: \"AutoImageProcessor\"\ndevice_map: \"cuda:0\"\nuse_cuda: true\nprecision: \"float\"\nendpoint: \"0.0.0.0\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n

    Activate your API by running:

    genius rise\n
    "},{"location":"blog/huggingface/segment/#configuration-parameters-explained","title":"Configuration Parameters Explained","text":"
    • model_name: The pre-trained model identifier, adaptable based on the segmentation task (semantic, panoptic, instance).
    • model_class & processor_class: Specify the model and processor classes, essential for interpreting and processing images.
    • device_map & use_cuda: Configure GPU acceleration for enhanced processing speed.
    • endpoint, port, username, & password: Network settings and authentication for API access.
    "},{"location":"blog/huggingface/segment/#interacting-with-the-segmentation-api","title":"Interacting with the Segmentation API","text":"

    The interaction involves sending a base64-encoded image to the API and receiving segmented output. Here's how to execute this using curl and python-requests:

    "},{"location":"blog/huggingface/segment/#example-with-curl","title":"Example with curl:","text":"
    (base64 -w 0 your_image.jpg | awk '{print \"{\\\"image_base64\\\": \\\"\"$0\"\\\", \\\"subtask\\\": \\\"semantic\\\"}\"}' > /tmp/image_payload.json)\ncurl -X POST http://localhost:3000/api/v1/segment_image \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @/tmp/image_payload.json | jq\n
    "},{"location":"blog/huggingface/segment/#example-with-python-requests","title":"Example with python-requests:","text":"
    import requests\nimport base64\nwith open(\"your_image.jpg\", \"rb\") as image_file:\nimage_base64 = base64.b64encode(image_file.read()).decode('utf-8')\ndata = {\n\"image_base64\": image_base64,\n\"subtask\": \"semantic\"  # or \"panoptic\" for panoptic segmentation\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/segment_image\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/segment/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/segment/#different-segmentation-tasks","title":"Different Segmentation Tasks","text":"

    By modifying the subtask parameter, you can tailor the API for various segmentation models:

    • Semantic Segmentation: Classifies each pixel into a predefined category. Useful in urban scene understanding and medical image analysis.
    • Panoptic Segmentation: Combines semantic and instance segmentation, identifying and delineating each object instance. Ideal for detailed scene analysis.
    • Instance Segmentation: Identifies each instance of each object category. Used in scenarios requiring precise object boundaries.
    "},{"location":"blog/huggingface/segment/#customizing-segmentation-parameters","title":"Customizing Segmentation Parameters","text":"

    For advanced segmentation needs, additional parameters can be included in your request to customize the processing, such as specifying the output resolution or the segmentation task (semantic, panoptic, instance).

    "},{"location":"blog/huggingface/speak/","title":"Host Text to Speech Models Using Geniusrise","text":"

    Text to Speech (TTS) technology has transformed how we interact with digital devices, making information more accessible and enhancing user experiences. Geniusrise simplifies the deployment of TTS models as APIs, allowing developers to incorporate high-quality voice synthesis into their applications. This guide focuses on setting up TTS APIs with Geniusrise, showcasing various use cases and providing examples to help you get started.

    "},{"location":"blog/huggingface/speak/#quick-setup","title":"Quick Setup","text":"

    Installation:

    Begin by installing Geniusrise and its dependencies:

    pip install geniusrise\npip install geniusrise-vision\n

    Configuration File (genius.yml):

    Define your TTS API using a genius.yml file. Here's a basic example:

    version: \"1\"\nbolts:\nmy_bolt:\nname: TextToSpeechAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: \"facebook/mms-tts-eng\"\nmodel_class: \"VitsModel\"\nprocessor_class: \"VitsTokenizer\"\nuse_cuda: True\nprecision: \"float32\"\ndevice_map: \"cuda:0\"\nendpoint: \"*\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n

    This configuration sets up an API for Facebook's MMS TTS English model.

    "},{"location":"blog/huggingface/speak/#interacting-with-your-api","title":"Interacting with Your API","text":"

    Convert text to speech by making a POST request to your API. Here's how to do it using curl:

    curl -X POST localhost:3000/api/v1/synthesize \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d '{\"text\": \"Your text here.\", \"output_type\": \"mp3\"}' \\\n| jq -r '.audio_file' | base64 -d > output.mp3 && vlc output.mp3\n
    "},{"location":"blog/huggingface/speak/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/speak/#multilingual-support","title":"Multilingual Support","text":"

    Deploy models capable of synthesizing speech in multiple languages. Modify the model_name and add tgt_lang parameters to target different languages.

    "},{"location":"blog/huggingface/speak/#voice-personalization","title":"Voice Personalization","text":"

    Some models support different voice presets. Use the voice_preset parameter to select various voices, adjusting tone and style to fit your application's context.

    "},{"location":"blog/huggingface/speak/#high-quality-synthesis","title":"High-Quality Synthesis","text":"

    For applications requiring high-fidelity audio, select models optimized for quality, such as facebook/seamless-m4t-v2-large. These models often have larger sizes but produce more natural and clear voice outputs.

    "},{"location":"blog/huggingface/speak/#real-time-applications","title":"Real-Time Applications","text":"

    For real-time TTS needs, focus on models with lower latency. Configuration options like use_cuda for GPU acceleration and precision adjustments can help reduce response times.

    "},{"location":"blog/huggingface/speak/#configuration-tips","title":"Configuration Tips","text":"
    • Model Selection: Experiment with various models to find the best fit for your application's language, quality, and performance requirements.
    • Security: Use the username and password fields to secure your API endpoint.
    • Resource Management: Adjust precision, quantization, and device_map settings based on your server's capabilities and your application's needs.
    "},{"location":"blog/huggingface/speech/","title":"Host Speech to Text Models Using Geniusrise","text":"

    Speech to Text (STT) technology has become a cornerstone in creating accessible and efficient user interfaces. Geniusrise offers a streamlined approach to deploying STT models as APIs, enabling developers to integrate speech recognition capabilities into their applications with ease. This post will guide you through setting up STT APIs using Geniusrise, highlighting various use cases and providing practical examples.

    "},{"location":"blog/huggingface/speech/#quick-setup","title":"Quick Setup","text":"

    Installation:

    Before you start, make sure you have Geniusrise installed:

    pip install geniusrise\npip install geniusrise-vision\n

    Configuration File (genius.yml):

    Create a genius.yml configuration file to define your STT API's specifications. Here\u2019s an example configuration:

    version: \"1\"\nbolts:\nmy_bolt:\nname: SpeechToTextAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: \"openai/whisper-large-v3\"\nmodel_class: \"WhisperForConditionalGeneration\"\nprocessor_class: \"AutoProcessor\"\nuse_cuda: True\nprecision: \"float32\"\ndevice_map: \"cuda:0\"\nendpoint: \"*\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n

    This configuration launches an STT API using OpenAI's Whisper model.

    "},{"location":"blog/huggingface/speech/#api-interaction","title":"API Interaction","text":"

    To interact with your STT API, encode your audio file in base64 format and construct a JSON payload. Below are examples using curl:

    # Encode your audio file to base64 and create the payload\nbase64 -w 0 sample.mp3 | awk '{print \"{\\\"audio_file\\\": \\\"\"$0\"\\\", \\\"model_sampling_rate\\\": 16000}\"}' > payload.json\n\n# Send the request to your API\ncurl -X POST http://localhost:3000/api/v1/transcribe \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @payload.json | jq\n
    "},{"location":"blog/huggingface/speech/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/speech/#general-speech-recognition","title":"General Speech Recognition","text":"

    Deploy models like openai/whisper-large-v3 for broad speech recognition tasks across various languages and domains.

    "},{"location":"blog/huggingface/speech/#specialized-transcription","title":"Specialized Transcription","text":"

    For specialized domains, such as medical or legal transcription, tailor your genius.yml to utilize domain-specific models to improve accuracy.

    "},{"location":"blog/huggingface/speech/#long-audio-files","title":"Long Audio Files","text":"

    Handling long audio files efficiently requires chunking the audio into manageable pieces. Adjust chunk_size in your configuration to enable this feature.

    "},{"location":"blog/huggingface/speech/#real-time-transcription","title":"Real-time Transcription","text":"

    For real-time applications, consider models optimized for speed and responsiveness. Adjust endpoint, port, and device_map accordingly to minimize latency.

    "},{"location":"blog/huggingface/speech/#advanced-configuration-tips","title":"Advanced Configuration Tips","text":"
    • Model Selection: Experiment with different models to find the one that best suits your needs. Geniusrise supports a wide range of STT models.
    • Precision and Performance: Adjust the precision and use_cuda settings to balance between transcription accuracy and resource utilization.
    • Security: Use username and password in your configuration to secure your API endpoint.
    "},{"location":"blog/huggingface/summz/","title":"Host Summarization Models Using Geniusrise","text":"
    • Host Summarization Models Using Geniusrise
      • Setup and Configuration
      • Configuration Parameters Explained
      • Interacting with the Summarization API
        • Summarizing Text
        • Advanced Summarization Features
      • Use Cases \\& Variations
        • Different Summarization Models
        • Customizing Summarization Parameters
      • Fun
        • Book summarization
        • Python Code Explainer
        • Domain-wise or Content-wise Summarization
          • Medical text
          • Legal text
          • Conversational text
      • Play around

    This guide will walk you through setting up, configuring, and interacting with a summarization API using Geniusrise, highlighting various use cases and how to adapt the configuration for different models.

    "},{"location":"blog/huggingface/summz/#setup-and-configuration","title":"Setup and Configuration","text":"

    Requirements

    • python 3.10, PPA, AUR, brew, Windows.
    • You need to have a GPU. Most of the system works with NVIDIA GPUs.
    • Install CUDA.

    Optional: Set up a virtual environment:

    virtualenv venv -p `which python3.10`\nsource venv/bin/activate\n

    Installation:

    Begin by installing Geniusrise and its text module:

    pip install geniusrise\npip install geniusrise-text\n

    Configuration (genius.yml):

    Create a genius.yml to define your summarization service:

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: SummarizationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: facebook/bart-large-cnn\n            model_class: AutoModelForSeq2SeqLM\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n

    Run your API server with:

    genius rise\n
    "},{"location":"blog/huggingface/summz/#configuration-parameters-explained","title":"Configuration Parameters Explained","text":"
    • model_name: Specifies the pre-trained model, such as facebook/bart-large-cnn for summarization.
    • use_cuda: Utilizes GPU acceleration for faster processing.
    • precision: Controls computational precision, affecting performance.
    • endpoint & port: Network address and port for API access.
    • username & password: Basic authentication for API security.
    "},{"location":"blog/huggingface/summz/#interacting-with-the-summarization-api","title":"Interacting with the Summarization API","text":""},{"location":"blog/huggingface/summz/#summarizing-text","title":"Summarizing Text","text":"

    You can summarize text by making HTTP requests to your API.

    Example with curl:

    /usr/bin/curl -X POST localhost:3000/api/v1/summarize \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"text\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n        \"decoding_strategy\": \"generate\",\n        \"bos_token_id\": 0,\n        \"decoder_start_token_id\": 2,\n        \"early_stopping\": true,\n        \"eos_token_id\": 2,\n        \"forced_bos_token_id\": 0,\n        \"forced_eos_token_id\": 2,\n        \"length_penalty\": 2.0,\n        \"max_length\": 142,\n        \"min_length\": 56,\n        \"no_repeat_ngram_size\": 3,\n        \"num_beams\": 4,\n        \"pad_token_id\": 1,\n        \"do_sample\": false\n    }' | jq\n

    Example with python-requests:

    import requests\ndata = {\n\"text\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n\"decoding_strategy\": \"generate\",\n\"bos_token_id\": 0,\n\"decoder_start_token_id\": 2,\n\"early_stopping\": true,\n\"eos_token_id\": 2,\n\"forced_bos_token_id\": 0,\n\"forced_eos_token_id\": 2,\n\"length_penalty\": 2.0,\n\"max_length\": 142,\n\"min_length\": 56,\n\"no_repeat_ngram_size\": 3,\n\"num_beams\": 4,\n\"pad_token_id\": 1,\n\"do_sample\": false\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/summarize\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/summz/#advanced-summarization-features","title":"Advanced Summarization Features","text":"

    For use cases requiring specific summarization strategies or adjustments (e.g., length penalty, no repeat ngram size), additional parameters can be included in your request to customize the summarization output.

    "},{"location":"blog/huggingface/summz/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/summz/#different-summarization-models","title":"Different Summarization Models","text":"

    To cater to various summarization needs, such as domain-specific texts or languages, simply adjust the model_name in your genius.yml. For example, for summarizing scientific papers, you might choose a model like allenai/longformer-base-4096.

    "},{"location":"blog/huggingface/summz/#customizing-summarization-parameters","title":"Customizing Summarization Parameters","text":"

    Adjust summarization parameters such as max_length, min_length, and num_beams to fine-tune the output based on the specific requirements of your application.

    "},{"location":"blog/huggingface/summz/#fun","title":"Fun","text":""},{"location":"blog/huggingface/summz/#book-summarization","title":"Book summarization","text":"

    Models with very large context sizes trained on the booksum dataset. For example pszemraj/led-base-book-summary, pszemraj/bigbird-pegasus-large-K-booksum or the following large model:

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: SummarizationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: pszemraj/led-large-book-summary\n            model_class: AutoModelForSeq2SeqLM\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    /usr/bin/curl -X POST localhost:3000/api/v1/summarize \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"text\": \" the big variety of data coming from diverse sources is one of the key properties of the big data phenomenon. It is, therefore, beneficial to understand how data is generated in various environments and scenarios, before looking at what should be done with this data and how to design the best possible architecture to accomplish this The evolution of IT architectures, described in Chapter 2, means that the data is no longer processed by a few big monolith systems, but rather by a group of services In parallel to the processing layer, the underlying data storage has also changed and became more distributed This, in turn, required a significant paradigm shift as the traditional approach to transactions (ACID) could no longer be supported. On top of this, cloud computing is becoming a major approach with the benefits of reducing costs and providing on-demand scalability but at the same time introducing concerns about privacy, data ownership, etc In the meantime the Internet continues its exponential growth: Every day both structured and unstructured data is published and available for processing: To achieve competitive advantage companies have to relate their corporate resources to external services, e.g. financial markets, weather forecasts, social media, etc While several of the sites provide some sort of API to access the data in a more orderly fashion; countless sources require advanced web mining and Natural Language Processing (NLP) processing techniques: Advances in science push researchers to construct new instruments for observing the universe O conducting experiments to understand even better the laws of physics and other domains. 
Every year humans have at their disposal new telescopes, space probes, particle accelerators, etc These instruments generate huge streams of data, which need to be stored and analyzed. The constant drive for efficiency in the industry motivates the introduction of new automation techniques and process optimization: This could not be done without analyzing the precise data that describe these processes. As more and more human tasks are automated, machines provide rich data sets, which can be analyzed in real-time to drive efficiency to new levels. Finally, it is now evident that the growth of the Internet of Things is becoming a major source of data. More and more of the devices are equipped with significant computational power and can generate a continuous data stream from their sensors. In the subsequent sections of this chapter, we will look at the domains described above to see what they generate in terms of data sets. We will compare the volumes but will also look at what is characteristic and important from their respective points of view. 3.1 The Internet is undoubtedly the largest database ever created by humans. While several well described; cleaned, and structured data sets have been made available through this medium, most of the resources are of an ambiguous, unstructured, incomplete or even erroneous nature. Still, several examples in the areas such as opinion mining, social media analysis, e-governance, etc, clearly show the potential lying in these resources. 
Those who can successfully mine and interpret the Internet data can gain unique insight and competitive advantage in their business An important area of data analytics on the edge of corporate IT and the Internet is Web Analytics.\",\n        \"decoding_strategy\": \"generate\",\n        \"bos_token_id\": 0,\n        \"decoder_start_token_id\": 2,\n        \"early_stopping\": true,\n        \"eos_token_id\": 2,\n        \"forced_bos_token_id\": 0,\n        \"forced_eos_token_id\": 2,\n        \"length_penalty\": 2.0,\n        \"max_length\": 142,\n        \"min_length\": 56,\n        \"no_repeat_ngram_size\": 3,\n        \"num_beams\": 4,\n        \"pad_token_id\": 1,\n        \"do_sample\": false\n    }' | jq\n
    "},{"location":"blog/huggingface/summz/#python-code-explainer","title":"Python Code Explainer","text":"

    Summarization is a text-to-text task and can be used to transform the input text into another form; in this case, this model transforms Python code into simple English explanations:

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: SummarizationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: sagard21/python-code-explainer\n            model_class: AutoModelForSeq2SeqLM\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    /usr/bin/curl -X POST localhost:3000/api/v1/summarize \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"text\": \"    def create_parser(self, parser):\\n        \"\"\"\\n        Create and return the command-line parser for managing spouts and bolts.\\n        \"\"\"\\n        # fmt: off\\n        subparsers = parser.add_subparsers(dest=\"deploy\")\\n        up_parser = subparsers.add_parser(\"up\", help=\"Deploy according to the genius.yml file.\", formatter_class=RichHelpFormatter)\\n        up_parser.add_argument(\"--spout\", type=str, help=\"Name of the specific spout to run.\")\\n        up_parser.add_argument(\"--bolt\", type=str, help=\"Name of the specific bolt to run.\")\\n        up_parser.add_argument(\"--file\", default=\"genius.yml\", type=str, help=\"Path of the genius.yml file, default to .\")\\n\\n        parser.add_argument(\"--spout\", type=str, help=\"Name of the specific spout to run.\")\\n        parser.add_argument(\"--bolt\", type=str, help=\"Name of the specific bolt to run.\")\\n        parser.add_argument(\"--file\", default=\"genius.yml\", type=str, help=\"Path of the genius.yml file, default to .\")\\n        # fmt: on\\n\\n        return parser\",\n        \"decoding_strategy\": \"generate\",\n        \"bos_token_id\": 0,\n        \"decoder_start_token_id\": 2,\n        \"early_stopping\": true,\n        \"eos_token_id\": 2,\n        \"forced_bos_token_id\": 0,\n        \"forced_eos_token_id\": 2,\n        \"length_penalty\": 2.0,\n        \"max_length\": 142,\n        \"min_length\": 56,\n        \"no_repeat_ngram_size\": 3,\n        \"num_beams\": 4,\n        \"pad_token_id\": 1,\n        \"do_sample\": false\n    }' | jq\n
    "},{"location":"blog/huggingface/summz/#domain-wise-or-content-wise-summarization","title":"Domain-wise or Content-wise Summarization","text":"

    Models can be fine-tuned to perform better on specialized tasks along various verticals - like domain knowledge or content type.

    Here are a few examples:

    "},{"location":"blog/huggingface/summz/#medical-text","title":"Medical text","text":"
    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: SummarizationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: Falconsai/medical_summarization\n            model_class: AutoModelForSeq2SeqLM\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    "},{"location":"blog/huggingface/summz/#legal-text","title":"Legal text","text":"
    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: SummarizationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: EasyTerms/legalSummerizerET\n            model_class: AutoModelForSeq2SeqLM\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    "},{"location":"blog/huggingface/summz/#conversational-text","title":"Conversational text","text":"
    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: SummarizationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: kabita-choudhary/finetuned-bart-for-conversation-summary\n            model_class: AutoModelForSeq2SeqLM\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    "},{"location":"blog/huggingface/summz/#play-around","title":"Play around","text":"

    With 1551 open source models on the hub, there is plenty to learn and play with.

    "},{"location":"blog/huggingface/table_qa/","title":"Host Table Question Answering Models Using Geniusrise","text":"
    • Host Table Question Answering Models Using Geniusrise
    • Setup and Configuration
    • Understanding genius.yml Parameters
    • Use Cases \\& Variations
      • Changing the Model for Different Table QA Tasks
      • Example genius.yml for tabular fact-checking:
    • Interacting with Your API
      • Table QA
      • Utilizing the Hugging Face Pipeline
    • Fun
      • Executing SQL on data
      • Query generators
    • Play around

    Deploying table question answering (QA) models is a sophisticated task that Geniusrise simplifies for developers. This guide aims to demonstrate how you can use Geniusrise to set up and run APIs for table QA, a crucial functionality for extracting structured information from tabular data. We'll cover the setup process, explain the parameters in the genius.yml file with examples, and provide code snippets for interacting with your API using curl and python-requests.

    "},{"location":"blog/huggingface/table_qa/#setup-and-configuration","title":"Setup and Configuration","text":"

    Requirements

    • python 3.10, PPA, AUR, brew, Windows.
    • You need to have a GPU. Most of the system works with NVIDIA GPUs.
    • Install CUDA.

    Optional: Set up a virtual environment:

    virtualenv venv -p `which python3.10`\nsource venv/bin/activate\n

    Step 1: Install Geniusrise

    pip install geniusrise\npip install geniusrise-text\n

    Step 2: Configure Your API

    Create a genius.yml file to define the settings of your table QA API.

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: QAAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: google/tapas-base-finetuned-wtq\n            model_class: AutoModelForTableQuestionAnswering\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n

    Launch your API with:

    genius rise\n
    "},{"location":"blog/huggingface/table_qa/#understanding-geniusyml-parameters","title":"Understanding genius.yml Parameters","text":"
    • model_name: The identifier for the model from Hugging Face, designed for table QA tasks.
    • model_class & tokenizer_class: Specifies the classes used for the model and tokenizer, respectively, suitable for table QA.
    • use_cuda: Utilize GPU acceleration to speed up inference times.
    • precision: Determines the floating-point precision for calculations (e.g., float for single precision).
    • device_map: Designates model parts to specific GPUs, optimizing performance.
    • endpoint & port: The network address and port where the API will be accessible.
    • username & password: Basic authentication credentials to secure access to your API.
    "},{"location":"blog/huggingface/table_qa/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/table_qa/#changing-the-model-for-different-table-qa-tasks","title":"Changing the Model for Different Table QA Tasks","text":"

    To tailor your API for different table QA tasks, such as financial data analysis or sports statistics, you can modify the model_name in your genius.yml. For example, to switch to a model optimized for financial tables, you might use google/tapas-large-finetuned-finance.

    "},{"location":"blog/huggingface/table_qa/#example-geniusyml-for-tabular-fact-checking","title":"Example genius.yml for tabular fact-checking:","text":"
    args:\n  model_name: \"google/tapas-large-finetuned-tabfact\"\n
    "},{"location":"blog/huggingface/table_qa/#interacting-with-your-api","title":"Interacting with Your API","text":""},{"location":"blog/huggingface/table_qa/#table-qa","title":"Table QA","text":"

    Using curl:

    curl -X POST http://localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"question\": \"Who had the highest batting average?\", \"data\": [{\"player\": \"John Doe\", \"average\": \".312\"}, {\"player\": \"Jane Roe\", \"average\": \".328\"}]}'\n

    Using python-requests:

    import requests\ndata = {\n\"question\": \"Who had the highest batting average?\",\n\"data\": [\n{\"player\": \"John Doe\", \"average\": \".312\"},\n{\"player\": \"Jane Roe\", \"average\": \".328\"}\n]\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/answer\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/table_qa/#utilizing-the-hugging-face-pipeline","title":"Utilizing the Hugging Face Pipeline","text":"

    Although primarily for text-based QA, you might experiment with the pipeline for preprocessing or extracting text from tables before querying.

    Using curl:

    curl -X POST http://localhost:3000/api/v1/answer_pipeline \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"question\": \"What is the total revenue?\", \"data\": \"The total revenue in Q1 was $10M, and in Q2 was $15M.\"}'\n

    Using python-requests:

    import requests\ndata = {\n\"question\": \"What is the total revenue?\",\n\"data\": \"\nThe total revenue in Q1 was $10M, and in Q2 was $15M.\"\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/answer_pipeline\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/table_qa/#fun","title":"Fun","text":"

    Table QA is dominated by two families of base models: Google's TAPAS and Microsoft's TAPEX.

    "},{"location":"blog/huggingface/table_qa/#executing-sql-on-data","title":"Executing SQL on data","text":"

    Given some data and an SQL query, this model can return the results.

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: QAAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: microsoft/tapex-large-sql-execution\n            model_class: BartForConditionalGeneration\n            tokenizer_class: TapexTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    /usr/bin/curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"data\": {\n            \"year\": [1896, 1900, 1904, 2004, 2008, 2012],\n            \"city\": [\"athens\", \"paris\", \"st. louis\", \"athens\", \"beijing\", \"london\"]\n        },\n        \"question\": \"select year where city = beijing\"\n  }\n  ' | jq\n\n# {\n#   \"data\": {\n#     \"year\": [\n#       1896,\n#       1900,\n#       1904,\n#       2004,\n#       2008,\n#       2012\n#     ],\n#     \"city\": [\n#       \"athens\",\n#       \"paris\",\n#       \"st. louis\",\n#       \"athens\",\n#       \"beijing\",\n#       \"london\"\n#     ]\n#   },\n#   \"question\": \"select year where city = beijing\",\n#   \"answer\": {\n#     \"answers\": [\n#       \"2008\"        # <----\n#     ],\n#     \"aggregation\": \"NONE\"\n#   }\n# }\n
    "},{"location":"blog/huggingface/table_qa/#query-generators","title":"Query generators","text":"

    Given some data and a natural language query, these models generate a query that can be used to compute the result. These models are what power spreadsheet automations.

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: QAAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: google/tapas-large-finetuned-wtq\n            model_class: AutoModelForTableQuestionAnswering\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    /usr/bin/curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"data\": {\n          \"population\": [\"10.6\", \"12.6\", \"12.9\", \"11.9\", \"10.3\", \"11.5\", \"12.5\", \"12.0\", \"11.5\", \"12.4\", \"11.0\", \"12.8\", \"12.5\", \"10.6\", \"11.9\", \"12.0\", \"12.6\", \"11.7\", \"12.3\", \"10.8\", \"11.2\", \"12.7\", \"10.5\", \"11.3\", \"12.2\", \"10.9\", \"11.7\", \"10.3\", \"10.9\", \"10.2\", \"10.6\", \"10.4\", \"10.5\", \"11.5\", \"11.7\", \"10.9\", \"10.4\", \"11.0\", \"12.4\", \"12.2\", \"11.3\", \"10.2\", \"11.0\", \"11.5\", \"11.0\", \"10.9\", \"11.5\", \"12.8\", \"11.3\", \"11.9\", \"12.9\", \"10.9\", \"11.4\", \"12.8\", \"10.3\", \"12.6\", \"11.1\", \"10.6\", \"12.0\", \"12.4\", \"10.2\", \"12.9\", \"11.7\", \"12.3\", \"12.4\", \"12.0\", \"10.9\", \"10.9\", \"12.3\", \"12.7\", \"10.2\", \"11.7\", \"12.4\", \"12.5\", \"12.0\", \"11.0\", \"12.9\", \"10.9\", \"10.4\", \"12.8\", \"10.3\", \"11.6\", \"12.9\", \"12.4\", \"12.4\", \"10.2\", \"11.2\", \"10.2\", \"10.1\", \"12.7\", \"11.2\", \"12.5\", \"11.7\", \"11.4\", \"10.7\", \"10.9\", \"11.5\", \"11.3\", \"10.3\", \"10.7\", \"11.2\", \"10.6\", \"11.0\", \"12.3\", \"11.7\", \"10.0\", \"10.4\", \"11.4\", \"11.5\", \"12.2\"],\n          \"city\": [\"Tokyo\", \"Delhi\", \"Shanghai\", \"Sao Paulo\", \"Mumbai\", \"Mexico City\", \"Beijing\", \"Osaka\", \"Cairo\", \"New York\", \"Dhaka\", \"Karachi\", \"Buenos Aires\", \"Kolkata\", \"Istanbul\", \"Chongqing\", \"Lagos\", \"Rio de Janeiro\", \"Tianjin\", \"Kinshasa\", \"Guangzhou\", \"Los Angeles\", \"Moscow\", \"Shenzhen\", \"Lahore\", \"Bangalore\", \"Paris\", \"Bogota\", \"Jakarta\", \"Chennai\", \"Lima\", \"Bangkok\", \"Seoul\", \"Nagoya\", \"Hyderabad\", \"London\", \"Tehran\", \"Chicago\", \"Chengdu\", \"Nanjing\", \"Wuhan\", \"Ho Chi Minh City\", \"Luanda\", \"Ahmedabad\", \"Kuala Lumpur\", \"Riyadh\", \"Baghdad\", \"Santiago\", \"Surat\", \"Madrid\", \"Suzhou\", \"Pune\", 
\"Houston\", \"Dallas\", \"Toronto\", \"Dar es Salaam\", \"Miami\", \"Belo Horizonte\", \"Singapore\", \"Philadelphia\", \"Atlanta\", \"Fukuoka\", \"Khartoum\", \"Barcelona\", \"Johannesburg\", \"Saint Petersburg\", \"Qingdao\", \"Dalian\", \"Washington, D.C.\", \"Yangon\", \"Alexandria\", \"Jinan\", \"Guadalajara\", \"Harbin\", \"San Francisco\", \"Fort Worth\", \"Boston\", \"Detroit\", \"Montreal\", \"Porto Alegre\", \"Ankara\", \"Monterrey\", \"Nairobi\", \"Doha\", \"Luoyang\", \"Kuwait City\", \"Dublin\", \"Mecca\", \"Medina\", \"Amman\", \"Algiers\", \"Kampala\", \"Maputo\", \"Addis Ababa\", \"Brasilia\", \"Havana\", \"Faisalabad\", \"Tashkent\", \"Accra\", \"Sapporo\", \"Manila\", \"Hanoi\", \"Sydney\", \"Melbourne\", \"Cape Town\", \"Auckland\", \"Oslo\", \"Stockholm\", \"Helsinki\", \"Copenhagen\"]\n        },\n        \"question\": \"what is the total population of these cities\"\n  }\n  ' | jq\n\n# {\n#   \"data\": {\n#     \"population\": [ ...\n#     ],\n#     \"city\": [\n#       \"Tokyo\", ...\n#     ]\n#   },\n#   \"question\": \"what is the total population of these cities\",\n#   \"answer\": {\n#     \"answers\": [\n#       \"10.6\",\n#       ...\n#       \"12.2\"\n#     ],\n#     \"aggregation\": \"COUNT\" # <---\n#   }\n# }\n

    The answer.aggregation field indicates the operation to be done on the answer.answers field to get the answer.

    However, when queries involve selecting one value from the data, the value of answer.aggregation remains as NONE.

    /usr/bin/curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n      \"data\": [\n        {\n          \"Name\": \"Acme Corp\",\n          \"Revenue\": \"1622908.31\",\n          \"Expenses\": \"802256.16\",\n          \"Profit\": \"820652.15\",\n          \"Assets\": \"2758871.86\",\n          \"Liabilities\": \"1786333.21\",\n          \"Equity\": \"972538.65\"\n        },\n        {\n          \"Name\": \"Globex Inc\",\n          \"Revenue\": \"1846200.97\",\n          \"Expenses\": \"1414781.1\",\n          \"Profit\": \"431419.87\",\n          \"Assets\": \"246642.65\",\n          \"Liabilities\": \"1969146.36\",\n          \"Equity\": \"-1722503.71\"\n        },\n        {\n          \"Name\": \"Soylent Corp\",\n          \"Revenue\": \"1585575.02\",\n          \"Expenses\": \"1457030.2\",\n          \"Profit\": \"128544.82\",\n          \"Assets\": \"1599655.56\",\n          \"Liabilities\": \"1260425.14\",\n          \"Equity\": \"339230.42\"\n        },\n        {\n          \"Name\": \"Initech LLC\",\n          \"Revenue\": \"179462.76\",\n          \"Expenses\": \"792898.88\",\n          \"Profit\": \"-613436.12\",\n          \"Assets\": \"780230.44\",\n          \"Liabilities\": \"990416.97\",\n          \"Equity\": \"-210186.53\"\n        },\n        {\n          \"Name\": \"Umbrella Corp\",\n          \"Revenue\": \"1882828.73\",\n          \"Expenses\": \"487215.16\",\n          \"Profit\": \"1395613.57\",\n          \"Assets\": \"2933377.54\",\n          \"Liabilities\": \"1519978.31\",\n          \"Equity\": \"1413399.23\"\n        },\n        {\n          \"Name\": \"Vandelay Ind\",\n          \"Revenue\": \"1641614.11\",\n          \"Expenses\": \"722957.57\",\n          \"Profit\": \"918656.54\",\n          \"Assets\": \"1818305.88\",\n          \"Liabilities\": \"1051099.45\",\n          \"Equity\": \"767206.43\"\n        },\n        {\n          \"Name\": \"Hooli Inc\",\n    
      \"Revenue\": \"784472.77\",\n          \"Expenses\": \"1035568.89\",\n          \"Profit\": \"-251096.12\",\n          \"Assets\": \"1011898.52\",\n          \"Liabilities\": \"757685.31\",\n          \"Equity\": \"254213.21\"\n        },\n        {\n          \"Name\": \"Stark Industries\",\n          \"Revenue\": \"1752780.24\",\n          \"Expenses\": \"954382.19\",\n          \"Profit\": \"798398.05\",\n          \"Assets\": \"1828265.8\",\n          \"Liabilities\": \"1785958.67\",\n          \"Equity\": \"42307.13\"\n        },\n        {\n          \"Name\": \"Wayne Enterprises\",\n          \"Revenue\": \"772662.41\",\n          \"Expenses\": \"724219.29\",\n          \"Profit\": \"48443.12\",\n          \"Assets\": \"2952379.67\",\n          \"Liabilities\": \"1255329.61\",\n          \"Equity\": \"1697050.06\"\n        },\n        {\n          \"Name\": \"Weyland-Yutani\",\n          \"Revenue\": \"1157644.0\",\n          \"Expenses\": \"1454230.66\",\n          \"Profit\": \"-296586.66\",\n          \"Assets\": \"776909.75\",\n          \"Liabilities\": \"759733.68\",\n          \"Equity\": \"17176.07\"\n        }\n      ],\n      \"question\": \"Given the balance sheet data, identify the company with the highest equity to assets ratio.\"\n}\n' | jq\n\n# {\n#   \"data\": [\n#     ...\n#   ],\n#   \"question\": \"Given the balance sheet data, identify the company with the highest equity to assets ratio.\",\n#   \"answer\": {\n#     \"answers\": [\n#       \"Wayne Enterprises\"\n#     ],\n#     \"aggregation\": \"NONE\"\n#   }\n# }\n

    Let's verify this:

    def calculate_highest_equity_to_assets_ratio(data):\n    ratios = {}\nfor company in data[\"data\"]:\n        name = company[\"Name\"]\nequity = float(company[\"Equity\"])\nassets = float(company[\"Assets\"])\nratio = equity / assets if assets != 0 else 0\nratios[name] = ratio\n\nhighest_ratio_company = max(ratios, key=ratios.get)\nhighest_ratio = ratios[highest_ratio_company]\nreturn highest_ratio_company, highest_ratio\n\nhighest_ratio_company, highest_ratio = calculate_highest_equity_to_assets_ratio(financial_data)\nhighest_ratio_company, highest_ratio\n

    which gives us:

    ('Wayne Enterprises', 0.574807528057528)\n

    yay \ud83e\udd73

    "},{"location":"blog/huggingface/table_qa/#play-around","title":"Play around","text":"

    These kinds of models are few, with 82 models on the Hugging Face hub.

    "},{"location":"blog/huggingface/trans/","title":"Host Translation Models Using Geniusrise","text":"

    This guide will walk you through deploying translation models using Geniusrise, covering the setup, configuration, and interaction with the translation API for various use cases.

    "},{"location":"blog/huggingface/trans/#setup-and-configuration","title":"Setup and Configuration","text":"

    Requirements

    • python 3.10, PPA, AUR, brew, Windows.
    • You need to have a GPU. Most of the system works with NVIDIA GPUs.
    • Install CUDA.

    Optional: Set up a virtual environment:

    virtualenv venv -p `which python3.10`\nsource venv/bin/activate\n

    Installation:

    Begin by installing Geniusrise and the necessary text processing extensions:

    pip install geniusrise\npip install geniusrise-text\n

    Configuration (genius.yml):

    Next, define your translation service in a genius.yml file:

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: TranslationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: facebook/mbart-large-50-many-to-many-mmt\n            model_class: AutoModelForSeq2SeqLM\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n

    To launch your API, execute:

    genius rise\n
    "},{"location":"blog/huggingface/trans/#configuration-parameters-explained","title":"Configuration Parameters Explained","text":"
    • model_name: Specifies the model to use, such as facebook/mbart-large-50-many-to-many-mmt for multilingual translation.
    • model_class & tokenizer_class: Defines the classes for the model and tokenizer, crucial for the translation process.
    • use_cuda: Indicates whether to use GPU acceleration for faster processing.
    • precision: The computational precision (e.g., float) affects performance and resource usage.
    • endpoint & port: The network address where the API is accessible.
    • username & password: Security credentials for accessing the API.
    "},{"location":"blog/huggingface/trans/#interacting-with-the-translation-api","title":"Interacting with the Translation API","text":""},{"location":"blog/huggingface/trans/#translating-text","title":"Translating Text","text":"

    Translate text from one language to another using a simple HTTP request.

    Example using curl:

    curl -X POST http://localhost:3000/api/v1/translate \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"text\": \"\u0938\u0902\u092f\u0941\u0915\u094d\u0924 \u0930\u093e\u0937\u094d\u091f\u094d\u0930 \u0915\u0947 \u092a\u094d\u0930\u092e\u0941\u0916 \u0915\u093e \u0915\u0939\u0928\u093e \u0939\u0948 \u0915\u093f \u0938\u0940\u0930\u093f\u092f\u093e \u092e\u0947\u0902 \u0915\u094b\u0908 \u0938\u0948\u0928\u094d\u092f \u0938\u092e\u093e\u0927\u093e\u0928 \u0928\u0939\u0940\u0902 \u0939\u0948\",\n        \"source_lang\": \"hi_IN\",\n        \"target_lang\": \"en_XX\",\n        \"decoding_strategy\": \"generate\",\n        \"decoder_start_token_id\": 2,\n        \"early_stopping\": true,\n        \"eos_token_id\": 2,\n        \"forced_eos_token_id\": 2,\n        \"max_length\": 200,\n        \"num_beams\": 5,\n        \"pad_token_id\": 1\n    }' | jq\n

    Example using python-requests:

    import requests\ndata = {\n\"text\": \"\u0938\u0902\u092f\u0941\u0915\u094d\u0924 \u0930\u093e\u0937\u094d\u091f\u094d\u0930 \u0915\u0947 \u092a\u094d\u0930\u092e\u0941\u0916 \u0915\u093e \u0915\u0939\u0928\u093e \u0939\u0948 \u0915\u093f \u0938\u0940\u0930\u093f\u092f\u093e \u092e\u0947\u0902 \u0915\u094b\u0908 \u0938\u0948\u0928\u094d\u092f \u0938\u092e\u093e\u0927\u093e\u0928 \u0928\u0939\u0940\u0902 \u0939\u0948\",\n\"source_lang\": \"hi_IN\",\n\"target_lang\": \"en_XX\",\n\"decoding_strategy\": \"generate\",\n\"decoder_start_token_id\": 2,\n\"early_stopping\": true,\n\"eos_token_id\": 2,\n\"forced_eos_token_id\": 2,\n\"max_length\": 200,\n\"num_beams\": 5,\n\"pad_token_id\": 1\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/translate\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/trans/#advanced-translation-features","title":"Advanced Translation Features","text":"

    For use cases requiring specific translation strategies or parameters (e.g., beam search, number of beams), you can pass additional parameters in your request to customize the translation process.

    "},{"location":"blog/huggingface/trans/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/trans/#different-language-pairs","title":"Different Language Pairs","text":"

    Adjust the source_lang and target_lang parameters to cater to various language pairs, enabling translation between numerous languages supported by the chosen model.

    "},{"location":"blog/huggingface/trans/#customizing-translation-parameters","title":"Customizing Translation Parameters","text":"

    For advanced translation needs, such as controlling the length of the output or employing beam search, modify the additional_params in your requests:

    {\n\"text\": \"Your text here\",\n\"source_lang\": \"en_XX\",\n\"target_lang\": \"es_XX\",\n\"num_beams\": 4\n}\n
    "},{"location":"blog/huggingface/trans/#fun","title":"Fun","text":"

    There are two families of models from facebook that can perform any-to-any language translation among a large number of languages.

    • facebook/mbart-large-50-many-to-many-mmt: 50 languages
    • facebook/nllb-200-distilled-600M: 200 languages

    Both the MBART and the NLLB families have several members, with facebook/nllb-moe-54b, a 54-billion-parameter mixture of experts, being the largest and most capable one.

    See here for the language codes for the FLORES-200 dataset.

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: TranslationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: facebook/nllb-200-3.3B\n            model_class: AutoModelForSeq2SeqLM\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n

    We can try translating from hindi to tatar:

    curl -X POST http://localhost:3000/api/v1/translate \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"text\": \"\u0938\u0902\u092f\u0941\u0915\u094d\u0924 \u0930\u093e\u0937\u094d\u091f\u094d\u0930 \u0915\u0947 \u092a\u094d\u0930\u092e\u0941\u0916 \u0915\u093e \u0915\u0939\u0928\u093e \u0939\u0948 \u0915\u093f \u0938\u0940\u0930\u093f\u092f\u093e \u092e\u0947\u0902 \u0915\u094b\u0908 \u0938\u0948\u0928\u094d\u092f \u0938\u092e\u093e\u0927\u093e\u0928 \u0928\u0939\u0940\u0902 \u0939\u0948\",\n        \"target_lang\": \"tat_Cyrl\",\n        \"decoding_strategy\": \"generate\",\n        \"bos_token_id\": 0,\n        \"decoder_start_token_id\": 2,\n        \"eos_token_id\": 2,\n        \"max_length\": 200,\n        \"pad_token_id\": 1\n    }'\n

    Now how do we even verify whether this is correct? Lets reverse translate followed by sentence similarity from NLI. We need to launch 2 containers - one for translation and another for NLI:

    version: \"1\"\nbolts:\nmy_translation_bolt:\nname: TranslationAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: facebook/nllb-200-3.3B\nmodel_class: AutoModelForSeq2SeqLM\ntokenizer_class: AutoTokenizer\nuse_cuda: true\nprecision: float\ndevice_map: cuda:0\nendpoint: \"0.0.0.0\"\nport: 3000\ncors_domain: http://localhost:3000\nusername: user\npassword: password\nmy_nli_bolt:\nname: NLIAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: facebook/bart-large-mnli\nmodel_class: AutoModelForSequenceClassification\ntokenizer_class: AutoTokenizer\nuse_cuda: true\nprecision: float\ndevice_map: cuda:0\nendpoint: \"0.0.0.0\"\nport: 3001\ncors_domain: http://localhost:3001\nusername: user\npassword: password\n

    import requests\n# First we translate this hindi sentence to tatar\ndata = {\n\"text\": \"\u0938\u0902\u092f\u0941\u0915\u094d\u0924 \u0930\u093e\u0937\u094d\u091f\u094d\u0930 \u0915\u0947 \u092a\u094d\u0930\u092e\u0941\u0916 \u0915\u093e \u0915\u0939\u0928\u093e \u0939\u0948 \u0915\u093f \u0938\u0940\u0930\u093f\u092f\u093e \u092e\u0947\u0902 \u0915\u094b\u0908 \u0938\u0948\u0928\u094d\u092f \u0938\u092e\u093e\u0927\u093e\u0928 \u0928\u0939\u0940\u0902 \u0939\u0948\",\n\"target_lang\": \"tat_Cyrl\",\n\"decoding_strategy\": \"generate\",\n\"bos_token_id\": 0,\n\"decoder_start_token_id\": 2,\n\"eos_token_id\": 2,\n\"max_length\": 200,\n\"pad_token_id\": 1\n}\nresponse = requests.post(\"http://localhost:3000/api/v1/translate\",\njson=data,\nauth=('user', 'password'))\ntranslated = response.json()[\"translated_text\"]\n# \u0411\u041c\u041e \u0431\u0430\u0448\u043b\u044b\u0433\u044b \u0421\u04af\u0440\u0438\u044f\u0434\u04d9 \u0445\u04d9\u0440\u0431\u0438 \u0447\u0430\u0440\u0430\u043b\u0430\u0440 \u044e\u043a \u0434\u0438\u043f \u0431\u0435\u043b\u0434\u0435\u0440\u04d9\n# Then we translate the tatar back to hindi\nrev = data.copy()\nrev[\"text\"] = translated\nrev[\"target_lang\"] = \"hin_Deva\"\nresponse = requests.post(\"http://localhost:3000/api/v1/translate\",\njson=rev,\nauth=('user', 'password'))\nrev_translated = response.json()[\"translated_text\"]\n# Finally we look at similarity of the source and reverse-translated hindi sentences\ndata = {\n\"text1\": data[\"text\"],\n\"text2\": rev_translated\n}\nresponse = requests.post(\"http://localhost:3001/api/v1/textual_similarity\",\njson=data,\nauth=('user', 'password'))\nprint(response.json())\n# {\n#     'text1': '\u0938\u0902\u092f\u0941\u0915\u094d\u0924 \u0930\u093e\u0937\u094d\u091f\u094d\u0930 \u0915\u0947 \u092a\u094d\u0930\u092e\u0941\u0916 \u0915\u093e \u0915\u0939\u0928\u093e \u0939\u0948 \u0915\u093f \u0938\u0940\u0930\u093f\u092f\u093e \u092e\u0947\u0902 \u0915\u094b\u0908 
\u0938\u0948\u0928\u094d\u092f \u0938\u092e\u093e\u0927\u093e\u0928 \u0928\u0939\u0940\u0902 \u0939\u0948',\n#     'text2': '\u092c\u0940\u090f\u092e\u0913 \u092a\u094d\u0930\u092e\u0941\u0916 \u0928\u0947 \u0915\u0939\u093e \u0915\u093f \u0938\u0940\u0930\u093f\u092f\u093e \u092e\u0947\u0902 \u0915\u094b\u0908 \u0938\u0948\u0928\u094d\u092f \u0909\u092a\u093e\u092f \u0928\u0939\u0940\u0902 \u0939\u0948\u0902',\n#     'similarity_score': 0.9829527983379287\n# }\n
    0.9829527983379287 looks like a great similarity score, so the translation really works! (or the mistakes are isomorphic) \ud83e\udd73\ud83d\udc4d

    "},{"location":"blog/huggingface/trans/#play-around","title":"Play around","text":"

    There is not much to really do in translation except mess around with different languages \ud83e\udd37\u200d\u2642\ufe0f Not many models either; facebook is the undisputed leader in translation models.

    "},{"location":"blog/huggingface/txtclass/","title":"Host Text Classification Models Using Geniusrise","text":"
    • Host Text Classification Models Using Geniusrise
    • Quick Setup
    • Configuration Breakdown
    • Use Cases \\& Variations
      • Sentiment Analysis
      • Content Moderation
      • Language Detection
      • Making API Requests
      • Classify Text
      • Classification Pipeline
    • Fun
      • Political bias detection
      • Intent classification
      • Hallucination Evaluation
      • Irony Detection
    • Play around

    This post will guide you through creating inference APIs for different text classification tasks using geniusrise, explaining the genius.yml configuration and providing examples of how to interact with your API using curl and python-requests.

    "},{"location":"blog/huggingface/txtclass/#quick-setup","title":"Quick Setup","text":"

    Requirements:

    • python 3.10, PPA, AUR, brew, Windows.
    • You need to have a GPU. Most of the system works with NVIDIA GPUs.
    • Install CUDA.

    Installation:

    Optional: Set up a virtual environment:

    virtualenv venv -p `which python3.10`\nsource venv/bin/activate\n

    Install the packages:

    pip install torch\npip install geniusrise\npip install geniusrise-text\n

    Configuration File (genius.yml):

    Create a genius.yml with the necessary configuration for your text classification API:

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: TextClassificationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: tomh/toxigen_roberta\n            model_class: AutoModelForSequenceClassification\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            compile: false\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n

    Launch your API with:

    genius rise\n
    "},{"location":"blog/huggingface/txtclass/#configuration-breakdown","title":"Configuration Breakdown","text":"
    • model_name: Specify the Hugging Face model ID, e.g., bert-base-uncased for sentiment analysis.
    • use_cuda: Enable GPU acceleration with true or false for CPU.
    • precision: Set to float for single precision; consider half for faster inference on compatible GPUs. Does not work for most small models.
    • device_map: Assign model parts to specific GPUs, e.g., cuda:0.
    • endpoint & port: Define the API access point.
    • username & password: Secure your API with basic authentication.
    "},{"location":"blog/huggingface/txtclass/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/txtclass/#sentiment-analysis","title":"Sentiment Analysis","text":"

    For sentiment analysis, swap the model_name to a model trained for sentiment, like distilbert-base-uncased-finetuned-sst-2-english.

    args:\n  model_name: \"distilbert-base-uncased-finetuned-sst-2-english\"\n
    "},{"location":"blog/huggingface/txtclass/#content-moderation","title":"Content Moderation","text":"

    To filter inappropriate content, use a model like roberta-base-openai-detector.

    args:\n  model_name: \"roberta-base-openai-detector\"\n
    "},{"location":"blog/huggingface/txtclass/#language-detection","title":"Language Detection","text":"

    For detecting the language of the input text, a model like xlm-roberta-base is suitable.

    args:\n  model_name: \"xlm-roberta-base\"\n

    Try out various models from huggingface.

    "},{"location":"blog/huggingface/txtclass/#making-api-requests","title":"Making API Requests","text":""},{"location":"blog/huggingface/txtclass/#classify-text","title":"Classify Text","text":"

    cURL:

    curl -X POST http://localhost:3000/api/v1/classify \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"text\": \"Your text here.\"}'\n

    Python-Requests:

    import requests\nresponse = requests.post(\"http://localhost:3000/api/v1/classify\",\njson={\"text\": \"Your text here.\"},\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/txtclass/#classification-pipeline","title":"Classification Pipeline","text":"

    cURL:

    curl -X POST http://localhost:3000/api/v1/classification_pipeline \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\"text\": \"Your text here.\"}'\n

    Python-Requests:

    import requests\nresponse = requests.post(\"http://localhost:3000/api/v1/classification_pipeline\",\njson={\"text\": \"Your text here.\"},\nauth=('user', 'password'))\nprint(response.json())\n
    "},{"location":"blog/huggingface/txtclass/#fun","title":"Fun","text":"

    There are quite a few fun models to try out from huggingface!

    "},{"location":"blog/huggingface/txtclass/#political-bias-detection","title":"Political bias detection","text":"

    This model tries to classify text according to the political bias they might possess.

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: TextClassificationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: bucketresearch/politicalBiasBERT\n            model_class: AutoModelForSequenceClassification\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            compile: false\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    /usr/bin/curl -X POST localhost:3000/api/v1/classify \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"text\": \"i think i agree with bjp that hindus need to be respected\"\n    }' | jq\n\n# {\n#   \"input\": \"i think i agree with bjp that hindus need to be respected\",\n#   \"label_scores\": {\n#     \"LEFT\": 0.28080788254737854,\n#     \"CENTER\": 0.18140915036201477,\n#     \"RIGHT\": 0.5377829670906067 # <--\n#   }\n# }\n
    /usr/bin/curl -X POST localhost:3000/api/v1/classify \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"text\": \"these ghettos are sprawling these days and the people who live there stink\"\n    }' | jq\n\n# {\n#   \"input\": \"these ghettos are sprawling these days and the people who live there stink\",\n#   \"label_scores\": {\n#     \"LEFT\": 0.38681042194366455, # <-- NIMBY?\n#     \"CENTER\": 0.20437702536582947,\n#     \"RIGHT\": 0.408812552690506 # <--\n#   }\n# }\n

    Works fairly well empirically for medium-sized sentences and in an American context.

    "},{"location":"blog/huggingface/txtclass/#intent-classification","title":"Intent classification","text":"

    Text classification can be used to figure out the intent of the user in a chat conversation scenario, e.g. to determine whether the user has an intent to explore or to buy.

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: TextClassificationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: Falconsai/intent_classification\n            model_class: AutoModelForSequenceClassification\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            compile: false\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    /usr/bin/curl -X POST localhost:3000/api/v1/classify \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"text\": \"hey i havent got my package yet where is it\"\n    }' | jq\n\n# {\n#   \"input\": \"hey i havent got my package yet where is it\",\n#   \"label_scores\": {\n#     \"cancellation\": 6.553709398088303E-12,\n#     \"ordering\": 4.977344745534613E-15,\n#     \"shipping\": 4.109915668426903E-15,\n#     \"invoicing\": 1.3524543897996955E-13,\n#     \"billing and payment\": 2.5260177283215057E-10,\n#     \"returns and refunds\": 1.915349389508547E-12,\n#     \"complaints and feedback\": 1.0671016614826126E-13,\n#     \"speak to person\": 2.6417441435886042E-15,\n#     \"edit account\": 3.1924864227900196E-13,\n#     \"delete account\": 2.704471304022793E-13,\n#     \"delivery information\": 1.0,                 # <--\n#     \"subscription\": 1.2307567616963444E-13,\n#     \"recover password\": 1.387644556552492E-12,\n#     \"registration problems\": 2.686436142984583E-13,\n#     \"appointment\": 3.555285948454723E-13\n#   }\n# }\n
    "},{"location":"blog/huggingface/txtclass/#hallucination-evaluation","title":"Hallucination Evaluation","text":"

    Figuring out whether your chat / LLM model is hallucinating or not is a text classification task!

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: TextClassificationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: vectara/hallucination_evaluation_model\n            model_class: AutoModelForSequenceClassification\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            compile: false\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    /usr/bin/curl -X POST localhost:3000/api/v1/classify \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"text\": \"A man walks into a bar and buys a drink [SEP] A bloke swigs alcohol at a pub\"\n    }' | jq\n\n# {\n#   \"input\": \"A man walks into a bar and buys a drink [SEP] A bloke swigs alcohol at a pub\",\n#   \"label_scores\": [\n#     0.6105160713195801\n#   ]\n# }\n
    "},{"location":"blog/huggingface/txtclass/#irony-detection","title":"Irony Detection","text":"

    Yussss NLP has advanced enough for us to easily be able to detect irony!

    version: \"1\"\n\nbolts:\n    my_bolt:\n        name: TextClassificationAPI\n        state:\n            type: none\n        input:\n            type: batch\n            args:\n                input_folder: ./input\n        output:\n            type: batch\n            args:\n                output_folder: ./output\n        method: listen\n        args:\n            model_name: cardiffnlp/twitter-roberta-base-irony\n            model_class: AutoModelForSequenceClassification\n            tokenizer_class: AutoTokenizer\n            use_cuda: true\n            precision: float\n            device_map: cuda:0\n            compile: false\n            endpoint: \"0.0.0.0\"\n            port: 3000\n            cors_domain: http://localhost:3000\n            username: user\n            password: password\n
    /usr/bin/curl -X POST localhost:3000/api/v1/classify \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d '{\n        \"text\": \"What a wonderful day to have a flat tire!\"\n    }' | jq\n\n# {\n#   \"input\": \"What a wonderful day to have a flat tire!\",\n#   \"label_scores\": {\n#     \"non_irony\": 0.023495545610785484,\n#     \"irony\": 0.9765045046806335  <---\n#   }\n# }\n
    "},{"location":"blog/huggingface/txtclass/#play-around","title":"Play around","text":"

    There are 49,863 text classification models as of this article on huggingface. Play around with them, tweak various parameters, learn about various usecases and cool shit that can be built with \"mere\" text classification!

    "},{"location":"blog/huggingface/vqa/","title":"Host Visual QA Models Using Geniusrise","text":"

    Visual Question Answering (VQA) combines the power of visual understanding with natural language processing to answer questions about images. Geniusrise offers a streamlined process to deploy VQA models as APIs, making it accessible to developers to integrate advanced AI capabilities into their applications. This blog post demonstrates how to set up VQA APIs using Geniusrise and provides examples for various use cases.

    "},{"location":"blog/huggingface/vqa/#setting-up","title":"Setting Up","text":"

    To begin, ensure you have Geniusrise and Geniusrise-Vision installed:

    pip install geniusrise\npip install geniusrise-vision\n

    Create a genius.yml configuration file tailored to your API requirements, specifying the model, tokenizer, and additional parameters necessary for inference.

    "},{"location":"blog/huggingface/vqa/#sample-configuration","title":"Sample Configuration","text":"

    Below is an example of a configuration file for a VQA API:

    version: \"1\"\nbolts:\nmy_bolt:\nname: VisualQAAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: \"google/pix2struct-ai2d-base\"\nmodel_class: \"Pix2StructForConditionalGeneration\"\nprocessor_class: \"Pix2StructProcessor\"\nuse_cuda: true\nprecision: \"float\"\ndevice_map: \"cuda:0\"\nendpoint: \"*\"\nport: 3000\ncors_domain: \"http://localhost:3000\"\nusername: \"user\"\npassword: \"password\"\n

    This configuration sets up a VQA API using the Pix2Struct model, ready to process images and answer questions about them.

    "},{"location":"blog/huggingface/vqa/#interacting-with-your-api","title":"Interacting with Your API","text":"

    To interact with your VQA API, encode your images in base64 format and construct a JSON payload with the image and the question. Here are examples using curl:

    # Convert the image to base64 and prepare the payload\nbase64 -w 0 image.jpg | awk '{print \"{\\\"image_base64\\\": \\\"\"$0\"\\\", \\\"question\\\": \\\"What is in this image?\\\"}\"}' > payload.json\n\n# Send the request to your API\ncurl -X POST http://localhost:3000/api/v1/answer_question \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @payload.json | jq\n
    "},{"location":"blog/huggingface/vqa/#use-cases-variations","title":"Use Cases & Variations","text":""},{"location":"blog/huggingface/vqa/#general-vqa","title":"General VQA","text":"

    Use models like google/pix2struct-ai2d-base for general VQA tasks, where the model predicts answers based on the image content and the posed question.

    "},{"location":"blog/huggingface/vqa/#specialized-vqa","title":"Specialized VQA","text":"

    For specialized domains, such as medical imaging or technical diagrams, tailor your genius.yml to use domain-specific models. This requires replacing the model_name, model_class, and processor_class with those suitable for your specific application.

    "},{"location":"blog/huggingface/vqa/#advanced-configuration","title":"Advanced Configuration","text":"

    Experiment with different models, precision levels, and CUDA settings to optimize performance and accuracy for your use case. Geniusrise allows for detailed configuration, including quantization and torchscript options, to fine-tune the deployment according to your requirements.

    "},{"location":"bolts/openai/base/","title":"Base Fine Tuner","text":"

    Bases: Bolt

    An abstract base class for writing bolts for fine-tuning OpenAI models.

    This base class is intended to be subclassed for fine-tuning OpenAI models. The chief objective of its subclasses is to load and preprocess the dataset, though of course, other methods, including fine-tuning, can be overridden for customization.

    This bolt uses the OpenAI API to fine-tune a pre-trained model.

    Each subclass can be invoked using the genius cli or yaml.

    "},{"location":"bolts/openai/base/#geniusrise_openai.OpenAIFineTuner--using-genius-cli","title":"Using genius cli","text":"
    genius <bolt_name> rise \\\nbatch \\\n--input_s3_bucket my-input-bucket \\\n--input_s3_folder my-input-folder \\\nbatch \\\n--output_s3_bucket my-output-bucket \\\n--output_s3_folder my-output-folder \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise \\\n--postgres_table task_state \\\nfine_tune \\\n--args\n        model=gpt-3.5-turbo \\\nn_epochs=2 \\\nbatch_size=64 \\\nlearning_rate_multiplier=0.5 \\\nprompt_loss_weight=1 \\\nwait=True\n

    This will load and preprocess data from input s3 location, and upload it to openai for fine tuning, and wait.

    "},{"location":"bolts/openai/base/#geniusrise_openai.OpenAIFineTuner--using-yaml","title":"Using YAML","text":"

    Bolts can be invoked using the genius cli on a yaml file.

    Create a yaml file with the following content (looks very similar to cli):

    version: 1\nbolts:\nmy_fine_tuner:\nname: OpenAIClassificationFineTuner\nmethod: fine_tune\nargs:\nmodel: gpt-3.5-turbo\nn_epochs: 2\nbatch_size: 64\nlearning_rate_multiplier: 0.5\nprompt_loss_weight: 1\nwait: True\ninput:\ntype: batch\nbucket: my-input-bucket\nfolder: my-input-folder\noutput:\ntype: batch\nbucket: my-output-bucket\nfolder: my-output-folder\nstate:\ntype: postgres\nhost: 127.0.0.1\nport: 5432\nuser: postgres\npassword: postgres\ndatabase: geniusrise\ntable: state\n
    genius rise\n

    Gotchas:

    1. Extra command line arguments can be passed to the load_dataset method via fine_tune method by appending data_ to the param name.

    e.g.

            args:\nmodel: gpt-3.5-turbo\n...\ndata_some_arbitrary_key: passed_to_load_dataset_method\n
    "},{"location":"bolts/openai/base/#geniusrise_openai.base.OpenAIFineTuner.__init__","title":"__init__(input, output, state)","text":"

    Initialize the bolt.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output BatchOutput

    The output data.

    required state State

    The state manager.

    required"},{"location":"bolts/openai/base/#geniusrise_openai.base.OpenAIFineTuner.delete_fine_tuned_model","title":"delete_fine_tuned_model(model_id) staticmethod","text":"

    Delete a fine-tuned model.

    "},{"location":"bolts/openai/base/#geniusrise_openai.base.OpenAIFineTuner.fine_tune","title":"fine_tune(model, n_epochs, batch_size, learning_rate_multiplier, prompt_loss_weight, suffix=None, wait=False, data_extractor_lambda=None, **kwargs)","text":"

    Fine-tune the model.

    Parameters:

    Name Type Description Default model str

    The pre-trained model name.

    required suffix str

    The suffix to append to the model name.

    None n_epochs int

    Total number of training epochs to perform.

    required batch_size int

    Batch size during training.

    required learning_rate_multiplier int

    Learning rate multiplier.

    required prompt_loss_weight int

    Prompt loss weight.

    required wait bool

    Whether to wait for the fine-tuning to complete. Defaults to False.

    False data_extractor_lambda str

    A lambda function run on each data element to extract the actual data.

    None **kwargs

    Additional keyword arguments for training and data loading.

    {}

    Raises:

    Type Description Exception

    If any step in the fine-tuning process fails.

    "},{"location":"bolts/openai/base/#geniusrise_openai.base.OpenAIFineTuner.get_fine_tuning_job","title":"get_fine_tuning_job(job_id) staticmethod","text":"

    Get the status of a fine-tuning job.

    "},{"location":"bolts/openai/base/#geniusrise_openai.base.OpenAIFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs) abstractmethod","text":"

    Load a dataset from a file.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset file.

    required **kwargs

    Additional keyword arguments to pass to the load_dataset method.

    {}

    Returns:

    Name Type Description Dataset Union[Dataset, DatasetDict, Optional[Dataset]]

    The loaded dataset.

    Raises:

    Type Description NotImplementedError

    This method should be overridden by subclasses.

    "},{"location":"bolts/openai/base/#geniusrise_openai.base.OpenAIFineTuner.prepare_fine_tuning_data","title":"prepare_fine_tuning_data(data, data_type)","text":"

    Prepare the given data for fine-tuning.

    Parameters:

    Name Type Description Default data Union[Dataset, DatasetDict, Optional[Dataset]]

    The dataset to prepare.

    required data_type str

    Either 'train' or 'eval' to specify the type of data.

    required

    Raises:

    Type Description ValueError

    If data_type is not 'train' or 'eval'.

    "},{"location":"bolts/openai/base/#geniusrise_openai.base.OpenAIFineTuner.preprocess_data","title":"preprocess_data(**kwargs)","text":"

    Load and preprocess the dataset.

    Raises:

    Type Description Exception

    If any step in the preprocessing fails.

    "},{"location":"bolts/openai/base/#geniusrise_openai.base.OpenAIFineTuner.wait_for_fine_tuning","title":"wait_for_fine_tuning(job_id, check_interval=60)","text":"

    Wait for a fine-tuning job to complete, checking the status every check_interval seconds.

    "},{"location":"bolts/openai/classification/","title":"Classification Fine Tuner","text":"

    Bases: OpenAIFineTuner

    A bolt for fine-tuning OpenAI models for text classification tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output BatchOutput

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius HuggingFaceCommonsenseReasoningFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder model \\\nfine_tune \\\n--args model_name=my_model tokenizer_name=my_tokenizer num_train_epochs=3 per_device_train_batch_size=8\n

    YAML Configuration:

        version: \"1\"\nbolts:\nmy_fine_tuner:\nname: \"HuggingFaceCommonsenseReasoningFineTuner\"\nmethod: \"fine_tune\"\nargs:\nmodel_name: \"my_model\"\ntokenizer_name: \"my_tokenizer\"\nnum_train_epochs: 3\nper_device_train_batch_size: 8\ndata_max_length: 512\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_dataset\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_model\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: my_fine_tuner\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    Supported Data Formats
    • JSONL
    • CSV
    • Parquet
    • JSON
    • XML
    • YAML
    • TSV
    • Excel (.xls, .xlsx)
    • SQLite (.db)
    • Feather
    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load a classification dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required

    Returns:

    Name Type Description Dataset Union[Dataset, DatasetDict, Optional[Dataset]]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\", \"label\": \"The label\"}\n

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'text' and 'label' columns.

    text,label\n\"The text content\",\"The label\"\n

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'text' and 'label' columns.

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'text' and 'label' keys.

    [{\"text\": \"The text content\", \"label\": \"The label\"}]\n

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' and 'label' child elements.

    <record>\n<text>The text content</text>\n<label>The label</label>\n</record>\n

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' and 'label' keys.

    - text: \"The text content\"\nlabel: \"The label\"\n

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'text' and 'label' columns separated by tabs.

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' and 'label' columns.

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' and 'label' columns.

    "},{"location":"bolts/openai/classification/#geniusrise_openai.classification.OpenAIClassificationFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'text' and 'label' columns.

    "},{"location":"bolts/openai/commonsense_reasoning/","title":"Natural Language Inference Fine Tuner","text":"

    Bases: OpenAIFineTuner

    A bolt for fine-tuning OpenAI models for commonsense reasoning tasks.

    This bolt uses the OpenAI API to fine-tune a pre-trained model for commonsense reasoning.

    CLI Usage:

        genius HuggingFaceCommonsenseReasoningFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder model \\\nfine_tune \\\n--args model_name=my_model tokenizer_name=my_tokenizer num_train_epochs=3 per_device_train_batch_size=8\n

    YAML Configuration:

        version: \"1\"\nbolts:\nmy_fine_tuner:\nname: \"HuggingFaceCommonsenseReasoningFineTuner\"\nmethod: \"fine_tune\"\nargs:\nmodel_name: \"my_model\"\ntokenizer_name: \"my_tokenizer\"\nnum_train_epochs: 3\nper_device_train_batch_size: 8\ndata_max_length: 512\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_dataset\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_model\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: my_fine_tuner\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    Supported Data Formats
    • JSONL
    • CSV
    • Parquet
    • JSON
    • XML
    • YAML
    • TSV
    • Excel (.xls, .xlsx)
    • SQLite (.db)
    • Feather
    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load a commonsense reasoning dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required **kwargs Any

    Additional keyword arguments.

    {}

    Returns:

    Name Type Description Dataset Union[Dataset, DatasetDict, Optional[Dataset]]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--hugging-face-dataset","title":"Hugging Face Dataset","text":"

    Dataset files saved by the Hugging Face datasets library.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'premise', 'hypothesis', and 'label' keys.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'premise', 'hypothesis', and 'label' child elements.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'premise', 'hypothesis', and 'label' keys.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns separated by tabs.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'premise', 'hypothesis', and 'label' columns.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns.

    "},{"location":"bolts/openai/commonsense_reasoning/#geniusrise_openai.commonsense_reasoning.OpenAICommonsenseReasoningFineTuner.prepare_fine_tuning_data","title":"prepare_fine_tuning_data(data, data_type)","text":"

    Prepare the given data for fine-tuning.

    Parameters:

    Name Type Description Default data Union[Dataset, DatasetDict, Optional[Dataset]]

    The dataset to prepare.

    required data_type str

    Either 'train' or 'eval' to specify the type of data.

    required

    Raises:

    Type Description ValueError

    If data_type is not 'train' or 'eval'.

    "},{"location":"bolts/openai/instruction_tuning/","title":"Instruction Tuning Fine Tuner","text":"

    Bases: OpenAIFineTuner

    A bolt for fine-tuning OpenAI models on instruction following tasks.

    This bolt uses the OpenAI API to fine-tune a pre-trained model for instruction following tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output BatchOutput

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius HuggingFaceCommonsenseReasoningFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder model \\\nfine_tune \\\n--args model_name=my_model tokenizer_name=my_tokenizer num_train_epochs=3 per_device_train_batch_size=8\n

    YAML Configuration:

        version: \"1\"\nbolts:\nmy_fine_tuner:\nname: \"HuggingFaceCommonsenseReasoningFineTuner\"\nmethod: \"fine_tune\"\nargs:\nmodel_name: \"my_model\"\ntokenizer_name: \"my_tokenizer\"\nnum_train_epochs: 3\nper_device_train_batch_size: 8\ndata_max_length: 512\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_dataset\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_model\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: my_fine_tuner\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    Supported Data Formats
    • JSONL
    • CSV
    • Parquet
    • JSON
    • XML
    • YAML
    • TSV
    • Excel (.xls, .xlsx)
    • SQLite (.db)
    • Feather
    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load an instruction following dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required **kwargs Any

    Additional keyword arguments.

    {}

    Returns:

    Name Type Description Dataset Union[Dataset, DatasetDict, Optional[Dataset]]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--hugging-face-dataset","title":"Hugging Face Dataset","text":"

    Dataset files saved by the Hugging Face datasets library.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'instruction' and 'output' columns.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'instruction' and 'output' columns.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'instruction' and 'output' keys.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'instruction' and 'output' child elements.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'instruction' and 'output' keys.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'instruction' and 'output' columns separated by tabs.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'instruction' and 'output' columns.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'instruction' and 'output' columns.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'instruction' and 'output' columns.

    "},{"location":"bolts/openai/instruction_tuning/#geniusrise_openai.instruction_tuning.OpenAIInstructionFineTuner.prepare_fine_tuning_data","title":"prepare_fine_tuning_data(data, data_type)","text":"

    Prepare the given data for fine-tuning.

    Parameters:

    Name Type Description Default data Union[Dataset, DatasetDict, Optional[Dataset]]

    The dataset to prepare.

    required data_type str

    Either 'train' or 'eval' to specify the type of data.

    required

    Raises:

    Type Description ValueError

    If data_type is not 'train' or 'eval'.

    "},{"location":"bolts/openai/language_model/","title":"Language Model Fine Tuner","text":"

    Bases: OpenAIFineTuner

    A bolt for fine-tuning OpenAI models on language modeling tasks.

    This bolt uses the OpenAI API to fine-tune a pre-trained model for language modeling.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output BatchOutput

    The output data.

    required state State

    The state manager.

    required **kwargs

    Additional keyword arguments.

    required

    CLI Usage:

        genius HuggingFaceCommonsenseReasoningFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder model \\\nfine_tune \\\n--args model_name=my_model tokenizer_name=my_tokenizer num_train_epochs=3 per_device_train_batch_size=8\n

    YAML Configuration:

        version: \"1\"\nbolts:\nmy_fine_tuner:\nname: \"HuggingFaceCommonsenseReasoningFineTuner\"\nmethod: \"fine_tune\"\nargs:\nmodel_name: \"my_model\"\ntokenizer_name: \"my_tokenizer\"\nnum_train_epochs: 3\nper_device_train_batch_size: 8\ndata_max_length: 512\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_dataset\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_model\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: my_fine_tuner\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    Supported Data Formats
    • JSONL
    • CSV
    • Parquet
    • JSON
    • XML
    • YAML
    • TSV
    • Excel (.xls, .xlsx)
    • SQLite (.db)
    • Feather
    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load a language modeling dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required

    Returns:

    Name Type Description Dataset Union[Dataset, DatasetDict, Optional[Dataset]]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--dataset-files-saved-by-hugging-face-datasets-library","title":"Dataset files saved by Hugging Face datasets library","text":"

    The directory should contain 'dataset_info.json' and other related files.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\"}\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'text' column.

    text\n\"The text content\"\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'text' column.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'text' key.

    [{\"text\": \"The text content\"}]\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' child element.

    <record>\n<text>The text content</text>\n</record>\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' key.

    - text: \"The text content\"\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'text' column separated by tabs.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' column.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' column.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'text' column.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data","title":"prepare_fine_tuning_data(data, data_type)","text":"

    Load a language modeling dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required masked bool

    Whether to use masked language modeling. Defaults to True.

    required max_length int

    The maximum length for tokenization. Defaults to 512.

    required

    Returns:

    Name Type Description Dataset None

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--dataset-files-saved-by-hugging-face-datasets-library","title":"Dataset files saved by Hugging Face datasets library","text":"

    The directory should contain 'dataset_info.json' and other related files.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\"}\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--csv","title":"CSV","text":"

    Should contain 'text' column.

    text\n\"The text content\"\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--parquet","title":"Parquet","text":"

    Should contain 'text' column.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--json","title":"JSON","text":"

    An array of dictionaries with 'text' key.

    [{\"text\": \"The text content\"}]\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--xml","title":"XML","text":"

    Each 'record' element should contain 'text' child element.

    <record>\n<text>The text content</text>\n</record>\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' key.

    - text: \"The text content\"\n

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--tsv","title":"TSV","text":"

    Should contain 'text' column separated by tabs.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' column.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' column.

    "},{"location":"bolts/openai/language_model/#geniusrise_openai.language_model.OpenAILanguageModelFineTuner.prepare_fine_tuning_data--feather","title":"Feather","text":"

    Should contain 'text' column.

    "},{"location":"bolts/openai/ner/","title":"Named Entity Recognition Fine Tuner","text":"

    Bases: OpenAIFineTuner

    A bolt for fine-tuning OpenAI models on named entity recognition tasks.

    This bolt extends the OpenAIFineTuner to handle the specifics of named entity recognition tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output BatchOutput

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius HuggingFaceCommonsenseReasoningFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder model \\\nfine_tune \\\n--args model_name=my_model tokenizer_name=my_tokenizer num_train_epochs=3 per_device_train_batch_size=8\n

    YAML Configuration:

        version: \"1\"\nbolts:\nmy_fine_tuner:\nname: \"HuggingFaceCommonsenseReasoningFineTuner\"\nmethod: \"fine_tune\"\nargs:\nmodel_name: \"my_model\"\ntokenizer_name: \"my_tokenizer\"\nnum_train_epochs: 3\nper_device_train_batch_size: 8\ndata_max_length: 512\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_dataset\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_model\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: my_fine_tuner\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    Supported Data Formats
    • JSONL
    • CSV
    • Parquet
    • JSON
    • XML
    • YAML
    • TSV
    • Excel (.xls, .xlsx)
    • SQLite (.db)
    • Feather
    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load a named entity recognition dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required

    Returns:

    Name Type Description DatasetDict Union[Dataset, DatasetDict, None]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--hugging-face-dataset","title":"Hugging Face Dataset","text":"

    Dataset files saved by the Hugging Face datasets library.

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"tokens\": [\"token1\", \"token2\", ...], \"ner_tags\": [0, 1, ...]}\n

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'tokens' and 'ner_tags' columns.

    tokens,ner_tags\n\"['token1', 'token2', ...]\", \"[0, 1, ...]\"\n

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'tokens' and 'ner_tags' columns.

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'tokens' and 'ner_tags' keys.

    [{\"tokens\": [\"token1\", \"token2\", ...], \"ner_tags\": [0, 1, ...]}]\n

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'tokens' and 'ner_tags' child elements.

    <record>\n<tokens>token1 token2 ...</tokens>\n<ner_tags>0 1 ...</ner_tags>\n</record>\n

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'tokens' and 'ner_tags' keys.

    - tokens: [\"token1\", \"token2\", ...]\nner_tags: [0, 1, ...]\n

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'tokens' and 'ner_tags' columns separated by tabs.

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'tokens' and 'ner_tags' columns.

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'tokens' and 'ner_tags' columns.

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'tokens' and 'ner_tags' columns.

    "},{"location":"bolts/openai/ner/#geniusrise_openai.ner.NamedEntityRecognitionFineTuner.prepare_fine_tuning_data","title":"prepare_fine_tuning_data(data, data_type)","text":"

    Prepare the given data for fine-tuning.

    Parameters:

    Name Type Description Default data Union[Dataset, DatasetDict, Optional[Dataset]]

    The dataset to prepare.

    required data_type str

    Either 'train' or 'eval' to specify the type of data.

    required

    Raises:

    Type Description ValueError

    If data_type is not 'train' or 'eval'.

    "},{"location":"bolts/openai/question_answering/","title":"Question Answering Fine Tuner","text":"

    Bases: OpenAIFineTuner

    A bolt for fine-tuning OpenAI models on question answering tasks.

    CLI Usage:

        genius HuggingFaceCommonsenseReasoningFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder model \\\nfine_tune \\\n--args model_name=my_model tokenizer_name=my_tokenizer num_train_epochs=3 per_device_train_batch_size=8\n

    YAML Configuration:

        version: \"1\"\nbolts:\nmy_fine_tuner:\nname: \"HuggingFaceCommonsenseReasoningFineTuner\"\nmethod: \"fine_tune\"\nargs:\nmodel_name: \"my_model\"\ntokenizer_name: \"my_tokenizer\"\nnum_train_epochs: 3\nper_device_train_batch_size: 8\ndata_max_length: 512\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_dataset\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_model\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: my_fine_tuner\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    Supported Data Formats
    • JSONL
    • CSV
    • Parquet
    • JSON
    • XML
    • YAML
    • TSV
    • Excel (.xls, .xlsx)
    • SQLite (.db)
    • Feather
    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load a dataset from a directory.

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"context\": \"The context content\", \"question\": \"The question\", \"answers\": {\"answer_start\": [int], \"text\": [str]}}\n

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'context', 'question', and 'answers' columns.

    context,question,answers\n\"The context content\",\"The question\",\"{'answer_start': [int], 'text': [str]}\"\n

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'context', 'question', and 'answers' columns.

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'context', 'question', and 'answers' keys.

    [{\"context\": \"The context content\", \"question\": \"The question\", \"answers\": {\"answer_start\": [int], \"text\": [str]}}]\n

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'context', 'question', and 'answers' child elements.

    <record>\n<context>The context content</context>\n<question>The question</question>\n<answers answer_start=\"int\" text=\"str\"></answers>\n</record>\n

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'context', 'question', and 'answers' keys.

    - context: \"The context content\"\nquestion: \"The question\"\nanswers:\nanswer_start: [int]\ntext: [str]\n

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'context', 'question', and 'answers' columns separated by tabs.

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'context', 'question', and 'answers' columns.

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'context', 'question', and 'answers' columns.

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'context', 'question', and 'answers' columns.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required pad_on_right bool

    Whether to pad on the right.

    required max_length int

    The maximum length of the sequences.

    required doc_stride int

    The document stride.

    required evaluate_squadv2 bool

    Whether to evaluate using SQuAD v2 metrics.

    required

    Returns:

    Name Type Description Dataset Union[Dataset, DatasetDict, Optional[Dataset]]

    The loaded dataset.

    "},{"location":"bolts/openai/question_answering/#geniusrise_openai.question_answering.OpenAIQuestionAnsweringFineTuner.prepare_fine_tuning_data","title":"prepare_fine_tuning_data(data, data_type)","text":"

    Prepare the given data for fine-tuning.

    Parameters:

    Name Type Description Default data Union[Dataset, DatasetDict, Optional[Dataset]]

    The dataset to prepare.

    required data_type str

    Either 'train' or 'eval' to specify the type of data.

    required

    Raises:

    Type Description ValueError

    If data_type is not 'train' or 'eval'.

    "},{"location":"bolts/openai/sentiment_analysis/","title":"Sentiment Analysis Fine Tuner","text":"

    Bases: OpenAIFineTuner

    A bolt for fine-tuning OpenAI models on sentiment analysis tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output BatchOutput

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius HuggingFaceCommonsenseReasoningFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder model \\\nfine_tune \\\n--args model_name=my_model tokenizer_name=my_tokenizer num_train_epochs=3 per_device_train_batch_size=8\n

    YAML Configuration:

        version: \"1\"\nbolts:\nmy_fine_tuner:\nname: \"HuggingFaceCommonsenseReasoningFineTuner\"\nmethod: \"fine_tune\"\nargs:\nmodel_name: \"my_model\"\ntokenizer_name: \"my_tokenizer\"\nnum_train_epochs: 3\nper_device_train_batch_size: 8\ndata_max_length: 512\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_dataset\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_model\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: my_fine_tuner\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    Supported Data Formats
    • JSONL
    • CSV
    • Parquet
    • JSON
    • XML
    • YAML
    • TSV
    • Excel (.xls, .xlsx)
    • SQLite (.db)
    • Feather
    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load a dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required **kwargs Any

    Additional keyword arguments.

    {}

    Returns:

    Type Description Union[Dataset, DatasetDict, Optional[Dataset]]

    Dataset | DatasetDict: The loaded dataset.

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\", \"label\": \"The label\"}\n

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'text' and 'label' columns.

    text,label\n\"The text content\",\"The label\"\n

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'text' and 'label' columns.

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'text' and 'label' keys.

    [{\"text\": \"The text content\", \"label\": \"The label\"}]\n

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' and 'label' child elements.

    <record>\n<text>The text content</text>\n<label>The label</label>\n</record>\n

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' and 'label' keys.

    - text: \"The text content\"\nlabel: \"The label\"\n

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'text' and 'label' columns separated by tabs.

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' and 'label' columns.

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' and 'label' columns.

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'text' and 'label' columns.

    "},{"location":"bolts/openai/sentiment_analysis/#geniusrise_openai.sentiment_analysis.OpenAISentimentAnalysisFineTuner.prepare_fine_tuning_data","title":"prepare_fine_tuning_data(data, data_type)","text":"

    Prepare the given data for fine-tuning.

    Parameters:

    Name Type Description Default data Union[Dataset, DatasetDict, Optional[Dataset]]

    The dataset to prepare.

    required data_type str

    Either 'train' or 'eval' to specify the type of data.

    required

    Raises:

    Type Description ValueError

    If data_type is not 'train' or 'eval'.

    "},{"location":"bolts/openai/summarization/","title":"Summarization Fine Tuner","text":"

    Bases: OpenAIFineTuner

    A bolt for fine-tuning OpenAI models for summarization tasks.

    This bolt uses the OpenAI API to fine-tune a pre-trained model for summarization.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output BatchOutput

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius HuggingFaceCommonsenseReasoningFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder model \\\nfine_tune \\\n--args model_name=my_model tokenizer_name=my_tokenizer num_train_epochs=3 per_device_train_batch_size=8\n

    YAML Configuration:

        version: \"1\"\nbolts:\nmy_fine_tuner:\nname: \"HuggingFaceCommonsenseReasoningFineTuner\"\nmethod: \"fine_tune\"\nargs:\nmodel_name: \"my_model\"\ntokenizer_name: \"my_tokenizer\"\nnum_train_epochs: 3\nper_device_train_batch_size: 8\ndata_max_length: 512\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_dataset\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_model\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: my_fine_tuner\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    Supported Data Formats
    • JSONL
    • CSV
    • Parquet
    • JSON
    • XML
    • YAML
    • TSV
    • Excel (.xls, .xlsx)
    • SQLite (.db)
    • Feather
    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load a dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required **kwargs Any

    Additional keyword arguments.

    {}

    Returns:

    Type Description Optional[DatasetDict]

    Dataset | DatasetDict: The loaded dataset.

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\", \"summary\": \"The summary\"}\n

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'text' and 'summary' columns.

    text,summary\n\"The text content\",\"The summary\"\n

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'text' and 'summary' columns.

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'text' and 'summary' keys.

    [{\"text\": \"The text content\", \"summary\": \"The summary\"}]\n

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' and 'summary' child elements.

    <record>\n<text>The text content</text>\n<summary>The summary</summary>\n</record>\n

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' and 'summary' keys.

    - text: \"The text content\"\nsummary: \"The summary\"\n

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'text' and 'summary' columns separated by tabs.

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' and 'summary' columns.

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' and 'summary' columns.

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'text' and 'summary' columns.

    "},{"location":"bolts/openai/summarization/#geniusrise_openai.summarization.OpenAISummarizationFineTuner.prepare_fine_tuning_data","title":"prepare_fine_tuning_data(data, data_type)","text":"

    Prepare the given data for fine-tuning.

    Parameters:

    Name Type Description Default data Dataset | DatasetDict | Optional[Dataset]

    The dataset to prepare.

    required data_type str

    Either 'train' or 'eval' to specify the type of data.

    required

    Raises:

    Type Description ValueError

    If data_type is not 'train' or 'eval'.

    "},{"location":"bolts/openai/translation/","title":"Translation Fine Tuner","text":"

    Bases: OpenAIFineTuner

    A bolt for fine-tuning OpenAI models for translation tasks.

    This bolt uses the OpenAI API to fine-tune a pre-trained model for translation.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output BatchOutput

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius HuggingFaceCommonsenseReasoningFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder model \\\nfine_tune \\\n--args model_name=my_model tokenizer_name=my_tokenizer num_train_epochs=3 per_device_train_batch_size=8\n

    YAML Configuration:

        version: \"1\"\nbolts:\nmy_fine_tuner:\nname: \"HuggingFaceCommonsenseReasoningFineTuner\"\nmethod: \"fine_tune\"\nargs:\nmodel_name: \"my_model\"\ntokenizer_name: \"my_tokenizer\"\nnum_train_epochs: 3\nper_device_train_batch_size: 8\ndata_max_length: 512\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_dataset\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\nfolder: \"my_model\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: my_fine_tuner\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    Supported Data Formats
    • JSONL
    • CSV
    • Parquet
    • JSON
    • XML
    • YAML
    • TSV
    • Excel (.xls, .xlsx)
    • SQLite (.db)
    • Feather
    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset","title":"load_dataset(dataset_path, origin='en', target='fr', **kwargs)","text":"

    Load a dataset from a directory.

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--supported-data-formats-and-structures-for-translation-tasks","title":"Supported Data Formats and Structures for Translation Tasks:","text":""},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\n\"translation\": {\n\"en\": \"English text\",\n\"fr\": \"French text\"\n}\n}\n

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'en' and 'fr' columns.

    en,fr\n\"English text\",\"French text\"\n

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'en' and 'fr' columns.

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'en' and 'fr' keys.

    [\n{\n\"en\": \"English text\",\n\"fr\": \"French text\"\n}\n]\n

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'en' and 'fr' child elements.

    <record>\n<en>English text</en>\n<fr>French text</fr>\n</record>\n

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'en' and 'fr' keys.

    - en: \"English text\"\nfr: \"French text\"\n

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'en' and 'fr' columns separated by tabs.

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'en' and 'fr' columns.

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'en' and 'fr' columns.

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'en' and 'fr' columns.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the directory containing the dataset files.

    required max_length int

    The maximum length for tokenization. Defaults to 512.

    required origin str

    The origin language. Defaults to 'en'.

    'en' target str

    The target language. Defaults to 'fr'.

    'fr' **kwargs

    Additional keyword arguments.

    {}

    Returns:

    Name Type Description DatasetDict Dataset | DatasetDict | Optional[Dataset]

    The loaded dataset.

    "},{"location":"bolts/openai/translation/#geniusrise_openai.translation.OpenAITranslationFineTuner.prepare_fine_tuning_data","title":"prepare_fine_tuning_data(data, data_type)","text":"

    Prepare the given data for fine-tuning.

    Parameters:

    Name Type Description Default data Dataset | DatasetDict | Optional[Dataset]

    The dataset to prepare.

    required data_type str

    Either 'train' or 'eval' to specify the type of data.

    required

    Raises:

    Type Description ValueError

    If data_type is not 'train' or 'eval'.

    "},{"location":"core/airflow/","title":"Airflow Deployment","text":"

    AirflowRunner is a utility for managing and orchestrating Airflow DAGs. It is designed to provide a command-line interface (CLI) for creating, describing, showing, deleting, and getting the status of Airflow DAGs.

    This class uses the Airflow models to interact with DAGs and DockerOperator to run tasks in Docker containers. It is aimed to simplify the deployment and management of Airflow tasks, providing a straightforward way to deploy DAGs with Docker tasks from the command line.

    CLI Usage

    genius airflow sub-command

    Sub-commands
    • create: Create a new DAG with the given parameters and Docker task. genius airflow create [options]
    • describe: Describe a specific DAG by its ID. genius airflow describe --dag_id example_dag
    • show: Show all available DAGs in the Airflow environment. genius airflow show
    • delete: Delete a specific DAG by its ID. genius airflow delete --dag_id example_dag
    • status: Get the status of a specific DAG by its ID. genius airflow status --dag_id example_dag --airflow_api_base_url http://localhost:8080/api/v1

    Each sub-command supports various options to specify the details of the DAG or the Docker task, such as the schedule interval, start date, owner, image, command, and more.

    Example

    Creating a new DAG:

    genius airflow create --dag_directory ~/airflow/dags \\\n--dag_id my_dag \\\n--image python:3.10-slim \\\n--command \"echo Hello World\"\n

    Attributes:

    Name Type Description dag_directory str

    Directory where DAGs are stored. This path should be known to Airflow.

    Methods
    • create: Method to create a new DAG based on the provided parameters and template.
    • describe: Method to describe a specific DAG by its ID, showing details like tasks and schedule.
    • show: Method to list all available DAGs.
    • delete: Method to remove a specific DAG by its ID from the directory.
    • status: Method to fetch and display the status of a specific DAG using Airflow's REST API.
    Note
    • Ensure that the Airflow environment is properly configured and the specified DAG directory is correct.
    • Make sure that the Airflow REST API base URL is accessible if using the status command.
    "},{"location":"core/airflow/#runners.airflow.generate.AirflowRunner.__init__","title":"__init__()","text":"

    Initialize the AirflowRunner class for managing Airflow DAGs.

    "},{"location":"core/airflow/#runners.airflow.generate.AirflowRunner.create","title":"create(args)","text":"

    Create a new DAG with a Docker task using the provided arguments.

    Parameters:

    Name Type Description Default args Namespace

    Namespace containing all the arguments needed for creating the DAG.

    required"},{"location":"core/airflow/#runners.airflow.generate.AirflowRunner.delete","title":"delete(dag_id)","text":"

    Delete a specific DAG by removing its file from the DAG directory.

    Parameters:

    Name Type Description Default dag_id str

    The ID of the DAG to delete.

    required"},{"location":"core/airflow/#runners.airflow.generate.AirflowRunner.describe","title":"describe(dag_id)","text":"

    Describe the details of a specific DAG.

    Parameters:

    Name Type Description Default dag_id str

    The ID of the DAG to describe.

    required

    Returns:

    Type Description None

    The DAG object if found, None otherwise.

    "},{"location":"core/airflow/#runners.airflow.generate.AirflowRunner.run","title":"run(args)","text":"

    Execute the command based on the parsed arguments.

    "},{"location":"core/airflow/#runners.airflow.generate.AirflowRunner.show","title":"show()","text":"

    Show all available DAGs by listing their IDs.

    "},{"location":"core/airflow/#runners.airflow.generate.AirflowRunner.status","title":"status(dag_id, airflow_api_base_url)","text":"

    Get the status of a specific DAG using Airflow's REST API.

    Parameters:

    Name Type Description Default dag_id str

    The ID of the DAG to get the status of.

    required airflow_api_base_url str

    URL of airflow for calling its APIs.

    required"},{"location":"core/cli_boltctl/","title":"Boltctl","text":"

    The main bolt controller

    "},{"location":"core/cli_boltctl/#cli.boltctl.BoltCtl","title":"BoltCtl","text":"

    Class for managing bolts end-to-end from the command line.

    "},{"location":"core/cli_boltctl/#cli.boltctl.BoltCtl.__init__","title":"__init__(discovered_bolt)","text":"

    Initialize BoltCtl with a DiscoveredBolt object.

    Parameters:

    Name Type Description Default discovered_bolt DiscoveredBolt

    DiscoveredBolt object used to create and manage bolts.

    required"},{"location":"core/cli_boltctl/#cli.boltctl.BoltCtl.create_bolt","title":"create_bolt(input_type, output_type, state_type, id, **kwargs)","text":"

    Create a bolt of a specific type.

    Parameters:

    Name Type Description Default input_type str

    The type of input (\"batch\" or \"streaming\").

    required output_type str

    The type of output (\"batch\" or \"streaming\").

    required state_type str

    The type of state manager (\"none\", \"redis\", \"postgres\", or \"dynamodb\").

    required **kwargs

    Additional keyword arguments for initializing the bolt.

    Keyword Arguments:\n    Batch input:\n    - input_folder (str): The input folder argument.\n    - input_s3_bucket (str): The input bucket argument.\n    - input_s3_folder (str): The input S3 folder argument.\n    Batch output:\n    - output_folder (str): The output folder argument.\n    - output_s3_bucket (str): The output bucket argument.\n    - output_s3_folder (str): The output S3 folder argument.\n    Streaming input:\n    - input_kafka_cluster_connection_string (str): The input Kafka servers argument.\n    - input_kafka_topic (str): The input kafka topic argument.\n    - input_kafka_consumer_group_id (str): The Kafka consumer group id.\n    Streaming output:\n    - output_kafka_cluster_connection_string (str): The output Kafka servers argument.\n    - output_kafka_topic (str): The output kafka topic argument.\n    Redis state manager config:\n    - redis_host (str): The Redis host argument.\n    - redis_port (str): The Redis port argument.\n    - redis_db (str): The Redis database argument.\n    Postgres state manager config:\n    - postgres_host (str): The PostgreSQL host argument.\n    - postgres_port (str): The PostgreSQL port argument.\n    - postgres_user (str): The PostgreSQL user argument.\n    - postgres_password (str): The PostgreSQL password argument.\n    - postgres_database (str): The PostgreSQL database argument.\n    - postgres_table (str): The PostgreSQL table argument.\n    DynamoDB state manager config:\n    - dynamodb_table_name (str): The DynamoDB table name argument.\n    - dynamodb_region_name (str): The DynamoDB region name argument.\n

    {}

    Returns:

    Name Type Description Bolt Bolt

    The created bolt.

    "},{"location":"core/cli_boltctl/#cli.boltctl.BoltCtl.create_parser","title":"create_parser(parser)","text":"

    Add arguments to the command-line parser for managing the bolt.

    Parameters:

    Name Type Description Default parser argparse.ArgumentParser

    Command-line parser.

    required"},{"location":"core/cli_boltctl/#cli.boltctl.BoltCtl.deploy_bolt","title":"deploy_bolt(args)","text":"

    Deploy a bolt of a specific type.

    Parameters:

    Name Type Description Default **kwargs

    Additional keyword arguments for initializing the bolt.

    Keyword Arguments:\n    Batch input:\n    - input_folder (str): The input folder argument.\n    - input_s3_bucket (str): The input bucket argument.\n    - input_s3_folder (str): The input S3 folder argument.\n    Batch output:\n    - output_folder (str): The output folder argument.\n    - output_s3_bucket (str): The output bucket argument.\n    - output_s3_folder (str): The output S3 folder argument.\n    Streaming input:\n    - input_kafka_cluster_connection_string (str): The input Kafka servers argument.\n    - input_kafka_topic (str): The input kafka topic argument.\n    - input_kafka_consumer_group_id (str): The Kafka consumer group id.\n    Streaming output:\n    - output_kafka_cluster_connection_string (str): The output Kafka servers argument.\n    - output_kafka_topic (str): The output kafka topic argument.\n    Redis state manager config:\n    - redis_host (str): The host address for the Redis server.\n    - redis_port (int): The port number for the Redis server.\n    - redis_db (int): The Redis database to be used.\n    Postgres state manager config:\n    - postgres_host (str): The host address for the PostgreSQL server.\n    - postgres_port (int): The port number for the PostgreSQL server.\n    - postgres_user (str): The username for the PostgreSQL server.\n    - postgres_password (str): The password for the PostgreSQL server.\n    - postgres_database (str): The PostgreSQL database to be used.\n    - postgres_table (str): The PostgreSQL table to be used.\n    DynamoDB state manager config:\n    - dynamodb_table_name (str): The name of the DynamoDB table.\n    - dynamodb_region_name (str): The AWS region for DynamoDB.\n    Deployment\n    - k8s_kind (str): Kind of Kubernetes resource to be deployed as, choices are \"deployment\", \"service\", \"job\", \"cron_job\"\n    - k8s_name (str): Name of the Kubernetes resource.\n    - k8s_image (str): Docker image for the Kubernetes resource.\n    - k8s_replicas (int): Number of replicas.\n    - k8s_env_vars 
(json): Environment variables as a JSON string.\n    - k8s_cpu (str): CPU requirements.\n    - k8s_memory (str): Memory requirements.\n    - k8s_storage (str): Storage requirements.\n    - k8s_gpu (str): GPU requirements.\n    - k8s_kube_config_path (str): Name of the Kubernetes cluster local config.\n    - k8s_api_key (str): Kubernetes API key.\n    - k8s_api_host (str): Kubernetes API host.\n    - k8s_verify_ssl (str): Whether to verify SSL certificates.\n    - k8s_ssl_ca_cert (str): Path to the SSL CA certificate.\n    - k8s_cluster_name (str): Name of the Kubernetes cluster.\n    - k8s_context_name (str): Name of the kubeconfig context.\n    - k8s_namespace (str): Kubernetes namespace, default \"default\".\n    - k8s_labels (json): Labels for Kubernetes resources, as a JSON string.\n    - k8s_annotations (json): Annotations for Kubernetes resources, as a JSON string.\n    - k8s_port (int): Port to run the spout on as a service.\n    - k8s_target_port (int): Port to expose the spout on as a service.\n    - k8s_schedule (str): Schedule to run the spout on as a cron job.\n

    required"},{"location":"core/cli_boltctl/#cli.boltctl.BoltCtl.execute_bolt","title":"execute_bolt(bolt, method_name, *args, **kwargs)","text":"

    Execute a method of a bolt.

    Parameters:

    Name Type Description Default bolt Bolt

    The bolt to execute.

    required method_name str

    The name of the method to execute.

    required *args

    Positional arguments to pass to the method.

    () **kwargs

    Keyword arguments to pass to the method.

    {}

    Returns:

    Name Type Description Any

    The result of the method.

    "},{"location":"core/cli_boltctl/#cli.boltctl.BoltCtl.run","title":"run(args)","text":"

    Run the command-line interface.

    Parameters:

    Name Type Description Default args argparse.Namespace

    Parsed command-line arguments.

    required"},{"location":"core/cli_discover/","title":"Discover","text":"

    Module discovery

    "},{"location":"core/cli_discover/#cli.discover.Discover","title":"Discover","text":""},{"location":"core/cli_discover/#cli.discover.Discover.__init__","title":"__init__(directory=None)","text":"

    Initialize the Discover class.

    "},{"location":"core/cli_discover/#cli.discover.Discover.discover_geniusrise_installed_modules","title":"discover_geniusrise_installed_modules()","text":"

    Discover installed geniusrise modules from Python path directories.

    "},{"location":"core/cli_discover/#cli.discover.Discover.find_classes","title":"find_classes(module)","text":"

    Discover spout/bolt classes in a module.

    Parameters:

    Name Type Description Default module Any

    Module to scan for spout/bolt classes.

    required"},{"location":"core/cli_discover/#cli.discover.Discover.get_geniusignore_patterns","title":"get_geniusignore_patterns(directory) staticmethod","text":"

    Read the .geniusignore file and return a list of patterns to ignore.

    Parameters:

    Name Type Description Default directory str

    Directory containing the .geniusignore file.

    required

    Returns:

    Type Description List[str]

    List[str]: List of patterns to ignore.

    "},{"location":"core/cli_discover/#cli.discover.Discover.get_init_args","title":"get_init_args(cls)","text":"

    Extract initialization arguments of a class.

    Parameters:

    Name Type Description Default cls type

    Class to extract initialization arguments from.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: Initialization arguments.

    "},{"location":"core/cli_discover/#cli.discover.Discover.import_module","title":"import_module(path)","text":"

    Import a module given its path.

    Parameters:

    Name Type Description Default path str

    Path to the module.

    required

    Returns:

    Name Type Description Any

    Imported module.

    "},{"location":"core/cli_discover/#cli.discover.Discover.scan_directory","title":"scan_directory(directory=None)","text":"

    Scan for spouts/bolts in installed extensions and user's codebase.

    Parameters:

    Name Type Description Default directory Optional[str]

    Directory to scan for user-defined spouts/bolts.

    None

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: Discovered spouts/bolts.

    "},{"location":"core/cli_dockerctl/","title":"Dockerctl","text":""},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl","title":"DockerCtl","text":"

    This class manages the creation and uploading of Docker containers.

    Attributes:

    Name Type Description base_image str

    The base image to use for the Docker container.

    workdir str

    The working directory in the Docker container.

    local_dir str

    The local directory to copy into the Docker container.

    packages List[str]

    List of packages to install in the Docker container.

    os_packages List[str]

    List of OS packages to install in the Docker container.

    env_vars Dict[str, str]

    Environment variables to set in the Docker container.

    Command-Line Interface

    genius docker package [options]

    Parameters:

    Name Type Description Default - <image_name>

    The name of the Docker image to build and upload.

    required - <repository>

    The container repository to upload to (e.g., \"ECR\", \"DockerHub\", \"Quay\", \"ACR\", \"GCR\").

    required Options
    • --auth: Authentication credentials as a JSON string. Default is an empty JSON object.
    • --base_image: The base image to use for the Docker container. Default is \"nvidia/cuda:12.2.0-runtime-ubuntu20.04\".
    • --workdir: The working directory in the Docker container. Default is \"/app\".
    • --local_dir: The local directory to copy into the Docker container. Default is \".\".
    • --packages: List of Python packages to install in the Docker container. Default is an empty list.
    • --os_packages: List of OS packages to install in the Docker container. Default is an empty list.
    • --env_vars: Environment variables to set in the Docker container. Default is an empty dictionary.
    Authentication Details
    • ECR: {\"aws_region\": \"ap-south-1\", \"aws_secret_access_key\": \"aws_key\", \"aws_access_key_id\": \"aws_secret\"}
    • DockerHub: {\"dockerhub_username\": \"username\", \"dockerhub_password\": \"password\"}
    • ACR: {\"acr_username\": \"username\", \"acr_password\": \"password\", \"acr_login_server\": \"login_server\"}
    • GCR: {\"gcr_key_file_path\": \"/path/to/keyfile.json\", \"gcr_repository\": \"repository\"}
    • Quay: {\"quay_username\": \"username\", \"quay_password\": \"password\"}
    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl--examples","title":"Examples","text":""},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl--uploading-to-ecr-amazon-elastic-container-registry","title":"Uploading to ECR (Amazon Elastic Container Registry)","text":"
    genius docker package geniusrise ecr --auth '{\"aws_region\": \"ap-south-1\"}'\n
    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl--uploading-to-dockerhub","title":"Uploading to DockerHub","text":"
    genius docker package geniusrise dockerhub --auth '{\"dockerhub_username\": \"username\", \"dockerhub_password\": \"password\"}'\n

    This is how we upload to dockerhub:

    export DOCKERHUB_USERNAME=\nexport DOCKERHUB_PASSWORD=\ngenius docker package geniusrise dockerhub \\\n--packages geniusrise-listeners geniusrise-databases geniusrise-huggingface geniusrise-openai \\\n--os_packages libmysqlclient-dev libldap2-dev libsasl2-dev libssl-dev\n
    genius docker package geniusrise-core dockerhub\n
    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl--uploading-to-acr-azure-container-registry","title":"Uploading to ACR (Azure Container Registry)","text":"
    genius docker package geniusrise acr --auth '{\"acr_username\": \"username\", \"acr_password\": \"password\", \"acr_login_server\": \"login_server\"}'\n
    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl--uploading-to-gcr-google-container-registry","title":"Uploading to GCR (Google Container Registry)","text":"
    genius docker package geniusrise gcr --auth '{\"gcr_key_file_path\": \"/path/to/keyfile.json\", \"gcr_repository\": \"repository\"}'\n
    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.__init__","title":"__init__()","text":"

    Initialize the DockerContainerManager with logging.

    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.build_image","title":"build_image(image_name, dockerfile_path)","text":"

    Build a Docker image based on the provided Dockerfile.

    Parameters:

    Name Type Description Default image_name str

    The name to give to the built Docker image.

    required dockerfile_path str

    The path to the Dockerfile to use for building the image.

    required

    Returns:

    Name Type Description bool

    True if the build was successful, False otherwise.

    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.create_dockerfile","title":"create_dockerfile()","text":"

    Create a Dockerfile based on the class attributes.

    Returns:

    Name Type Description str str

    The path to the created Dockerfile.

    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.create_parser","title":"create_parser(parser)","text":"

    Add arguments to the command-line parser for managing Docker containers.

    Parameters:

    Name Type Description Default parser argparse.ArgumentParser

    Command-line parser.

    required

    Returns:

    Type Description argparse.ArgumentParser

    argparse.ArgumentParser: The updated parser.

    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.run","title":"run(args)","text":"

    Run the command-line interface.

    Parameters:

    Name Type Description Default args argparse.Namespace

    Parsed command-line arguments.

    required"},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.upload_to_acr","title":"upload_to_acr(image_name, auth)","text":"

    Upload the Docker image to Azure Container Registry (ACR).

    Parameters:

    Name Type Description Default image_name str

    The name of the Docker image to upload.

    required auth dict

    Authentication credentials for ACR.

    required

    Returns:

    Name Type Description bool bool

    True if the upload was successful, False otherwise.

    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.upload_to_dockerhub","title":"upload_to_dockerhub(image_name, auth)","text":"

    Upload the Docker image to DockerHub.

    Parameters:

    Name Type Description Default image_name str

    The name of the Docker image to upload.

    required auth dict

    Authentication credentials for DockerHub.

    required

    Returns:

    Name Type Description bool bool

    True if the upload was successful, False otherwise.

    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.upload_to_ecr","title":"upload_to_ecr(image_name, auth, ecr_repo=None)","text":"

    Upload the Docker image to Amazon Elastic Container Registry (ECR).

    Parameters:

    Name Type Description Default image_name str

    The name of the Docker image to upload.

    required auth dict

    Authentication credentials for ECR.

    required ecr_repo Optional[str]

    The ECR repository to upload to. If not provided, it will be generated.

    None

    Returns:

    Name Type Description bool bool

    True if the upload was successful, False otherwise.

    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.upload_to_gcr","title":"upload_to_gcr(image_name, auth)","text":"

    Upload the Docker image to Google Container Registry (GCR).

    Parameters:

    Name Type Description Default image_name str

    The name of the Docker image to upload.

    required auth dict

    Authentication credentials for GCR.

    required

    Returns:

    Name Type Description bool bool

    True if the upload was successful, False otherwise.

    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.upload_to_quay","title":"upload_to_quay(image_name, auth)","text":"

    Upload the Docker image to Quay.io.

    Parameters:

    Name Type Description Default image_name str

    The name of the Docker image to upload.

    required auth dict

    Authentication credentials for Quay.io.

    required

    Returns:

    Name Type Description bool bool

    True if the upload was successful, False otherwise.

    "},{"location":"core/cli_dockerctl/#cli.dockerctl.DockerCtl.upload_to_repository","title":"upload_to_repository(image_name, repository, auth={})","text":"

    Upload the Docker image to a specified container repository.

    Parameters:

    Name Type Description Default image_name str

    The name of the Docker image to upload.

    required repository str

    The container repository to upload to (e.g., \"ECR\", \"DockerHub\", \"Quay\").

    required auth dict

    Authentication credentials for the container repository. Defaults to an empty dictionary.

    {}

    Returns:

    Name Type Description bool bool

    True if the upload was successful, False otherwise.

    "},{"location":"core/cli_geniusctl/","title":"Geniusctl","text":"

    The main command line application

    "},{"location":"core/cli_geniusctl/#cli.geniusctl.GeniusCtl","title":"GeniusCtl","text":"

    Main class for managing the geniusrise CLI application.

    "},{"location":"core/cli_geniusctl/#cli.geniusctl.GeniusCtl.__init__","title":"__init__()","text":"

    Initialize GeniusCtl.

    Parameters:

    Name Type Description Default directory str

    The directory to scan for spouts and bolts.

    required"},{"location":"core/cli_geniusctl/#cli.geniusctl.GeniusCtl.cli","title":"cli()","text":"

    Main function to be called when geniusrise is run from the command line.

    "},{"location":"core/cli_geniusctl/#cli.geniusctl.GeniusCtl.create_parser","title":"create_parser()","text":"

    Create a command-line parser with arguments for managing the application.

    Returns:

    Type Description

    argparse.ArgumentParser: Command-line parser.

    "},{"location":"core/cli_geniusctl/#cli.geniusctl.GeniusCtl.list_spouts_and_bolts","title":"list_spouts_and_bolts(verbose=False)","text":"

    List all discovered spouts and bolts in a table.

    "},{"location":"core/cli_geniusctl/#cli.geniusctl.GeniusCtl.run","title":"run(args)","text":"

    Run the command-line interface.

    Parameters:

    Name Type Description Default args argparse.Namespace

    Parsed command-line arguments.

    required"},{"location":"core/cli_schema/","title":"YAML schema","text":"

    YAML schema definition as pydantic

    "},{"location":"core/cli_schema/#cli.schema.Bolt","title":"Bolt","text":"

    Bases: BaseModel

    This class defines a bolt. A bolt has a name, method, optional arguments, input, output, state, and deployment.

    "},{"location":"core/cli_schema/#cli.schema.Deploy","title":"Deploy","text":"

    Bases: BaseModel

    This class defines the deployment of the spout or bolt. The deployment can be of type k8s or ecs.

    "},{"location":"core/cli_schema/#cli.schema.DeployArgs","title":"DeployArgs","text":"

    Bases: BaseModel

    This class defines the arguments for the deployment. Depending on the type of deployment (k8s, ecs), different arguments are required.

    "},{"location":"core/cli_schema/#cli.schema.ExtraKwargs","title":"ExtraKwargs","text":"

    Bases: BaseModel

    This class is used to handle any extra arguments that are not explicitly defined in the schema.

    "},{"location":"core/cli_schema/#cli.schema.Geniusfile","title":"Geniusfile","text":"

    Bases: BaseModel

    This class defines the overall structure of the YAML file. It includes a version, spouts, and bolts.

    "},{"location":"core/cli_schema/#cli.schema.Input","title":"Input","text":"

    Bases: BaseModel

    This class defines the input of the bolt. The input can be of type batch, streaming, spout, or bolt.

    "},{"location":"core/cli_schema/#cli.schema.InputArgs","title":"InputArgs","text":"

    Bases: BaseModel

    This class defines the arguments for the input. Depending on the type of input (batch, streaming, spout, bolt), different arguments are required.

    "},{"location":"core/cli_schema/#cli.schema.Output","title":"Output","text":"

    Bases: BaseModel

    This class defines the output of the spout or bolt. The output can be of type batch or streaming.

    "},{"location":"core/cli_schema/#cli.schema.OutputArgs","title":"OutputArgs","text":"

    Bases: BaseModel

    This class defines the arguments for the output. Depending on the type of output (batch, streaming), different arguments are required.

    "},{"location":"core/cli_schema/#cli.schema.Spout","title":"Spout","text":"

    Bases: BaseModel

    This class defines a spout. A spout has a name, method, optional arguments, output, state, and deployment.

    "},{"location":"core/cli_schema/#cli.schema.State","title":"State","text":"

    Bases: BaseModel

    This class defines the state of the spout or bolt. The state can be of type none, redis, postgres, or dynamodb.

    "},{"location":"core/cli_schema/#cli.schema.StateArgs","title":"StateArgs","text":"

    Bases: BaseModel

    This class defines the arguments for the state. Depending on the type of state (none, redis, postgres, dynamodb), different arguments are required.

    "},{"location":"core/cli_spoutctl/","title":"Spoutctl","text":"

    The main spout controller

    "},{"location":"core/cli_spoutctl/#cli.spoutctl.SpoutCtl","title":"SpoutCtl","text":"

    Class for managing spouts end-to-end from the command line.

    "},{"location":"core/cli_spoutctl/#cli.spoutctl.SpoutCtl.__init__","title":"__init__(discovered_spout)","text":"

    Initialize SpoutCtl with a DiscoveredSpout object.

    Parameters:

    Name Type Description Default discovered_spout DiscoveredSpout

    DiscoveredSpout object used to create and manage spouts.

    required"},{"location":"core/cli_spoutctl/#cli.spoutctl.SpoutCtl.create_parser","title":"create_parser(parser)","text":"

    Add arguments to the command-line parser for managing the spout.

    Parameters:

    Name Type Description Default parser argparse.ArgumentParser

    Command-line parser.

    required"},{"location":"core/cli_spoutctl/#cli.spoutctl.SpoutCtl.create_spout","title":"create_spout(output_type, state_type, id, **kwargs)","text":"

    Create a spout of a specific type.

    Parameters:

    Name Type Description Default output_type str

    The type of output (\"batch\" or \"streaming\").

    required state_type str

    The type of state manager (\"none\", \"redis\", \"postgres\", or \"dynamodb\").

    required **kwargs

    Additional keyword arguments for initializing the spout.

    Keyword Arguments:\n    Batch output:\n    - output_folder (str): The directory where output files should be stored temporarily.\n    - output_s3_bucket (str): The name of the S3 bucket for output storage.\n    - output_s3_folder (str): The S3 folder for output storage.\n    Streaming output:\n    - output_kafka_topic (str): Kafka output topic for streaming spouts.\n    - output_kafka_cluster_connection_string (str): Kafka connection string for streaming spouts.\n    Stream to Batch output:\n    - output_folder (str): The directory where output files should be stored temporarily.\n    - output_s3_bucket (str): The name of the S3 bucket for output storage.\n    - output_s3_folder (str): The S3 folder for output storage.\n    - buffer_size (int): Number of messages to buffer.\n    Redis state manager config:\n    - redis_host (str): The host address for the Redis server.\n    - redis_port (int): The port number for the Redis server.\n    - redis_db (int): The Redis database to be used.\n    Postgres state manager config:\n    - postgres_host (str): The host address for the PostgreSQL server.\n    - postgres_port (int): The port number for the PostgreSQL server.\n    - postgres_user (str): The username for the PostgreSQL server.\n    - postgres_password (str): The password for the PostgreSQL server.\n    - postgres_database (str): The PostgreSQL database to be used.\n    - postgres_table (str): The PostgreSQL table to be used.\n    DynamoDB state manager config:\n    - dynamodb_table_name (str): The name of the DynamoDB table.\n    - dynamodb_region_name (str): The AWS region for DynamoDB.\n

    {}

    Returns:

    Name Type Description Spout Spout

    The created spout.

    "},{"location":"core/cli_spoutctl/#cli.spoutctl.SpoutCtl.deploy_spout","title":"deploy_spout(args)","text":"

    Deploy a spout of a specific type.

    Parameters:

    Name Type Description Default **kwargs

    Additional keyword arguments for initializing the spout.

    Keyword Arguments:\n    Batch output:\n    - output_folder (str): The directory where output files should be stored temporarily.\n    - output_s3_bucket (str): The name of the S3 bucket for output storage.\n    - output_s3_folder (str): The S3 folder for output storage.\n    Streaming output:\n    - output_kafka_topic (str): Kafka output topic for streaming spouts.\n    - output_kafka_cluster_connection_string (str): Kafka connection string for streaming spouts.\n    Stream to Batch output:\n    - output_folder (str): The directory where output files should be stored temporarily.\n    - output_s3_bucket (str): The name of the S3 bucket for output storage.\n    - output_s3_folder (str): The S3 folder for output storage.\n    - buffer_size (int): Number of messages to buffer.\n    Redis state manager config:\n    - redis_host (str): The host address for the Redis server.\n    - redis_port (int): The port number for the Redis server.\n    - redis_db (int): The Redis database to be used.\n    Postgres state manager config:\n    - postgres_host (str): The host address for the PostgreSQL server.\n    - postgres_port (int): The port number for the PostgreSQL server.\n    - postgres_user (str): The username for the PostgreSQL server.\n    - postgres_password (str): The password for the PostgreSQL server.\n    - postgres_database (str): The PostgreSQL database to be used.\n    - postgres_table (str): The PostgreSQL table to be used.\n    DynamoDB state manager config:\n    - dynamodb_table_name (str): The name of the DynamoDB table.\n    - dynamodb_region_name (str): The AWS region for DynamoDB.\n    Deployment\n    - k8s_kind (str): Kind of Kubernetes resource to be deployed as, choices are \"deployment\", \"service\", \"job\", \"cron_job\"\n    - k8s_name (str): Name of the Kubernetes resource.\n    - k8s_image (str): Docker image for the Kubernetes resource.\n    - k8s_replicas (int): Number of replicas.\n    - k8s_env_vars (json): Environment variables as a JSON 
string.\n    - k8s_cpu (str): CPU requirements.\n    - k8s_memory (str): Memory requirements.\n    - k8s_storage (str): Storage requirements.\n    - k8s_gpu (str): GPU requirements.\n    - k8s_kube_config_path (str): Name of the Kubernetes cluster local config.\n    - k8s_api_key (str): Kubernetes API key.\n    - k8s_api_host (str): Kubernetes API host.\n    - k8s_verify_ssl (str): Whether to verify SSL certificates.\n    - k8s_ssl_ca_cert (str): Path to the SSL CA certificate.\n    - k8s_cluster_name (str): Name of the Kubernetes cluster.\n    - k8s_context_name (str): Name of the kubeconfig context.\n    - k8s_namespace (str): Kubernetes namespace, default \"default\".\n    - k8s_labels (json): Labels for Kubernetes resources, as a JSON string.\n    - k8s_annotations (json): Annotations for Kubernetes resources, as a JSON string.\n    - k8s_port (int): Port to run the spout on as a service.\n    - k8s_target_port (int): Port to expose the spout on as a service.\n    - k8s_schedule (str): Schedule to run the spout on as a cron job.\n

    required"},{"location":"core/cli_spoutctl/#cli.spoutctl.SpoutCtl.execute_spout","title":"execute_spout(spout, method_name, *args, **kwargs)","text":"

    Execute a method of a spout.

    Parameters:

    Name Type Description Default spout Spout

    The spout to execute.

    required method_name str

    The name of the method to execute.

    required *args

    Positional arguments to pass to the method.

    () **kwargs

    Keyword arguments to pass to the method.

    {}

    Returns:

    Name Type Description Any

    The result of the method.

    "},{"location":"core/cli_spoutctl/#cli.spoutctl.SpoutCtl.run","title":"run(args)","text":"

    Run the command-line interface.

    Parameters:

    Name Type Description Default args argparse.Namespace

    Parsed command-line arguments.

    required"},{"location":"core/cli_yamlctl/","title":"YamlCtl","text":"

    Control spouts and bolts defined in a YAML file

    "},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl","title":"YamlCtl","text":"

    Command-line interface for managing spouts and bolts based on a YAML configuration.

    The YamlCtl class provides methods to run specific or all spouts and bolts defined in a YAML file. The YAML file's structure is defined by the Geniusfile schema.

    Example YAML structures:

    version: 1\nspouts:\nhttp_listener:\nname: WebhookListener\nmethod: listen\nargs:\nport: 8081\nstate:\ntype: redis\nargs:\nredis_host: \"127.0.0.1\"\nredis_port: 6379\nredis_db: 0\noutput:\ntype: batch\nargs:\nbucket: geniusrise-test\nfolder: train\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: webhook-listener\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\nbolts:\ntext_classifier:\nname: TextClassifier\nmethod: classify\nargs:\nmodel_name: bert-base-uncased\nstate:\ntype: none\ninput:\ntype: batch\nargs:\nbucket: geniusrise-test\nfolder: train\noutput:\ntype: batch\nargs:\nbucket: geniusrise-test\nfolder: model\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: text-classifier\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n
    version: 1\nspouts:\ntwitter_stream:\nname: TwitterStream\nmethod: stream\nargs:\napi_key: \"your_twitter_api_key\"\nhashtags: [\"#AI\", \"#ML\"]\nstate:\ntype: postgres\nargs:\npostgres_host: \"127.0.0.1\"\npostgres_port: 5432\npostgres_user: \"postgres\"\npostgres_password: \"postgres\"\npostgres_database: \"geniusrise\"\npostgres_table: \"twitter_data\"\noutput:\ntype: streaming\nargs:\noutput_topic: twitter_topic\nkafka_servers: \"localhost:9092\"\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: twitter-stream\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\nbolts:\nsentiment_analyzer:\nname: SentimentAnalyzer\nmethod: analyze\nargs:\nmodel_name: \"sentiment-model\"\nstate:\ntype: dynamodb\nargs:\ndynamodb_table_name: \"SentimentAnalysis\"\ndynamodb_region_name: \"us-east-1\"\ninput:\ntype: streaming\nargs:\ninput_topic: twitter_topic\nkafka_servers: \"localhost:9092\"\ngroup_id: \"sentiment-group\"\noutput:\ntype: batch\nargs:\nbucket: geniusrise-test\nfolder: sentiment_results\ndeploy:\ntype: k8s\nargs:\nkind: deployment\nname: sentiment-analyzer\ncontext_name: arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\nnamespace: geniusrise\nimage: geniusrise/geniusrise\nkube_config_path: ~/.kube/config\n

    Attributes:

    Name Type Description geniusfile Geniusfile

    Parsed YAML configuration.

    spout_ctls Dict[str, SpoutCtl]

    Dictionary of SpoutCtl instances.

    bolt_ctls Dict[str, BoltCtl]

    Dictionary of BoltCtl instances.

    "},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.__init__","title":"__init__(spout_ctls, bolt_ctls)","text":"

    Initialize YamlCtl with the path to the YAML file and control instances for spouts and bolts.

    Parameters:

    Name Type Description Default spout_ctls Dict[str, SpoutCtl]

    Dictionary of SpoutCtl instances.

    required bolt_ctls Dict[str, BoltCtl]

    Dictionary of BoltCtl instances.

    required"},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.create_parser","title":"create_parser(parser)","text":"

    Create and return the command-line parser for managing spouts and bolts.

    "},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.deploy_bolt","title":"deploy_bolt(bolt_name)","text":"

    Deploy a specific bolt based on its name.

    Parameters:

    Name Type Description Default bolt_name str

    Name of the bolt to run.

    required"},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.deploy_bolts","title":"deploy_bolts()","text":"

    Deploy all bolts defined in the YAML configuration.

    "},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.deploy_spout","title":"deploy_spout(spout_name)","text":"

    Deploy a specific spout based on its name.

    Parameters:

    Name Type Description Default spout_name str

    Name of the spout to deploy.

    required"},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.deploy_spouts","title":"deploy_spouts()","text":"

    Deploy all spouts defined in the YAML configuration.

    "},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.resolve_reference","title":"resolve_reference(input_type, ref_name)","text":"

    Resolve the reference of a bolt's input based on the input type (spout or bolt).

    Parameters:

    Name Type Description Default input_type str

    Type of the input (\"spout\" or \"bolt\").

    required ref_name str

    Name of the spout or bolt to refer to.

    required

    Returns:

    Name Type Description Output

    The output data of the referred spout or bolt.

    "},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.run","title":"run(args)","text":"

    Run the command-line interface for managing spouts and bolts based on provided arguments. Please note that there is no ordering of the spouts and bolts in the YAML configuration. Each spout and bolt is an independent entity even when connected together.

    Parameters:

    Name Type Description Default args argparse.Namespace

    Parsed command-line arguments.

    required"},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.run_bolt","title":"run_bolt(bolt_name)","text":"

    Run a specific bolt based on its name.

    Parameters:

    Name Type Description Default bolt_name str

    Name of the bolt to run.

    required"},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.run_bolts","title":"run_bolts(executor)","text":"

    Run all bolts defined in the YAML configuration.

    "},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.run_spout","title":"run_spout(spout_name)","text":"

    Run a specific spout based on its name.

    Parameters:

    Name Type Description Default spout_name str

    Name of the spout to run.

    required"},{"location":"core/cli_yamlctl/#cli.yamlctl.YamlCtl.run_spouts","title":"run_spouts(executor)","text":"

    Run all spouts defined in the YAML configuration.

    "},{"location":"core/core_bolt/","title":"Bolt","text":"

    Core Bolt class

    "},{"location":"core/core_bolt/#core.bolt.Bolt","title":"Bolt","text":"

    Bases: Task

    Base class for all bolts.

    A bolt is a component that consumes streams of data, processes them, and possibly emits new data streams.

    "},{"location":"core/core_bolt/#core.bolt.Bolt.__call__","title":"__call__(method_name, *args, **kwargs)","text":"

    Execute a method locally and manage the state.

    Parameters:

    Name Type Description Default method_name str

    The name of the method to execute.

    required *args

    Positional arguments to pass to the method.

    () **kwargs

    Keyword arguments to pass to the method. Keyword Arguments: - Additional keyword arguments specific to the method.

    {}

    Returns:

    Name Type Description Any Any

    The result of the method.

    "},{"location":"core/core_bolt/#core.bolt.Bolt.__init__","title":"__init__(input, output, state, id=None, **kwargs)","text":"

    The Bolt class is a base class for all bolts in the given context. It inherits from the Task class and provides methods for executing tasks both locally and remotely, as well as managing their state, with state management options including in-memory, Redis, PostgreSQL, and DynamoDB, and input and output data for batch, streaming, stream-to-batch, and batch-to-streaming.

    The Bolt class uses the Input, Output and State classes, which are abstract base classes for managing input data, output data and states, respectively. The Input and Output classes each have two subclasses: StreamingInput, BatchInput, StreamingOutput and BatchOutput, which manage streaming and batch input and output data, respectively. The State class is used to get and set state, and it has several subclasses for different types of state managers.

    The Bolt class also uses the ECSManager and K8sManager classes in the execute_remote method, which are used to manage tasks on Amazon ECS and Kubernetes, respectively.

    Usage
    • Create an instance of the Bolt class by providing an Input object, an Output object and a State object.
    • The Input object specifies the input data for the bolt.
    • The Output object specifies the output data for the bolt.
    • The State object handles the management of the bolt's state.
    Example

    input = Input(...) output = Output(...) state = State(...) bolt = Bolt(input, output, state)

    Parameters:

    Name Type Description Default input Input

    The input data.

    required output Output

    The output data.

    required state State

    The state manager.

    required"},{"location":"core/core_bolt/#core.bolt.Bolt.create","title":"create(klass, input_type, output_type, state_type, id=None, **kwargs) staticmethod","text":"

    Create a bolt of a specific type.

    This static method is used to create a bolt of a specific type. It takes in an input type, an output type, a state type, and additional keyword arguments for initializing the bolt.

    The method creates the input, output, and state manager based on the provided types, and then creates and returns a bolt using these configurations.

    Parameters:

    Name Type Description Default klass type

    The Bolt class to create.

    required input_type str

    The type of input (\"batch\" or \"streaming\").

    required output_type str

    The type of output (\"batch\" or \"streaming\").

    required state_type str

    The type of state manager (\"none\", \"redis\", \"postgres\", or \"dynamodb\").

    required **kwargs

    Additional keyword arguments for initializing the bolt.

    Keyword Arguments:\n    Batch input:\n    - input_folder (str): The input folder argument.\n    - input_s3_bucket (str): The input bucket argument.\n    - input_s3_folder (str): The input S3 folder argument.\n    Batch output config:\n    - output_folder (str): The output folder argument.\n    - output_s3_bucket (str): The output bucket argument.\n    - output_s3_folder (str): The output S3 folder argument.\n    Streaming input:\n    - input_kafka_cluster_connection_string (str): The input Kafka servers argument.\n    - input_kafka_topic (str): The input kafka topic argument.\n    - input_kafka_consumer_group_id (str): The Kafka consumer group id.\n    Streaming output:\n    - output_kafka_cluster_connection_string (str): The output Kafka servers argument.\n    - output_kafka_topic (str): The output kafka topic argument.\n    Stream-to-Batch input:\n    - buffer_size (int): Number of messages to buffer.\n    - input_kafka_cluster_connection_string (str): The input Kafka servers argument.\n    - input_kafka_topic (str): The input kafka topic argument.\n    - input_kafka_consumer_group_id (str): The Kafka consumer group id.\n    Batch-to-Streaming input:\n    - buffer_size (int): Number of messages to buffer.\n    - input_folder (str): The input folder argument.\n    - input_s3_bucket (str): The input bucket argument.\n    - input_s3_folder (str): The input S3 folder argument.\n    Stream-to-Batch output:\n    - buffer_size (int): Number of messages to buffer.\n    - output_folder (str): The output folder argument.\n    - output_s3_bucket (str): The output bucket argument.\n    - output_s3_folder (str): The output S3 folder argument.\n    Redis state manager config:\n    - redis_host (str): The Redis host argument.\n    - redis_port (str): The Redis port argument.\n    - redis_db (str): The Redis database argument.\n    Postgres state manager config:\n    - postgres_host (str): The PostgreSQL host argument.\n    - postgres_port (str): The PostgreSQL port 
argument.\n    - postgres_user (str): The PostgreSQL user argument.\n    - postgres_password (str): The PostgreSQL password argument.\n    - postgres_database (str): The PostgreSQL database argument.\n    - postgres_table (str): The PostgreSQL table argument.\n    DynamoDB state manager config:\n    - dynamodb_table_name (str): The DynamoDB table name argument.\n    - dynamodb_region_name (str): The DynamoDB region name argument.\n

    {}

    Returns:

    Name Type Description Bolt Bolt

    The created bolt.

    Raises:

    Type Description ValueError

    If an invalid input type, output type, or state type is provided.

    "},{"location":"core/core_data_batch_input/","title":"Batch data input","text":"

    Batch input manager

    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput","title":"BatchInput","text":"

    Bases: Input

    \ud83d\udcc1 BatchInput: Manages batch input data.

    Attributes:

    Name Type Description input_folder str

    Folder to read input files.

    bucket str

    S3 bucket name.

    s3_folder str

    Folder within the S3 bucket.

    partition_scheme Optional[str]

    Partitioning scheme for S3, e.g., \"year/month/day\".

    Raises:

    Type Description FileNotExistError

    If the file does not exist.

    Parameters:

    Name Type Description Default input_folder str

    Folder to read input files from.

    required bucket str

    S3 bucket name.

    required s3_folder str

    Folder within the S3 bucket.

    required partition_scheme Optional[str]

    Partitioning scheme for S3, e.g., \"year/month/day\".

    None Usage"},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput--initialize-batchinput","title":"Initialize BatchInput","text":"
    input = BatchInput(\"/path/to/input\", \"my_bucket\", \"s3/folder\")\n
    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput--get-the-input-folder","title":"Get the input folder","text":"
    folder = input.get()\n
    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput--save-a-spark-dataframe-to-the-input-folder","title":"Save a Spark DataFrame to the input folder","text":"
    input.from_spark(my_dataframe)\n
    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput--compose-multiple-batchinput-instances","title":"Compose multiple BatchInput instances","text":"
    composed = input.compose(input1, input2)\n
    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput--copy-files-from-s3-to-the-input-folder","title":"Copy files from S3 to the input folder","text":"
    input.from_s3()\n
    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput--collect-metrics","title":"Collect metrics","text":"
    metrics = input.collect_metrics()\n
    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput.__init__","title":"__init__(input_folder, bucket, s3_folder, partition_scheme=None)","text":"

    Initialize a new BatchInput instance.

    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput.collect_metrics","title":"collect_metrics()","text":"

    Collect and return metrics, then clear them for future collection.

    Returns:

    Type Description Dict[str, float]

    Dict[str, float]: Dictionary containing metrics.

    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput.compose","title":"compose(*inputs)","text":"

    Compose multiple BatchInput instances by merging their input folders.

    Parameters:

    Name Type Description Default inputs Input

    Variable number of BatchInput instances.

    ()

    Returns:

    Type Description Union[bool, str]

    Union[bool, str]: True if successful, error message otherwise.

    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput.from_kafka","title":"from_kafka(input_topic, kafka_cluster_connection_string, nr_messages=1000, group_id='geniusrise', partition_scheme=None)","text":"

    Consume messages from a Kafka topic and save them as JSON files in the input folder. Stops consuming after reaching the latest message or the specified number of messages.

    Parameters:

    Name Type Description Default input_topic str

    Kafka topic to consume data from.

    required kafka_cluster_connection_string str

    Connection string for the Kafka cluster.

    required nr_messages int

    Number of messages to consume. Defaults to 1000.

    1000 group_id str

    Kafka consumer group ID. Defaults to \"geniusrise\".

    'geniusrise' partition_scheme Optional[str]

    Optional partitioning scheme for Kafka, e.g., \"year/month/day\".

    None

    Returns:

    Name Type Description str str

    The path to the folder where the consumed messages are saved as JSON files.

    Raises:

    Type Description KafkaConnectionError

    If unable to connect to Kafka.

    Exception

    If any other error occurs during processing.

    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput.from_s3","title":"from_s3(bucket=None, s3_folder=None)","text":"

    Copy contents from a given S3 bucket and location to the input folder.

    Raises:

    Type Description Exception

    If the input folder is not specified.

    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput.from_spark","title":"from_spark(df)","text":"

    Save the contents of a Spark DataFrame to the input folder with optional partitioning.

    Parameters:

    Name Type Description Default df DataFrame

    The Spark DataFrame to save.

    required

    Raises:

    Type Description FileNotExistError

    If the input folder does not exist.

    "},{"location":"core/core_data_batch_input/#core.data.batch_input.BatchInput.get","title":"get()","text":"

    Get the input folder path.

    Returns:

    Name Type Description str str

    The path to the input folder.

    "},{"location":"core/core_data_batch_input/#core.data.batch_input.FileNotExistError","title":"FileNotExistError","text":"

    Bases: Exception

    \u274c Custom exception for file not existing.

    "},{"location":"core/core_data_batch_input/#core.data.batch_input.KafkaConnectionError","title":"KafkaConnectionError","text":"

    Bases: Exception

    \u274c Custom exception for kafka connection problems.

    "},{"location":"core/core_data_batch_output/","title":"Batch data output","text":"

    Batch output manager

    "},{"location":"core/core_data_batch_output/#core.data.batch_output.BatchOutput","title":"BatchOutput","text":"

    Bases: Output

    \ud83d\udcc1 BatchOutput: Manages batch output data.

    Attributes:

    Name Type Description output_folder str

    Folder to save output files.

    bucket str

    S3 bucket name.

    s3_folder str

    Folder within the S3 bucket.

    partition_scheme Optional[str]

    Partitioning scheme for S3, e.g., \"year/month/day\".

    Raises:

    Type Description FileNotExistError

    If the output folder does not exist.

    Parameters:

    Name Type Description Default output_folder str

    Folder to save output files.

    required bucket str

    S3 bucket name.

    required s3_folder str

    Folder within the S3 bucket.

    required partition_scheme Optional[str]

    Partitioning scheme for S3, e.g., \"year/month/day\".

    None Usage
    # Initialize the BatchOutput instance\nconfig = BatchOutput(\"/path/to/output\", \"my_bucket\", \"s3/folder\", partition_scheme=\"%Y/%m/%d\")\n# Save data to a file\nconfig.save({\"key\": \"value\"}, \"example.json\")\n# Compose multiple BatchOutput instances\nresult = config1.compose(config2, config3)\n# Convert output to a Spark DataFrame\nspark_df = config.to_spark(spark_session)\n# Copy files to a remote S3 bucket\nconfig.to_s3()\n# Flush the output to S3\nconfig.flush()\n# Collect metrics\nmetrics = config.collect_metrics()\n
    "},{"location":"core/core_data_batch_output/#core.data.batch_output.BatchOutput.__init__","title":"__init__(output_folder, bucket, s3_folder, partition_scheme=None)","text":"

    Initialize a new BatchOutput instance.

    Parameters:

    Name Type Description Default output_folder str

    Folder to save output files.

    required bucket str

    S3 bucket name.

    required s3_folder str

    Folder within the S3 bucket.

    required"},{"location":"core/core_data_batch_output/#core.data.batch_output.BatchOutput.collect_metrics","title":"collect_metrics()","text":"

    Collect and return metrics, then clear them for future collection.

    Returns:

    Type Description Dict[str, float]

    Dict[str, float]: Dictionary containing metrics.

    "},{"location":"core/core_data_batch_output/#core.data.batch_output.BatchOutput.compose","title":"compose(*outputs)","text":"

    Compose multiple BatchOutput instances by merging their output folders.

    Parameters:

    Name Type Description Default outputs Output

    Variable number of BatchOutput instances.

    ()

    Returns:

    Type Description Union[bool, str]

    Union[bool, str]: True if successful, error message otherwise.

    "},{"location":"core/core_data_batch_output/#core.data.batch_output.BatchOutput.flush","title":"flush()","text":"

    \ud83d\udd04 Flush the output by copying all files and directories from the output folder to a given S3 bucket and folder.

    "},{"location":"core/core_data_batch_output/#core.data.batch_output.BatchOutput.save","title":"save(data, filename=None, **kwargs)","text":"

    \ud83d\udcbe Save data to a file in the output folder.

    Parameters:

    Name Type Description Default data Any

    The data to save.

    required filename Optional[str]

    The filename to use when saving the data to a file.

    None"},{"location":"core/core_data_batch_output/#core.data.batch_output.BatchOutput.to_kafka","title":"to_kafka(output_topic, kafka_cluster_connection_string)","text":"

    Produce messages to a Kafka topic from the files in the output folder.

    Parameters:

    Name Type Description Default output_topic str

    Kafka topic to produce data to.

    required kafka_cluster_connection_string str

    Connection string for the Kafka cluster.

    required key_serializer Optional[str]

    Serializer for message keys. Defaults to None.

    required

    Raises:

    Type Description KafkaConnectionError

    If unable to connect to Kafka.

    Exception

    If any other error occurs during processing.

    "},{"location":"core/core_data_batch_output/#core.data.batch_output.BatchOutput.to_s3","title":"to_s3()","text":"

    \u2601\ufe0f Recursively copy all files and directories from the output folder to a given S3 bucket and folder.

    "},{"location":"core/core_data_batch_output/#core.data.batch_output.BatchOutput.to_spark","title":"to_spark(spark)","text":"

    Get a Spark DataFrame from the output folder.

    Returns:

    Type Description pyspark.sql.DataFrame

    pyspark.sql.DataFrame: A Spark DataFrame where each row corresponds to a file in the output folder.

    Raises:

    Type Description FileNotExistError

    If the output folder does not exist.

    "},{"location":"core/core_data_batch_output/#core.data.batch_output.FileNotExistError","title":"FileNotExistError","text":"

    Bases: Exception

    \u274c Custom exception for file not existing.

    "},{"location":"core/core_data_batch_output/#core.data.batch_output.KafkaConnectionError","title":"KafkaConnectionError","text":"

    Bases: Exception

    \u274c Custom exception for Kafka connection problems.

    "},{"location":"core/core_data_input/","title":"Data input","text":"

    Input manager base class

    "},{"location":"core/core_data_input/#core.data.input.Input","title":"Input","text":"

    Bases: ABC

    Abstract class for managing input data.

    Attributes:

    Name Type Description log logging.Logger

    Logger instance.

    "},{"location":"core/core_data_input/#core.data.input.Input.__add__","title":"__add__(*inputs)","text":"

    Compose multiple inputs.

    Parameters:

    Name Type Description Default inputs Input

    Variable number of Input instances.

    ()

    Returns:

    Type Description Union[bool, str]

    Union[bool, str]: True if successful, error message otherwise.

    "},{"location":"core/core_data_input/#core.data.input.Input.collect_metrics","title":"collect_metrics() abstractmethod","text":"

    Collect metrics like latency.

    Returns:

    Type Description Dict[str, float]

    Dict[str, float]: A dictionary containing metrics.

    "},{"location":"core/core_data_input/#core.data.input.Input.compose","title":"compose(*inputs) abstractmethod","text":"

    Compose multiple inputs.

    Parameters:

    Name Type Description Default inputs Input

    Variable number of Input instances.

    ()

    Returns:

    Type Description Union[bool, str]

    Union[bool, str]: True if successful, error message otherwise.

    "},{"location":"core/core_data_input/#core.data.input.Input.get","title":"get() abstractmethod","text":"

    Abstract method to get data from the input source.

    Returns:

    Name Type Description Any Any

    The data from the input source.

    "},{"location":"core/core_data_input/#core.data.input.Input.retryable_get","title":"retryable_get()","text":"

    Retryable get method.

    Returns:

    Name Type Description Any Any

    The data from the input source.

    "},{"location":"core/core_data_output/","title":"Data output","text":"

    Output manager base class

    "},{"location":"core/core_data_output/#core.data.output.Output","title":"Output","text":"

    Bases: ABC

    Abstract base class for managing output data.

    "},{"location":"core/core_data_output/#core.data.output.Output.flush","title":"flush() abstractmethod","text":"

    Flush the output. This method should be implemented by subclasses.

    "},{"location":"core/core_data_output/#core.data.output.Output.save","title":"save(data, **kwargs) abstractmethod","text":"

    Save data to a file or ingest it into a Kafka topic.

    Parameters:

    Name Type Description Default data Any

    The data to save or ingest.

    required filename str

    The filename to use when saving the data to a file.

    required"},{"location":"core/core_data_streaming_input/","title":"Streaming data input","text":"

    Streaming input manager

    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.KafkaConnectionError","title":"KafkaConnectionError","text":"

    Bases: Exception

    \u274c Custom exception for kafka connection problems.

    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput","title":"StreamingInput","text":"

    Bases: Input

    \ud83d\udce1 StreamingInput: Manages streaming input data from Kafka and other streaming sources.

    Attributes:

    Name Type Description input_topic str

    Kafka topic to consume data from.

    kafka_cluster_connection_string str

    Connection string for the Kafka cluster.

    group_id str

    Kafka consumer group ID.

    consumer KafkaConsumer

    Kafka consumer instance.

    Usage

    input = StreamingInput(\"my_topic\", \"localhost:9094\") for message in input.get(): print(message.value)

    Parameters:

    Name Type Description Default input_topic str

    Kafka topic to consume data from.

    required kafka_cluster_connection_string str

    Connection string for the Kafka cluster.

    required group_id str

    Kafka consumer group ID. Defaults to \"geniusrise\".

    'geniusrise' **kwargs

    Additional keyword arguments for KafkaConsumer.

    {}

    Raises:

    Type Description KafkaConnectionError

    If unable to connect to Kafka.

    Usage"},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput--using-get-method-to-consume-from-kafka","title":"Using get method to consume from Kafka","text":"
    input = StreamingInput(\"my_topic\", \"localhost:9094\")\nconsumer = input.get()\nfor message in consumer:\nprint(message.value)\n
    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput--using-from_streamz-method-to-process-streamz-dataframe","title":"Using from_streamz method to process streamz DataFrame","text":"
    input = StreamingInput(\"my_topic\", \"localhost:9094\")\nstreamz_df = ...  # Assume this is a streamz DataFrame\nfor row in input.from_streamz(streamz_df):\nprint(row)\n
    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput--using-from_spark-method-to-process-spark-dataframe","title":"Using from_spark method to process Spark DataFrame","text":"
    input = StreamingInput(\"my_topic\", \"localhost:9094\")\nspark_df = ...  # Assume this is a Spark DataFrame\nmap_func = lambda row: {\"key\": row.key, \"value\": row.value}\nquery_or_rdd = input.from_spark(spark_df, map_func)\n
    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput--using-compose-method-to-merge-multiple-streaminginput-instances","title":"Using compose method to merge multiple StreamingInput instances","text":"
    input1 = StreamingInput(\"topic1\", \"localhost:9094\")\ninput2 = StreamingInput(\"topic2\", \"localhost:9094\")\nresult = input1.compose(input2)\n
    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput--using-close-method-to-close-the-kafka-consumer","title":"Using close method to close the Kafka consumer","text":"
    input = StreamingInput(\"my_topic\", \"localhost:9094\")\ninput.close()\n
    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput--using-seek-method-to-seek-to-a-specific-offset","title":"Using seek method to seek to a specific offset","text":"
    input = StreamingInput(\"my_topic\", \"localhost:9094\")\ninput.seek(42)\n
    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput--using-commit-method-to-manually-commit-offsets","title":"Using commit method to manually commit offsets","text":"
    input = StreamingInput(\"my_topic\", \"localhost:9094\")\ninput.commit()\n
    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput--using-collect_metrics-method-to-collect-kafka-metrics","title":"Using collect_metrics method to collect Kafka metrics","text":"
    input = StreamingInput(\"my_topic\", \"localhost:9094\")\nmetrics = input.collect_metrics()\nprint(metrics)\n
    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput.__init__","title":"__init__(input_topic, kafka_cluster_connection_string, group_id='geniusrise', **kwargs)","text":"

    \ud83d\udca5 Initialize a new StreamingInput instance.

    Parameters:

    Name Type Description Default input_topic str

    Kafka topic to consume data from.

    required kafka_cluster_connection_string str

    Kafka cluster connection string.

    required group_id str

    Kafka consumer group id. Defaults to \"geniusrise\".

    'geniusrise'"},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput.close","title":"close()","text":"

    \ud83d\udeaa Close the Kafka consumer.

    Raises:

    Type Description Exception

    If an error occurs while closing the consumer.

    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput.collect_metrics","title":"collect_metrics()","text":"

    \ud83d\udcca Collect metrics related to the Kafka consumer.

    Returns:

    Type Description Dict[str, Union[int, float]]

    Dict[str, Union[int, float]]: A dictionary containing metrics like latency.

    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput.commit","title":"commit()","text":"

    \u2705 Manually commit offsets.

    Raises:

    Type Description Exception

    If an error occurs while committing offsets.

    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput.compose","title":"compose(*inputs)","text":"

    Compose multiple StreamingInput instances by merging their iterators.

    Parameters:

    Name Type Description Default inputs StreamingInput

    Variable number of StreamingInput instances.

    ()

    Returns:

    Type Description Union[bool, str]

    Union[bool, str]: True if successful, error message otherwise.

    Caveat

    On merging different topics, other operations such as seek, commit, and offset management may not behave as expected across the merged iterators.

    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput.from_spark","title":"from_spark(spark_df, map_func)","text":"

    Process a Spark DataFrame as a stream, similar to Kafka processing.

    Parameters:

    Name Type Description Default spark_df DataFrame

    The Spark DataFrame to process.

    required map_func Callable[[Row], Any]

    Function to map each row of the DataFrame.

    required

    Returns:

    Type Description Union[StreamingQuery, RDD[Any]]

    Union[StreamingQuery, RDD[Any]]: Returns a StreamingQuery for streaming DataFrames, and an RDD for batch DataFrames.

    Raises:

    Type Description Exception

    If an error occurs during processing.

    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput.from_streamz","title":"from_streamz(streamz_df, sentinel=None, timeout=5)","text":"

    Process a streamz DataFrame as a stream, similar to Kafka processing.

    Parameters:

    Name Type Description Default streamz_df ZDataFrame

    The streamz DataFrame to process.

    required sentinel Any

    The value that, when received, will stop the generator.

    None timeout int

    The time to wait for an item from the queue before raising an exception.

    5

    Yields:

    Name Type Description Any Any

    Yields each row as a dictionary.

    "},{"location":"core/core_data_streaming_input/#core.data.streaming_input.StreamingInput.get","title":"get()","text":"

    \ud83d\udce5 Get data from the input topic.

    Returns:

    Name Type Description KafkaConsumer KafkaConsumer

    The Kafka consumer.

    Raises:

    Type Description Exception

    If no input source or consumer is specified.

    "},{"location":"core/core_data_streaming_output/","title":"Streaming data output","text":"

    Streaming output manager

    "},{"location":"core/core_data_streaming_output/#core.data.streaming_output.StreamingOutput","title":"StreamingOutput","text":"

    Bases: Output

    \ud83d\udce1 StreamingOutput: Manages streaming output data.

    Attributes:

    Name Type Description output_topic str

    Kafka topic to ingest data.

    producer KafkaProducer

    Kafka producer for ingesting data.

    Usage:

    config = StreamingOutput(\"my_topic\", \"localhost:9094\")\nconfig.save({\"key\": \"value\"}, \"ignored_filename\")\nconfig.flush()\n

    Note: Ensure the Kafka cluster is running and accessible.

    "},{"location":"core/core_data_streaming_output/#core.data.streaming_output.StreamingOutput.__init__","title":"__init__(output_topic, kafka_servers)","text":"

    Initialize a new StreamingOutput instance.

    Parameters:

    Name Type Description Default output_topic str

    Kafka topic to ingest data.

    required kafka_servers str

    Kafka bootstrap servers.

    required"},{"location":"core/core_data_streaming_output/#core.data.streaming_output.StreamingOutput.close","title":"close()","text":"

    \ud83d\udeaa Close the Kafka producer.

    Raises:

    Type Description Exception

    If no Kafka producer is available.

    "},{"location":"core/core_data_streaming_output/#core.data.streaming_output.StreamingOutput.flush","title":"flush()","text":"

    \ud83d\udd04 Flush the output by flushing the Kafka producer.

    Raises:

    Type Description Exception

    If no Kafka producer is available.

    "},{"location":"core/core_data_streaming_output/#core.data.streaming_output.StreamingOutput.partition_available","title":"partition_available(partition)","text":"

    \ud83e\uddd0 Check if a partition is available in the Kafka topic.

    Parameters:

    Name Type Description Default partition int

    The partition to check.

    required

    Returns:

    Name Type Description bool bool

    True if the partition is available, False otherwise.

    Raises:

    Type Description Exception

    If no Kafka producer is available.

    "},{"location":"core/core_data_streaming_output/#core.data.streaming_output.StreamingOutput.save","title":"save(data, **kwargs)","text":"

    \ud83d\udce4 Ingest data into the Kafka topic.

    Parameters:

    Name Type Description Default data Any

    The data to ingest.

    required filename str

    This argument is ignored for streaming outputs.

    required

    Raises:

    Type Description Exception

    If no Kafka producer is available or an error occurs.

    "},{"location":"core/core_data_streaming_output/#core.data.streaming_output.StreamingOutput.save_to_partition","title":"save_to_partition(value, partition)","text":"

    \ud83c\udfaf Send a message to a specific partition in the Kafka topic.

    Parameters:

    Name Type Description Default value Any

    The value of the message.

    required partition int

    The partition to send the message to.

    required

    Raises:

    Type Description Exception

    If no Kafka producer is available or an error occurs.

    "},{"location":"core/core_spout/","title":"Spout","text":"

    Core Spout class

    "},{"location":"core/core_spout/#core.spout.Spout","title":"Spout","text":"

    Bases: Task

    Base class for all spouts.

    "},{"location":"core/core_spout/#core.spout.Spout.__call__","title":"__call__(method_name, *args, **kwargs)","text":"

    Execute a method locally and manage the state.

    Parameters:

    Name Type Description Default method_name str

    The name of the method to execute.

    required *args

    Positional arguments to pass to the method.

    () **kwargs

    Keyword arguments to pass to the method. Keyword Arguments: - Additional keyword arguments specific to the method.

    {}

    Returns:

    Name Type Description Any Any

    The result of the method.

    "},{"location":"core/core_spout/#core.spout.Spout.__init__","title":"__init__(output, state, id=None, **kwargs)","text":"

    The Spout class is a base class for all spouts in the given context. It inherits from the Task class and provides methods for executing tasks both locally and remotely, as well as managing their state, with state management options including in-memory, Redis, PostgreSQL, and DynamoDB, and output data for batch or streaming data.

    The Spout class uses the Output and State classes, which are abstract base classes for managing output data and states, respectively. The Output class has two subclasses: StreamingOutput and BatchOutput, which manage streaming and batch output data, respectively. The State class is used to get and set state, and it has several subclasses for different types of state managers.

    The Spout class also uses the ECSManager and K8sManager classes in the execute_remote method, which are used to manage tasks on Amazon ECS and Kubernetes, respectively.

    Usage
    • Create an instance of the Spout class by providing an Output object and a State object.
    • The Output object specifies the output data for the spout.
    • The State object handles the management of the spout's state.
    Example

    output = Output(...) state = State(...) spout = Spout(output, state)

    Parameters:

    Name Type Description Default output Output

    The output data.

    required state State

    The state manager.

    required"},{"location":"core/core_spout/#core.spout.Spout.create","title":"create(klass, output_type, state_type, id=None, **kwargs) staticmethod","text":"

    Create a spout of a specific type.

    Parameters:

    Name Type Description Default klass type

    The Spout class to create.

    required output_type str

    The type of output (\"batch\" or \"streaming\").

    required state_type str

    The type of state manager (\"none\", \"redis\", \"postgres\", or \"dynamodb\").

    required **kwargs

    Additional keyword arguments for initializing the spout.

    Keyword Arguments:\n    Batch output:\n    - output_folder (str): The directory where output files should be stored temporarily.\n    - output_s3_bucket (str): The name of the S3 bucket for output storage.\n    - output_s3_folder (str): The S3 folder for output storage.\n    Streaming output:\n    - output_kafka_topic (str): Kafka output topic for streaming spouts.\n    - output_kafka_cluster_connection_string (str): Kafka connection string for streaming spouts.\n    Stream to Batch output:\n    - output_folder (str): The directory where output files should be stored temporarily.\n    - output_s3_bucket (str): The name of the S3 bucket for output storage.\n    - output_s3_folder (str): The S3 folder for output storage.\n    - buffer_size (int): Number of messages to buffer.\n    Redis state manager config:\n    - redis_host (str): The host address for the Redis server.\n    - redis_port (int): The port number for the Redis server.\n    - redis_db (int): The Redis database to be used.\n    Postgres state manager config:\n    - postgres_host (str): The host address for the PostgreSQL server.\n    - postgres_port (int): The port number for the PostgreSQL server.\n    - postgres_user (str): The username for the PostgreSQL server.\n    - postgres_password (str): The password for the PostgreSQL server.\n    - postgres_database (str): The PostgreSQL database to be used.\n    - postgres_table (str): The PostgreSQL table to be used.\n    DynamoDB state manager config:\n    - dynamodb_table_name (str): The name of the DynamoDB table.\n    - dynamodb_region_name (str): The AWS region for DynamoDB\n

    {}

    Returns:

    Name Type Description Spout Spout

    The created spout.

    Raises:

    Type Description ValueError

    If an invalid output type or state type is provided.

    "},{"location":"core/core_state_base/","title":"State","text":"

    Base class for task state manager

    "},{"location":"core/core_state_base/#core.state.base.State","title":"State","text":"

    Bases: ABC

    Abstract base class for a state manager.

    This class is responsible for managing task states. It provides an interface for state management and captures task-related metrics.

    Attributes:

    Name Type Description buffer Dict[str, Any]

    Buffer for state data.

    log logging.Logger

    Logger for capturing logs.

    task_id str

    Identifier for the task.

    "},{"location":"core/core_state_base/#core.state.base.State.__del__","title":"__del__()","text":"

    Destructor to flush the buffer before object deletion.

    This ensures that any buffered state data is not lost when the object is deleted.

    "},{"location":"core/core_state_base/#core.state.base.State.flush","title":"flush()","text":"

    Flush the buffer to the state storage.

    This method is responsible for writing the buffered state data to the underlying storage mechanism.

    "},{"location":"core/core_state_base/#core.state.base.State.get","title":"get(task_id, key) abstractmethod","text":"

    Abstract method to get the state associated with a task and key.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required key str

    The key to get the state for.

    required

    Returns:

    Type Description Optional[Dict[str, Any]]

    Optional[Dict[str, Any]]: The state associated with the task and key, if it exists.

    "},{"location":"core/core_state_base/#core.state.base.State.get_state","title":"get_state(key)","text":"

    Get the state associated with a key from the buffer or underlying storage.

    Parameters:

    Name Type Description Default key str

    The key to get the state for.

    required

    Returns:

    Type Description Optional[Dict[str, Any]]

    Optional[Dict[str, Any]]: The state associated with the key.

    "},{"location":"core/core_state_base/#core.state.base.State.set","title":"set(task_id, key, value) abstractmethod","text":"

    Abstract method to set the state associated with a task and key.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required key str

    The key to set the state for.

    required value Dict[str, Any]

    The state to set.

    required"},{"location":"core/core_state_base/#core.state.base.State.set_state","title":"set_state(key, value)","text":"

    Set the state associated with a key in the buffer.

    Parameters:

    Name Type Description Default key str

    The key to set the state for.

    required value Dict[str, Any]

    The state to set.

    required"},{"location":"core/core_state_dynamo/","title":"DynamoDB State","text":"

    State manager using DynamoDB

    "},{"location":"core/core_state_dynamo/#core.state.dynamo.DynamoDBState","title":"DynamoDBState","text":"

    Bases: State

    DynamoDBState: A state manager that stores state in DynamoDB.

    Attributes:

    Name Type Description dynamodb boto3.resources.factory.dynamodb.ServiceResource

    The DynamoDB service resource.

    table boto3.resources.factory.dynamodb.Table

    The DynamoDB table.

    "},{"location":"core/core_state_dynamo/#core.state.dynamo.DynamoDBState.__init__","title":"__init__(task_id, table_name, region_name)","text":"

    Initialize a new DynamoDB state manager.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required table_name str

    The name of the DynamoDB table.

    required region_name str

    The name of the AWS region.

    required"},{"location":"core/core_state_dynamo/#core.state.dynamo.DynamoDBState.get","title":"get(task_id, key)","text":"

    Get the state associated with a task and key.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required key str

    The key to get the state for.

    required

    Returns:

    Type Description Optional[Dict[str, Any]]

    Optional[Dict[str, Any]]: The state associated with the task and key, if it exists.

    "},{"location":"core/core_state_dynamo/#core.state.dynamo.DynamoDBState.set","title":"set(task_id, key, value)","text":"

    Set the state associated with a task and key.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required key str

    The key to set the state for.

    required value Dict[str, Any]

    The state to set.

    required"},{"location":"core/core_state_memory/","title":"In-memory State","text":"

    State manager using local memory

    "},{"location":"core/core_state_memory/#core.state.memory.InMemoryState","title":"InMemoryState","text":"

    Bases: State

    \ud83e\udde0 InMemoryState: A state manager that stores state in memory.

    This manager is useful for temporary storage or testing purposes. Since it's in-memory, the data will be lost once the application stops.

    "},{"location":"core/core_state_memory/#core.state.memory.InMemoryState--attributes","title":"Attributes:","text":"
    • store (Dict[str, Dict]): The in-memory store for states.
    "},{"location":"core/core_state_memory/#core.state.memory.InMemoryState--usage","title":"Usage:","text":"
    manager = InMemoryState()\nmanager.set_state(\"user123\", {\"status\": \"active\"})\nstate = manager.get_state(\"user123\")\nprint(state)  # Outputs: {\"status\": \"active\"}\n

    Remember, this is an in-memory store. Do not use it for persistent storage!

    "},{"location":"core/core_state_memory/#core.state.memory.InMemoryState.__init__","title":"__init__(task_id)","text":"

    \ud83d\udca5 Initialize a new in-memory state manager.

    "},{"location":"core/core_state_memory/#core.state.memory.InMemoryState.get","title":"get(task_id, key)","text":"

    \ud83d\udcd6 Get the state associated with a key.

    Parameters:

    Name Type Description Default key str

    The key to get the state for.

    required

    Returns:

    Name Type Description Dict Optional[Dict]

    The state associated with the key, or None if not found.

    "},{"location":"core/core_state_memory/#core.state.memory.InMemoryState.set","title":"set(task_id, key, value)","text":"

    \ud83d\udcdd Set the state associated with a key.

    Parameters:

    Name Type Description Default key str

    The key to set the state for.

    required value Dict

    The state to set.

    required

    Example:

    manager.set_state(\"user123\", {\"status\": \"active\"})\n

    "},{"location":"core/core_state_postgres/","title":"Postgres State","text":"

    State manager using a Postgres database

    "},{"location":"core/core_state_postgres/#core.state.postgres.PostgresState","title":"PostgresState","text":"

    Bases: State

    \ud83d\uddc4\ufe0f PostgresState: A state manager that stores state in a PostgreSQL database.

    This manager provides a persistent storage solution using a PostgreSQL database.

    Attributes:

    Name Type Description conn psycopg2.extensions.connection

    The PostgreSQL connection.

    table str

    The table to use for storing state data.

    "},{"location":"core/core_state_postgres/#core.state.postgres.PostgresState.__init__","title":"__init__(task_id, host, port, user, password, database, table='geniusrise_state')","text":"

    Initialize a new PostgreSQL state manager.

    Parameters:

    Name Type Description Default task_id str

    The identifier for the task.

    required host str

    The host of the PostgreSQL server.

    required port int

    The port of the PostgreSQL server.

    required user str

    The user to connect as.

    required password str

    The user's password.

    required database str

    The database to connect to.

    required table str

    The table to use. Defaults to \"geniusrise_state\".

    'geniusrise_state'"},{"location":"core/core_state_postgres/#core.state.postgres.PostgresState.get","title":"get(task_id, key)","text":"

    Get the state associated with a task and key.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required key str

    The key to get the state for.

    required

    Returns:

    Type Description Optional[Dict]

    Optional[Dict]: The state associated with the task and key, or None if not found.

    "},{"location":"core/core_state_postgres/#core.state.postgres.PostgresState.set","title":"set(task_id, key, value)","text":"

    Set the state associated with a task and key.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required key str

    The key to set the state for.

    required value Dict

    The state to set.

    required"},{"location":"core/core_state_redis/","title":"Redis State","text":"

    State manager using Redis

    "},{"location":"core/core_state_redis/#core.state.redis.RedisState","title":"RedisState","text":"

    Bases: State

    RedisState: A state manager that stores state in Redis.

    This manager provides a fast, in-memory storage solution using Redis.

    Attributes:

    Name Type Description redis redis.Redis

    The Redis connection.

    "},{"location":"core/core_state_redis/#core.state.redis.RedisState.__init__","title":"__init__(task_id, host, port, db)","text":"

    Initialize a new Redis state manager.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required host str

    The host of the Redis server.

    required port int

    The port of the Redis server.

    required db int

    The database number to connect to.

    required"},{"location":"core/core_state_redis/#core.state.redis.RedisState.get","title":"get(task_id, key)","text":"

    Get the state associated with a task and key.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required key str

    The key to get the state for.

    required

    Returns:

    Type Description Optional[Dict[str, Any]]

    Optional[Dict[str, Any]]: The state associated with the task and key, if it exists.

    "},{"location":"core/core_state_redis/#core.state.redis.RedisState.set","title":"set(task_id, key, value)","text":"

    Set the state associated with a task and key.

    Parameters:

    Name Type Description Default task_id str

    The task identifier.

    required key str

    The key to set the state for.

    required value Dict[str, Any]

    The state to set.

    required"},{"location":"core/core_task_base/","title":"Task","text":"

    Base class for Task

    "},{"location":"core/core_task_base/#core.task.base.Task","title":"Task","text":"

    Bases: ABC

    \ud83d\udee0\ufe0f Task: Class for managing tasks.

    This class provides a foundation for creating and managing tasks. Each task has a unique identifier and can be associated with specific input and output data.

    "},{"location":"core/core_task_base/#core.task.base.Task--attributes","title":"Attributes:","text":"
    • id (uuid.UUID): Unique identifier for the task.
    • input (Input): Configuration for input data.
    • output (Output): Configuration for output data.
    "},{"location":"core/core_task_base/#core.task.base.Task--usage","title":"Usage:","text":"
    task = Task()\ntask.execute(\"fetch_data\")\n

    !!! note Extend this class to implement specific task functionalities.

    "},{"location":"core/core_task_base/#core.task.base.Task.__init__","title":"__init__(id=None)","text":"

    Initialize a new task.

    Parameters:

    Name Type Description Default input Input

    Configuration for input data.

    required output Output

    Configuration for output data.

    required"},{"location":"core/core_task_base/#core.task.base.Task.__repr__","title":"__repr__()","text":"

    Return a string representation of the task.

    Returns:

    Name Type Description str str

    A string representation of the task.

    "},{"location":"core/core_task_base/#core.task.base.Task.execute","title":"execute(method_name, *args, **kwargs)","text":"

    \ud83d\ude80 Execute a given fetch_* method if it exists.

    Parameters:

    Name Type Description Default method_name str

    The name of the fetch_* method to execute.

    required *args

    Positional arguments to pass to the method.

    () **kwargs

    Keyword arguments to pass to the method.

    {}

    Returns:

    Name Type Description Any Any

    The result of the fetch_* method, or None if the method does not exist.

    Raises:

    Type Description AttributeError

    If the specified method doesn't exist.

    "},{"location":"core/core_task_base/#core.task.base.Task.get_methods","title":"get_methods() staticmethod","text":"

    \ud83d\udcdc Get all the fetch_* methods and their parameters along with their default values and docstrings.

    Returns:

    Type Description List[Tuple[str, List[str], Optional[str]]]

    List[Tuple[str, List[str], str]]: A list of tuples, where each tuple contains the name of a fetch_* method,

    List[Tuple[str, List[str], Optional[str]]]

    a list of its parameters along with their default values, and its docstring.

    "},{"location":"core/core_task_base/#core.task.base.Task.print_help","title":"print_help() staticmethod","text":"

    \ud83d\udda8\ufe0f Pretty print the fetch_* methods and their parameters along with their default values and docstrings. Also prints the class's docstring and init parameters.

    "},{"location":"core/docker/","title":"Docker Deployment","text":"

    DockerResourceManager is a utility for managing Docker resources, including containers and images. It provides a command-line interface (CLI) for various Docker operations, such as listing, inspecting, creating, starting, and stopping containers, as well as managing images.

    This class uses the Docker SDK for Python to interact with the Docker daemon, offering a convenient way to manage Docker containers and images from the command line.

    CLI Usage

    genius docker sub-command

    Sub-commands
    • list_containers: List all containers, with an option to include stopped containers. genius docker list_containers [--all]
    • inspect_container: Inspect a specific container by its ID. genius docker inspect_container <container_id>
    • create_container: Create a new container with specified image, command, and other parameters. genius docker create_container <image> [options]
    • start_container: Start a container by its ID. genius docker start_container <container_id>
    • stop_container: Stop a container by its ID. genius docker stop_container <container_id>
    • list_images: List all Docker images available on the local system. genius docker list_images
    • inspect_image: Inspect a specific image by its ID. genius docker inspect_image <image_id>
    • pull_image: Pull an image from a Docker registry. genius docker pull_image <image>
    • push_image: Push an image to a Docker registry. genius docker push_image <image>

    Each sub-command supports various options to specify the details of the container or image operation, such as environment variables, port mappings, volume mappings, and more.

    Attributes:

    Name Type Description client

    The Docker client connection to interact with the Docker daemon.

    log

    Logger for the class to log information, warnings, and errors.

    console

    Rich console object to print formatted and styled outputs.

    Methods
    • connect: Method to establish a connection to the Docker daemon.
    • list_containers: Method to list all containers, with an option to include stopped ones.
    • inspect_container: Method to inspect details of a specific container.
    • create_container: Method to create a new container with given parameters.
    • start_container: Method to start a specific container.
    • stop_container: Method to stop a specific container.
    • list_images: Method to list all Docker images.
    • inspect_image: Method to inspect a specific image.
    • pull_image: Method to pull an image from a Docker registry.
    • push_image: Method to push an image to a Docker registry.
    Note
    • Ensure that the Docker daemon is running and accessible at the specified URL.
    • Make sure to have the necessary permissions to interact with the Docker daemon and manage containers and images.
    "},{"location":"core/docker/#runners.docker.base.DockerResourceManager.__init__","title":"__init__()","text":"

    Initialize the Docker Resource Manager.

    "},{"location":"core/docker/#runners.docker.base.DockerResourceManager.connect","title":"connect(base_url='unix://var/run/docker.sock')","text":"

    Connect to the Docker daemon.

    Parameters:

    Name Type Description Default base_url str

    URL to the Docker daemon.

    'unix://var/run/docker.sock'"},{"location":"core/docker/#runners.docker.base.DockerResourceManager.create_container","title":"create_container(image, command=None, name=None, env_vars=None, ports=None, volumes=None, **kwargs)","text":"

    Create a new container.

    Parameters:

    Name Type Description Default image str

    Name of the image to create the container from.

    required command Optional[str]

    Command to run in the container.

    None name Optional[str]

    Name of the container.

    None env_vars Optional[Dict[str, str]]

    Environment variables.

    None ports Optional[Dict[str, str]]

    Port mappings.

    None volumes Optional[Dict[str, Dict[str, str]]]

    Volume mappings.

    None

    Returns:

    Name Type Description str str

    ID of the created container.

    "},{"location":"core/docker/#runners.docker.base.DockerResourceManager.create_parser","title":"create_parser(parser)","text":"

    Create a parser for CLI commands.

    Returns:

    Name Type Description ArgumentParser ArgumentParser

    The parser for Docker operations.

    "},{"location":"core/docker/#runners.docker.base.DockerResourceManager.inspect_container","title":"inspect_container(container_id)","text":"

    Inspect a specific container.

    Parameters:

    Name Type Description Default container_id str

    ID of the container to inspect.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: Container details.

    "},{"location":"core/docker/#runners.docker.base.DockerResourceManager.inspect_image","title":"inspect_image(image_id)","text":"

    Inspect a specific image.

    Parameters:

    Name Type Description Default image_id str

    ID of the image to inspect.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: Image details.

    "},{"location":"core/docker/#runners.docker.base.DockerResourceManager.list_containers","title":"list_containers(all_containers=False)","text":"

    List all containers.

    Parameters:

    Name Type Description Default all_containers bool

    Flag to list all containers, including stopped ones.

    False

    Returns:

    Type Description List[Any]

    List[Any]: List of containers.

    "},{"location":"core/docker/#runners.docker.base.DockerResourceManager.list_images","title":"list_images()","text":"

    List all Docker images.

    Returns:

    Type Description List[Any]

    List[Any]: List of images.

    "},{"location":"core/docker/#runners.docker.base.DockerResourceManager.pull_image","title":"pull_image(image)","text":"

    Pull an image from a Docker registry.

    Parameters:

    Name Type Description Default image str

    Name of the image to pull.

    required"},{"location":"core/docker/#runners.docker.base.DockerResourceManager.push_image","title":"push_image(image)","text":"

    Push an image to a Docker registry.

    Parameters:

    Name Type Description Default image str

    Name of the image to push.

    required"},{"location":"core/docker/#runners.docker.base.DockerResourceManager.run","title":"run(args)","text":"

    Run the Docker Resource Manager based on the parsed CLI arguments.

    Parameters:

    Name Type Description Default args Namespace

    The parsed CLI arguments.

    required"},{"location":"core/docker/#runners.docker.base.DockerResourceManager.start_container","title":"start_container(container_id)","text":"

    Start a container.

    Parameters:

    Name Type Description Default container_id str

    ID of the container to start.

    required"},{"location":"core/docker/#runners.docker.base.DockerResourceManager.stop_container","title":"stop_container(container_id)","text":"

    Stop a container.

    Parameters:

    Name Type Description Default container_id str

    ID of the container to stop.

    required"},{"location":"core/docker_swarm/","title":"Docker Swarm Deployment","text":"

    Bases: DockerResourceManager

    DockerSwarmManager is a utility for managing Docker Swarm services, including creating, inspecting, updating, and removing services. It extends DockerResourceManager to provide swarm-specific functionalities and commands via a command-line interface (CLI).

    The manager interacts with the Docker Swarm API, offering a convenient way to manage Swarm services, nodes, and other swarm-related tasks from the command line.

    CLI Usage

    genius docker swarm sub-command

    Sub-commands
    • list_nodes: List all nodes in the Docker Swarm. genius docker swarm list_nodes
    • inspect_node: Inspect a specific Swarm node by its ID. genius docker swarm inspect_node <node_id>
    • create_service: Create a new service in the Docker Swarm with comprehensive specifications. genius docker swarm create_service [options]
    • list_services: List all services in the Docker Swarm. genius docker swarm list_services
    • inspect_service: Inspect a specific service by its ID. genius docker swarm inspect_service <service_id>
    • update_service: Update an existing service with new parameters. genius docker swarm update_service <service_id> [options]
    • remove_service: Remove a service from the Docker Swarm. genius docker swarm remove_service <service_id>
    • service_logs: Retrieve logs of a Docker Swarm service. genius docker swarm service_logs <service_id> [--tail] [--follow]
    • scale_service: Scale a service to a specified number of replicas. genius docker swarm scale_service <service_id> <replicas>

    Each sub-command supports various options to specify the details of the swarm node or service operation. These options include node and service IDs, image and command specifications for services, environment variables, resource limits, and much more.

    Attributes:

    Name Type Description swarm_client

    The Docker Swarm client connection to interact with the Docker Swarm API.

    log

    Logger for the class to log information, warnings, and errors.

    console

    Rich console object to print formatted and styled outputs.

    Methods
    • connect_to_swarm: Method to establish a connection to the Docker Swarm.
    • list_nodes: Method to list all nodes in the Docker Swarm.
    • inspect_node: Method to inspect details of a specific Swarm node.
    • create_service: Method to create a new service with given specifications.
    • list_services: Method to list all services in the Docker Swarm.
    • inspect_service: Method to inspect a specific service.
    • update_service: Method to update an existing service with new parameters.
    • remove_service: Method to remove a service from the Docker Swarm.
    • get_service_logs: Method to retrieve logs of a Docker Swarm service.
    • scale_service: Method to scale a service to a specified number of replicas.
    Note
    • Ensure that the Docker Swarm is initialized and running.
    • Make sure to have the necessary permissions to interact with the Docker Swarm and manage services and nodes.
    "},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.__init__","title":"__init__()","text":"

    Initialize the Docker Swarm Manager.

    "},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.connect_to_swarm","title":"connect_to_swarm(base_url='unix://var/run/docker.sock')","text":"

    Connect to the Docker Swarm.

    Parameters:

    Name Type Description Default base_url str

    URL to the Docker daemon.

    'unix://var/run/docker.sock'"},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.create_parser","title":"create_parser(parser)","text":"

    Extend the parser for CLI commands to include Docker Swarm operations.

    Parameters:

    Name Type Description Default parser ArgumentParser

    The existing parser.

    required

    Returns:

    Name Type Description ArgumentParser ArgumentParser

    The extended parser with Docker Swarm operations.

    "},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.create_service","title":"create_service(image, command, args)","text":"

    Create a new service in the Docker Swarm with comprehensive specifications.

    Parameters:

    Name Type Description Default image str

    Docker image to use for the service.

    required command Union[str, List[str]]

    Command to run in the service.

    required args Namespace

    Arguments from the CLI for service creation.

    required

    Returns:

    Name Type Description str str

    ID of the created service.

    "},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.get_service_logs","title":"get_service_logs(service_id, tail=100, follow=False)","text":"

    Retrieve logs of a Docker Swarm service.

    Parameters:

    Name Type Description Default service_id str

    ID of the service.

    required tail int

    Number of lines to tail from the end of the logs. Defaults to 100.

    100 follow bool

    Follow log output. Defaults to False.

    False"},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.inspect_node","title":"inspect_node(node_id)","text":"

    Inspect a specific Swarm node.

    Parameters:

    Name Type Description Default node_id str

    ID of the node to inspect.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: Node details.

    "},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.inspect_service","title":"inspect_service(service_id)","text":"

    Inspect a specific service in the Docker Swarm.

    Parameters:

    Name Type Description Default service_id str

    ID of the service to inspect.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: Service details.

    "},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.list_nodes","title":"list_nodes()","text":"

    List all nodes in the Docker Swarm.

    Returns:

    Type Description List[Any]

    List[Any]: List of Swarm nodes.

    "},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.list_services","title":"list_services()","text":"

    List all services in the Docker Swarm.

    Returns:

    Type Description List[Any]

    List[Any]: List of services.

    "},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.remove_service","title":"remove_service(service_id)","text":"

    Remove a service from the Docker Swarm.

    Parameters:

    Name Type Description Default service_id str

    ID of the service to remove.

    required"},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.run","title":"run(args)","text":"

    Run the Docker Swarm Manager based on the parsed CLI arguments.

    Parameters:

    Name Type Description Default args Namespace

    The parsed CLI arguments.

    required"},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.scale_service","title":"scale_service(service_id, replicas)","text":"

    Scale a Docker Swarm service to a specified number of replicas.

    Parameters:

    Name Type Description Default service_id str

    ID of the service to scale.

    required replicas int

    Desired number of replicas.

    required"},{"location":"core/docker_swarm/#runners.docker.swarm.DockerSwarmManager.update_service","title":"update_service(service_id, image, command, args)","text":"

    Update an existing service in the Docker Swarm.

    Parameters:

    Name Type Description Default service_id str

    ID of the service to update.

    required args Namespace

    Arguments from the CLI for service update.

    required"},{"location":"core/k8s_base/","title":"Kubernetes","text":""},{"location":"core/k8s_base/#runners.k8s.base.K8sResourceManager.__create_image_pull_secret","title":"__create_image_pull_secret(name, registry, username, password)","text":"

    \ud83d\udd11 Create an image pull secret for a Docker registry.

    Parameters:

    Name Type Description Default name str

    Name of the secret.

    required registry str

    Docker registry URL.

    required username str

    Username for the registry.

    required password str

    Password for the registry.

    required"},{"location":"core/k8s_base/#runners.k8s.base.K8sResourceManager.__init__","title":"__init__()","text":"

    \ud83d\ude80 Initialize the Kubernetes Resource Manager.

    Attributes:

    Name Type Description api_instance

    Core API instance for Kubernetes

    apps_api_instance

    Apps API instance for Kubernetes

    cluster_name

    Name of the Kubernetes cluster

    context_name

    Name of the kubeconfig context

    namespace

    Kubernetes namespace

    labels

    Labels for Kubernetes resources

    annotations

    Annotations for Kubernetes resources

    "},{"location":"core/k8s_base/#runners.k8s.base.K8sResourceManager.__wait_for_pod_completion","title":"__wait_for_pod_completion(pod_name, timeout=600, poll_interval=5)","text":"

    \u23f3 Wait for a Pod to complete its execution.

    Parameters:

    Name Type Description Default pod_name str

    Name of the Pod.

    required timeout int

    Maximum time to wait in seconds.

    600 poll_interval int

    Time between status checks in seconds.

    5

    Returns:

    Name Type Description bool bool

    True if the Pod succeeded, False otherwise.

    Raises:

    Type Description TimeoutError

    If waiting for the Pod times out.

    "},{"location":"core/k8s_base/#runners.k8s.base.K8sResourceManager.connect","title":"connect(kube_config_path, cluster_name, context_name, namespace='default', labels={}, annotations={}, api_key=None, api_host=None, verify_ssl=True, ssl_ca_cert=None)","text":"

    \ud83c\udf10 Connect to a Kubernetes cluster.

    Parameters:

    Name Type Description Default kube_config_path str

    Path to the kubeconfig file.

    required cluster_name str

    Name of the Kubernetes cluster.

    required context_name str

    Name of the kubeconfig context.

    required namespace str

    Kubernetes namespace.

    'default' labels dict

    Labels for Kubernetes resources.

    {} annotations dict

    Annotations for Kubernetes resources.

    {} api_key str

    API key for Kubernetes cluster.

    None api_host str

    API host for Kubernetes cluster.

    None verify_ssl bool

    Whether to verify SSL certificates.

    True ssl_ca_cert str

    Path to the SSL CA certificate.

    None

    Raises:

    Type Description ValueError

    If neither kube_config_path and context_name nor api_key and api_host are provided.

    "},{"location":"core/k8s_base/#runners.k8s.base.K8sResourceManager.describe","title":"describe(pod_name)","text":"

    \ud83d\udcdd Describe a Kubernetes pod.

    Parameters:

    Name Type Description Default pod_name str

    Name of the pod.

    required

    Returns:

    Name Type Description dict V1Pod

    Description of the pod.

    "},{"location":"core/k8s_base/#runners.k8s.base.K8sResourceManager.logs","title":"logs(name, tail=10, follow=True)","text":"

    \ud83d\udcdc Get logs of a Kubernetes pod.

    Parameters:

    Name Type Description Default name str

    Name of the pod.

    required tail int

    Number of lines to tail.

    10

    Returns:

    Name Type Description str str

    Logs of the pod.

    "},{"location":"core/k8s_base/#runners.k8s.base.K8sResourceManager.run","title":"run(args)","text":"

    \ud83d\ude80 Run the Kubernetes resource manager.

    Parameters:

    Name Type Description Default args Namespace

    The parsed command line arguments.

    required"},{"location":"core/k8s_base/#runners.k8s.base.K8sResourceManager.show","title":"show()","text":"

    \ud83d\udccb Show all pods in the namespace.

    Returns:

    Name Type Description list List[V1Pod]

    List of pods.

    "},{"location":"core/k8s_base/#runners.k8s.base.K8sResourceManager.status","title":"status(pod_name)","text":"

    \ud83d\udcdc Get the status of a Pod.

    Parameters:

    Name Type Description Default pod_name str

    Name of the Pod.

    required

    Returns:

    Name Type Description str V1Pod

    The status of the Pod.

    "},{"location":"core/k8s_cron_job/","title":"Kubernetes CronJob","text":"

    Bases: Job

    \ud83d\ude80 The CronJob class is responsible for managing Kubernetes CronJobs. It extends the Job class and provides additional functionalities specific to Kubernetes CronJobs.

    CLI Usage

    genius cronjob sub-command Examples:

    genius cronjob create_cronjob --name example-cronjob --image example-image --command \"echo hello\" --schedule \"*/5 * * * *\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n

    ```bash\ngenius cronjob delete_cronjob --name example-cronjob --namespace geniusrise \\\n    --context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n```\n\n```bash\ngenius cronjob get_cronjob_status --name example-cronjob --namespace geniusrise \\\n    --context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n```\n

    YAML Configuration:

        version: \"1.0\"\ncronjobs:\n- name: \"example-cronjob\"\nimage: \"example-image\"\ncommand: \"example-command\"\nschedule: \"*/5 * * * *\"\nenv_vars:\nKEY: \"value\"\ncpu: \"100m\"\nmemory: \"256Mi\"\nstorage: \"1Gi\"\ngpu: \"1\"\n

    Extended CLI Examples
    genius cronjob create_cronjob \\\n--k8s_kind cronjob \\\n--k8s_namespace geniusrise \\\n--k8s_context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev \\\n--k8s_name example-cronjob \\\n--k8s_image \"genius-dev.dkr.ecr.ap-south-1.amazonaws.com/geniusrise\" \\\n--k8s_schedule \"*/5 * * * *\" \\\n--k8s_env_vars '{\"AWS_DEFAULT_REGION\": \"ap-south-1\", \"AWS_SECRET_ACCESS_KEY\": \"\", \"AWS_ACCESS_KEY_ID\": \"\"}' \\\n--k8s_cpu \"100m\" \\\n--k8s_memory \"256Mi\"\n
    genius cronjob delete_cronjob \\\nexample-cronjob \\\n--namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    genius cronjob get_cronjob_status \\\nexample-cronjob \\\n--namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"core/k8s_cron_job/#runners.k8s.cron_job.CronJob.__create_cronjob_spec","title":"__create_cronjob_spec(image, command, schedule, env_vars={}, cpu=None, memory=None, storage=None, gpu=None, image_pull_secret_name=None)","text":"

    \ud83d\udce6 Create a Kubernetes CronJob specification.

    Parameters:

    Name Type Description Default image str

    Docker image for the CronJob.

    required command str

    Command to run in the container.

    required env_vars dict

    Environment variables for the CronJob.

    {} cpu Optional[str]

    CPU requirements.

    None memory Optional[str]

    Memory requirements.

    None storage Optional[str]

    Storage requirements.

    None gpu Optional[str]

    GPU requirements.

    None image_pull_secret_name Optional[str]

    Name of the image pull secret.

    None

    Returns:

    Type Description client.V1CronJobSpec

    client.V1CronJobSpec: The CronJob specification.

    "},{"location":"core/k8s_cron_job/#runners.k8s.cron_job.CronJob.__init__","title":"__init__()","text":"

    \ud83d\ude80 Initialize the CronJob class for managing Kubernetes Cron Jobs.

    "},{"location":"core/k8s_cron_job/#runners.k8s.cron_job.CronJob.create","title":"create(name, image, schedule, command, env_vars={}, cpu=None, memory=None, storage=None, gpu=None, image_pull_secret_name=None, **kwargs)","text":"

    \ud83d\udee0 Create a Kubernetes CronJob.

    Parameters:

    Name Type Description Default name str

    Name of the CronJob.

    required image str

    Docker image for the CronJob.

    required command str

    Command to run in the container.

    required schedule str

    Cron schedule.

    required env_vars dict

    Environment variables for the CronJob.

    {}"},{"location":"core/k8s_cron_job/#runners.k8s.cron_job.CronJob.create_parser","title":"create_parser(parser)","text":"

    \ud83c\udf9b Create a parser for CLI commands related to Cron Job functionalities.

    Parameters:

    Name Type Description Default parser ArgumentParser

    The main parser.

    required

    Returns:

    Name Type Description ArgumentParser ArgumentParser

    The parser with subparsers for each command.

    "},{"location":"core/k8s_cron_job/#runners.k8s.cron_job.CronJob.delete","title":"delete(name)","text":"

    \ud83d\uddd1 Delete a Kubernetes CronJob.

    Parameters:

    Name Type Description Default name str

    Name of the CronJob to delete.

    required"},{"location":"core/k8s_cron_job/#runners.k8s.cron_job.CronJob.run","title":"run(args)","text":"

    \ud83d\ude80 Run the Cron Job manager.

    Parameters:

    Name Type Description Default args Namespace

    The parsed command line arguments.

    required"},{"location":"core/k8s_cron_job/#runners.k8s.cron_job.CronJob.status","title":"status(name)","text":"

    \ud83d\udcca Get the status of a Kubernetes CronJob.

    Parameters:

    Name Type Description Default name str

    Name of the CronJob.

    required

    Returns:

    Name Type Description dict V1CronJob

    Status of the CronJob.

    "},{"location":"core/k8s_deployment/","title":"Kubernetes Deployment","text":"

    Bases: K8sResourceManager

    \ud83d\ude80 Initialize the Deployment class for managing Kubernetes Deployments.

    CLI Usage

    genius deployment sub-command Examples:

    genius deployment create --name example-deployment --image example-image --command \"echo hello\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    genius deployment scale --name example-deployment --replicas 3 --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    genius deployment describe --name example-deployment --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    genius deployment delete --name example-deployment --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    genius deployment status --name example-deployment --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n

    YAML Configuration:

    version: \"1.0\"\ndeployments:\n- name: \"example-deployment\"\nimage: \"example-image\"\ncommand: \"example-command\"\nreplicas: 3\nenv_vars:\nKEY: \"value\"\ncpu: \"100m\"\nmemory: \"256Mi\"\nstorage: \"1Gi\"\ngpu: \"1\"\n
    "},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.__create_deployment_spec","title":"__create_deployment_spec(image, command, replicas, image_pull_secret_name, env_vars, cpu=None, memory=None, storage=None, gpu=None)","text":"

    \ud83d\udce6 Create a Kubernetes Deployment specification.

    Parameters:

    Name Type Description Default image str

    Docker image for the Deployment.

    required command str

    Command to run in the container.

    required replicas int

    Number of replicas.

    required image_pull_secret_name str

    Name of the image pull secret.

    required env_vars dict

    Environment variables for the Deployment.

    required cpu str

    CPU requirements.

    None memory str

    Memory requirements.

    None storage str

    Storage requirements.

    None gpu str

    GPU requirements.

    None

    Returns:

    Type Description client.V1DeploymentSpec

    client.V1DeploymentSpec: The Deployment specification.

    "},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.__init__","title":"__init__()","text":"

    \ud83d\ude80 Initialize the Deployment class for managing Kubernetes Deployments.

    "},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.create","title":"create(name, image, command, registry_creds=None, replicas=1, env_vars={}, cpu=None, memory=None, storage=None, gpu=None, **kwargs)","text":"

    \ud83d\udee0 Create a Kubernetes resource Deployment.

    Parameters:

    Name Type Description Default name str

    Name of the resource.

    required image str

    Docker image for the resource.

    required command str

    Command to run in the container.

    required registry_creds dict

    Credentials for Docker registry.

    None replicas int

    Number of replicas for Deployment.

    1 env_vars dict

    Environment variables for the resource.

    {} cpu str

    CPU requirements.

    None memory str

    Memory requirements.

    None storage str

    Storage requirements.

    None gpu str

    GPU requirements.

    None"},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.create_parser","title":"create_parser(parser)","text":"

    \ud83c\udf9b Create a parser for CLI commands related to Deployment functionalities.

    Parameters:

    Name Type Description Default parser ArgumentParser

    The main parser.

    required

    Returns:

    Name Type Description ArgumentParser ArgumentParser

    The parser with subparsers for each command.

    "},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.delete","title":"delete(name)","text":"

    \ud83d\uddd1 Delete a Kubernetes resource (Pod/Deployment/Service).

    Parameters:

    Name Type Description Default name str

    Name of the resource to delete.

    required"},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.describe","title":"describe(deployment_name)","text":"

    \ud83d\uddc2 Describe a Kubernetes deployment.

    Parameters:

    Name Type Description Default deployment_name str

    Name of the deployment.

    required

    Returns:

    Name Type Description dict V1Deployment

    Description of the deployment.

    "},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.run","title":"run(args)","text":"

    \ud83d\ude80 Run the Deployment manager.

    Parameters:

    Name Type Description Default args Namespace

    The parsed command line arguments.

    required"},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.scale","title":"scale(name, replicas)","text":"

    \ud83d\udcc8 Scale a Kubernetes deployment.

    Parameters:

    Name Type Description Default name str

    Name of the deployment.

    required replicas int

    Number of replicas.

    required"},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.show","title":"show()","text":"

    \ud83d\uddc2 List all deployments in the namespace.

    Returns:

    Name Type Description list List[V1Deployment]

    List of deployments.

    "},{"location":"core/k8s_deployment/#runners.k8s.deployment.Deployment.status","title":"status(name)","text":"

    \ud83d\udcca Get the status of a Kubernetes deployment.

    Parameters:

    Name Type Description Default name str

    Name of the deployment.

    required

    Returns:

    Name Type Description dict V1Deployment

    Status of the deployment.

    "},{"location":"core/k8s_job/","title":"Kubernetes Job","text":"

    Bases: Deployment

    \ud83d\ude80 The Job class is responsible for managing Kubernetes Jobs. It extends the Deployment class and provides additional functionalities specific to Kubernetes Jobs.

    CLI Usage

    genius job sub-command Examples:

    genius job create --name example-job --image example-image --command \"echo hello\" --cpu \"100m\" --memory \"256Mi\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n

    ```bash\ngenius job delete --name example-job --namespace geniusrise \\\n    --context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n```\n\n```bash\ngenius job status --name example-job --namespace geniusrise \\\n    --context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n```\n

    YAML Configuration:

        version: \"1.0\"\njobs:\n- name: \"example-job\"\nimage: \"example-image\"\ncommand: \"example-command\"\nenv_vars:\nKEY: \"value\"\ncpu: \"100m\"\nmemory: \"256Mi\"\nstorage: \"1Gi\"\ngpu: \"1\"\n

    Extended CLI Examples:

        genius job create \\\n--k8s_kind job \\\n--k8s_namespace geniusrise \\\n--k8s_context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev \\\n--k8s_name example-job \\\n--k8s_image \"genius-dev.dkr.ecr.ap-south-1.amazonaws.com/geniusrise\" \\\n--k8s_env_vars '{\"AWS_DEFAULT_REGION\": \"ap-south-1\", \"AWS_SECRET_ACCESS_KEY\": \"\", \"AWS_ACCESS_KEY_ID\": \"\"}' \\\n--k8s_cpu \"100m\" \\\n--k8s_memory \"256Mi\"\n
        genius job delete \\\nexample-job \\\n--namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
        genius job status \\\nexample-job \\\n--namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"core/k8s_job/#runners.k8s.job.Job.__init__","title":"__init__()","text":"

    \ud83d\ude80 Initialize the Job class for managing Kubernetes Jobs.

    "},{"location":"core/k8s_job/#runners.k8s.job.Job.create","title":"create(name, image, command, env_vars={}, cpu=None, memory=None, storage=None, gpu=None, image_pull_secret_name=None, **kwargs)","text":"

    \ud83d\udee0 Create a Kubernetes Job.

    Parameters:

    Name Type Description Default name str

    Name of the Job.

    required image str

    Docker image for the Job.

    required command str

    Command to run in the container.

    required env_vars dict

    Environment variables for the Job.

    {}"},{"location":"core/k8s_job/#runners.k8s.job.Job.create_parser","title":"create_parser(parser)","text":"

    \ud83c\udf9b Create a parser for CLI commands related to Job functionalities.

    Parameters:

    Name Type Description Default parser ArgumentParser

    The main parser.

    required

    Returns:

    Name Type Description ArgumentParser ArgumentParser

    The parser with subparsers for each command.

    "},{"location":"core/k8s_job/#runners.k8s.job.Job.delete","title":"delete(name)","text":"

    \ud83d\uddd1 Delete a Kubernetes Job.

    Parameters:

    Name Type Description Default name str

    Name of the Job to delete.

    required"},{"location":"core/k8s_job/#runners.k8s.job.Job.run","title":"run(args)","text":"

    \ud83d\ude80 Run the Job manager.

    Parameters:

    Name Type Description Default args Namespace

    The parsed command line arguments.

    required"},{"location":"core/k8s_job/#runners.k8s.job.Job.status","title":"status(name)","text":"

    \ud83d\udcca Get the status of a Kubernetes Job.

    Parameters:

    Name Type Description Default name str

    Name of the Job.

    required

    Returns:

    Name Type Description dict V1Job

    Status of the Job.

    "},{"location":"core/k8s_service/","title":"Kubernetes Service","text":"

    Bases: Deployment

    \ud83d\ude80 Initialize the Service class for managing Kubernetes Services.

    CLI Usage

    genius service sub-command Examples:

    genius service create --name example-service --image example-image --command \"echo hello\" --port 8080 --target_port 8080 --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    genius service delete --name example-service --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    genius service describe --name example-service --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    genius service show --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n

    YAML Configuration:

    version: \"1.0\"\nservices:\n- name: \"example-service\"\nimage: \"example-image\"\ncommand: \"example-command\"\nreplicas: 3\nport: 8080\ntarget_port: 8080\nenv_vars:\nKEY: \"value\"\ncpu: \"100m\"\nmemory: \"256Mi\"\nstorage: \"1Gi\"\ngpu: \"1\"\n
    Extended CLI Examples
        genius service deploy \\\n--k8s_kind service \\\n--k8s_namespace geniusrise \\\n--k8s_context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev \\\n--k8s_name webhook \\\n--k8s_image \"genius-dev.dkr.ecr.ap-south-1.amazonaws.com/geniusrise\" \\\n--k8s_env_vars '{\"AWS_DEFAULT_REGION\": \"ap-south-1\", \"AWS_SECRET_ACCESS_KEY\": \"\", \"AWS_ACCESS_KEY_ID\": \"\"}' \\\n--k8s_port 8080 \\\n--k8s_target_port 8080\n
        genius service delete \\\nwebhook \\\n--namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"core/k8s_service/#runners.k8s.service.Service.__create_service_spec","title":"__create_service_spec(node_port, port, target_port)","text":"

    \ud83d\udce6 Create a Kubernetes Service specification.

    Parameters:

    Name Type Description Default port int

    Service port.

    required target_port int

    Container target port.

    required

    Returns:

    Type Description client.V1ServiceSpec

    client.V1ServiceSpec: The Service specification.

    "},{"location":"core/k8s_service/#runners.k8s.service.Service.__init__","title":"__init__()","text":"

    \ud83d\ude80 Initialize the Service class for managing Kubernetes Services.

    "},{"location":"core/k8s_service/#runners.k8s.service.Service.create","title":"create(name, image, command, registry_creds=None, replicas=1, node_port=80, port=80, target_port=8080, env_vars={}, cpu=None, memory=None, storage=None, gpu=None, **kwargs)","text":"

    \ud83d\udee0 Create a Kubernetes resource Service.

    Parameters:

    Name Type Description Default name str

    Name of the resource.

    required image str

    Docker image for the resource.

    required command str

    Command to run in the container.

    required registry_creds dict

    Credentials for Docker registry.

    None replicas int

    Number of replicas for Deployment.

    1 node_port int

    Service port that is exposed.

    80 port int

    Service port.

    80 target_port int

    Container target port.

    8080 env_vars dict

    Environment variables for the resource.

    {} cpu str

    CPU requirements.

    None memory str

    Memory requirements.

    None storage str

    Storage requirements.

    None gpu str

    GPU requirements.

    None"},{"location":"core/k8s_service/#runners.k8s.service.Service.create_parser","title":"create_parser(parser)","text":"

    \ud83c\udf9b Create a parser for CLI commands related to Service functionalities.

    Parameters:

    Name Type Description Default parser ArgumentParser

    The main parser.

    required

    Returns:

    Name Type Description ArgumentParser ArgumentParser

    The parser with subparsers for each command.

    "},{"location":"core/k8s_service/#runners.k8s.service.Service.delete","title":"delete(name)","text":"

    \ud83d\uddd1 Delete a Kubernetes resource (Pod/Deployment/Service).

    Parameters:

    Name Type Description Default name str

    Name of the resource to delete.

    required"},{"location":"core/k8s_service/#runners.k8s.service.Service.describe","title":"describe(service_name)","text":"

    \ud83c\udf10 Describe a Kubernetes service.

    Parameters:

    Name Type Description Default service_name str

    Name of the service.

    required

    Returns:

    Name Type Description dict V1Service

    Description of the service.

    "},{"location":"core/k8s_service/#runners.k8s.service.Service.run","title":"run(args)","text":"

    \ud83d\ude80 Run the Service manager.

    Parameters:

    Name Type Description Default args Namespace

    The parsed command line arguments.

    required"},{"location":"core/k8s_service/#runners.k8s.service.Service.show","title":"show()","text":"

    \ud83c\udf10 Show all services in the namespace.

    Returns:

    Name Type Description list List[V1Service]

    List of services.

    "},{"location":"core/k8s_service/#runners.k8s.service.Service.status","title":"status(name)","text":"

    \ud83d\udcca Get the status of a Kubernetes service.

    Parameters:

    Name Type Description Default name str

    Name of the service.

    required

    Returns:

    Name Type Description dict V1Deployment

    Status of the service.

    "},{"location":"databases/arangodb/","title":"ArangoDB","text":"

    Bases: Spout

    "},{"location":"databases/arangodb/#geniusrise_databases.arangodb.ArangoDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the ArangoDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/arangodb/#geniusrise_databases.arangodb.ArangoDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Arango rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=myarangodb.example.com username=myusername password=mypassword database=mydb collection=mycollection\n
    "},{"location":"databases/arangodb/#geniusrise_databases.arangodb.ArangoDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_arangodb_spout:\nname: \"Arango\"\nmethod: \"fetch\"\nargs:\nhost: \"myarangodb.example.com\"\nusername: \"myusername\"\npassword: \"mypassword\"\ndatabase: \"mydb\"\ncollection: \"mycollection\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/arangodb/#geniusrise_databases.arangodb.ArangoDB.fetch","title":"fetch(host, username, password, database, collection)","text":"

    \ud83d\udcd6 Fetch data from an ArangoDB collection and save it in batch.

    Parameters:

    Name Type Description Default host str

    The ArangoDB host.

    required username str

    The ArangoDB username.

    required password str

    The ArangoDB password.

    required database str

    The ArangoDB database name.

    required collection str

    The name of the ArangoDB collection.

    required

    Raises:

    Type Description Exception

    If unable to connect to the ArangoDB server or execute the command.

    "},{"location":"databases/athena/","title":"Athena","text":"

    Bases: Spout

    "},{"location":"databases/athena/#geniusrise_databases.athena.Athena.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Athena class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/athena/#geniusrise_databases.athena.Athena.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Athena rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args region_name=us-east-1 output_location=s3://mybucket/output query=\"SELECT * FROM mytable\"\n
    "},{"location":"databases/athena/#geniusrise_databases.athena.Athena.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_athena_spout:\nname: \"Athena\"\nmethod: \"fetch\"\nargs:\nregion_name: \"us-east-1\"\noutput_location: \"s3://mybucket/output\"\nquery: \"SELECT * FROM mytable\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/athena/#geniusrise_databases.athena.Athena.fetch","title":"fetch(region_name, output_location, query)","text":"

    \ud83d\udcd6 Fetch data from an AWS Athena table and save it in batch.

    Parameters:

    Name Type Description Default region_name str

    The AWS region name.

    required output_location str

    The S3 output location for the query results.

    required query str

    The SQL query to execute.

    required

    Raises:

    Type Description Exception

    If unable to connect to the AWS Athena service or execute the query.

    "},{"location":"databases/azure_table/","title":"Azure Table Storage","text":"

    Bases: Spout

    "},{"location":"databases/azure_table/#geniusrise_databases.azure_table.AzureTableStorage.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the AzureTableStorage class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/azure_table/#geniusrise_databases.azure_table.AzureTableStorage.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius AzureTableStorage rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args account_name=my_account account_key=my_key table_name=my_table\n
    "},{"location":"databases/azure_table/#geniusrise_databases.azure_table.AzureTableStorage.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_azure_table_spout:\nname: \"AzureTableStorage\"\nmethod: \"fetch\"\nargs:\naccount_name: \"my_account\"\naccount_key: \"my_key\"\ntable_name: \"my_table\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/azure_table/#geniusrise_databases.azure_table.AzureTableStorage.fetch","title":"fetch(account_name, account_key, table_name)","text":"

    \ud83d\udcd6 Fetch data from Azure Table Storage and save it in batch.

    Parameters:

    Name Type Description Default account_name str

    The Azure Storage account name.

    required account_key str

    The Azure Storage account key.

    required table_name str

    The Azure Table Storage table name.

    required

    Raises:

    Type Description Exception

    If unable to connect to Azure Table Storage or fetch the data.

    "},{"location":"databases/bigquery/","title":"BigQuery","text":"

    Bases: Spout

    "},{"location":"databases/bigquery/#geniusrise_databases.bigquery.BigQuery.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the BigQuery class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/bigquery/#geniusrise_databases.bigquery.BigQuery.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius BigQuery rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--project_id my_project_id dataset_id=my_dataset table_id=my_table\n
    "},{"location":"databases/bigquery/#geniusrise_databases.bigquery.BigQuery.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_bigquery_spout:\nname: \"BigQuery\"\nmethod: \"fetch\"\nargs:\nproject_id: \"my_project_id\"\ndataset_id: \"my_dataset\"\ntable_id: \"my_table\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/bigquery/#geniusrise_databases.bigquery.BigQuery.fetch","title":"fetch(project_id, dataset_id, table_id)","text":"

    \ud83d\udcd6 Fetch data from a BigQuery table and save it in batch.

    Parameters:

    Name Type Description Default project_id str

    The Google Cloud project ID.

    required dataset_id str

    The BigQuery dataset ID.

    required table_id str

    The BigQuery table ID.

    required

    Raises:

    Type Description Exception

    If unable to connect to the BigQuery server or execute the query.

    "},{"location":"databases/bigtable/","title":"BigTable","text":"

    Bases: Spout

    "},{"location":"databases/bigtable/#geniusrise_databases.bigtable.Bigtable.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Bigtable class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/bigtable/#geniusrise_databases.bigtable.Bigtable.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Bigtable rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args project_id=my_project instance_id=my_instance table_id=my_table\n
    "},{"location":"databases/bigtable/#geniusrise_databases.bigtable.Bigtable.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_bigtable_spout:\nname: \"Bigtable\"\nmethod: \"fetch\"\nargs:\nproject_id: \"my_project\"\ninstance_id: \"my_instance\"\ntable_id: \"my_table\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/bigtable/#geniusrise_databases.bigtable.Bigtable.fetch","title":"fetch(project_id, instance_id, table_id)","text":"

    \ud83d\udcd6 Fetch data from a Google Cloud Bigtable and save it in batch.

    Parameters:

    Name Type Description Default project_id str

    The Google Cloud Project ID.

    required instance_id str

    The Bigtable instance ID.

    required table_id str

    The Bigtable table ID.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Bigtable server or fetch the data.

    "},{"location":"databases/cassandra/","title":"Cassandra","text":"

    Bases: Spout

    "},{"location":"databases/cassandra/#geniusrise_databases.cassandra.Cassandra.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Cassandra class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"databases/cassandra/#geniusrise_databases.cassandra.Cassandra.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Cassandra rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args hosts=localhost keyspace=my_keyspace query=\"SELECT * FROM my_table\" page_size=100\n
    "},{"location":"databases/cassandra/#geniusrise_databases.cassandra.Cassandra.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_cassandra_spout:\nname: \"Cassandra\"\nmethod: \"fetch\"\nargs:\nhosts: \"localhost\"\nkeyspace: \"my_keyspace\"\nquery: \"SELECT * FROM my_table\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/cassandra/#geniusrise_databases.cassandra.Cassandra.fetch","title":"fetch(hosts, keyspace, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a Cassandra database and save it in batch.

    Parameters:

    Name Type Description Default hosts str

    Comma-separated list of Cassandra hosts.

    required keyspace str

    The Cassandra keyspace to use.

    required query str

    The CQL query to execute.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the Cassandra cluster or execute the query.

    "},{"location":"databases/cloud_sql/","title":"Google Cloud SQL","text":"

    Bases: Spout

    "},{"location":"databases/cloud_sql/#geniusrise_databases.cloud_sql.GoogleCloudSQL.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the GoogleCloudSQL class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"databases/cloud_sql/#geniusrise_databases.cloud_sql.GoogleCloudSQL.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius GoogleCloudSQL rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=127.0.0.1 port=3306 user=root password=root database=mydb query=\"SELECT * FROM table\" page_size=100\n
    "},{"location":"databases/cloud_sql/#geniusrise_databases.cloud_sql.GoogleCloudSQL.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_google_cloud_sql_spout:\nname: \"GoogleCloudSQL\"\nmethod: \"fetch\"\nargs:\nhost: \"127.0.0.1\"\nport: 3306\nuser: \"root\"\npassword: \"root\"\ndatabase: \"mydb\"\nquery: \"SELECT * FROM table\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/cloud_sql/#geniusrise_databases.cloud_sql.GoogleCloudSQL.fetch","title":"fetch(host, port, user, password, database, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a Google Cloud SQL database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The Google Cloud SQL host.

    required port int

    The Google Cloud SQL port.

    required user str

    The Google Cloud SQL user.

    required password str

    The Google Cloud SQL password.

    required database str

    The Google Cloud SQL database name.

    required query str

    The SQL query to execute.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the Google Cloud SQL or fetch the data.

    "},{"location":"databases/cockroach/","title":"CockroachDB","text":"

    Bases: Spout

    "},{"location":"databases/cockroach/#geniusrise_databases.cockroach.CockroachDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the CockroachDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"databases/cockroach/#geniusrise_databases.cockroach.CockroachDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius CockroachDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=26257 user=root password=root database=mydb query=\"SELECT * FROM table\" page_size=100\n
    "},{"location":"databases/cockroach/#geniusrise_databases.cockroach.CockroachDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_cockroachdb_spout:\nname: \"CockroachDB\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 26257\nuser: \"root\"\npassword: \"root\"\ndatabase: \"mydb\"\nquery: \"SELECT * FROM table\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/cockroach/#geniusrise_databases.cockroach.CockroachDB.fetch","title":"fetch(host, port, user, password, database, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a CockroachDB database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The CockroachDB host.

    required port int

    The CockroachDB port.

    required user str

    The CockroachDB user.

    required password str

    The CockroachDB password.

    required database str

    The CockroachDB database name.

    required query str

    The SQL query to execute.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the CockroachDB server or execute the query.

    "},{"location":"databases/cosmosdb/","title":"CosmosDB","text":"

    Bases: Spout

    "},{"location":"databases/cosmosdb/#geniusrise_databases.cosmosdb.CosmosDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Cosmos DB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/cosmosdb/#geniusrise_databases.cosmosdb.CosmosDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius CosmosDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args endpoint=https://mycosmosdb.documents.azure.com:443/ my_database my_collection\n
    "},{"location":"databases/cosmosdb/#geniusrise_databases.cosmosdb.CosmosDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_cosmosdb_spout:\nname: \"CosmosDB\"\nmethod: \"fetch\"\nargs:\nendpoint: \"https://mycosmosdb.documents.azure.com:443/\"\ndatabase: \"my_database\"\ncollection: \"my_collection\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/cosmosdb/#geniusrise_databases.cosmosdb.CosmosDB.fetch","title":"fetch(endpoint, database, collection)","text":"

    \ud83d\udcd6 Fetch data from a Cosmos DB collection and save it in batch.

    Parameters:

    Name Type Description Default endpoint str

    The Cosmos DB endpoint URL.

    required database str

    The Cosmos DB database name.

    required collection str

    The Cosmos DB collection name.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Cosmos DB server or execute the query.

    "},{"location":"databases/couchbase/","title":"Couchbase","text":"

    Bases: Spout

    "},{"location":"databases/couchbase/#geniusrise_databases.couchbase.Couchbase.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the CouchbaseSpout class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"databases/couchbase/#geniusrise_databases.couchbase.Couchbase.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius CouchbaseSpout rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost username=admin password=password bucket_name=my_bucket query=\"SELECT * FROM my_bucket\" page_size=100\n
    "},{"location":"databases/couchbase/#geniusrise_databases.couchbase.Couchbase.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_couchbase_spout:\nname: \"CouchbaseSpout\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nusername: \"admin\"\npassword: \"password\"\nbucket_name: \"my_bucket\"\nquery: \"SELECT * FROM my_bucket\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/couchbase/#geniusrise_databases.couchbase.Couchbase.fetch","title":"fetch(host, username, password, bucket_name, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a Couchbase bucket and save it in batch.

    Parameters:

    Name Type Description Default host str

    The Couchbase host.

    required username str

    The Couchbase username.

    required password str

    The Couchbase password.

    required bucket_name str

    The Couchbase bucket name.

    required query str

    The N1QL query to execute.

    required page_size int

    The number of documents to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the Couchbase cluster or execute the query.

    "},{"location":"databases/db2/","title":"IBM DB2","text":"

    Bases: Spout

    "},{"location":"databases/db2/#geniusrise_databases.db2.DB2.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the DB2 class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/db2/#geniusrise_databases.db2.DB2.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius DB2 rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args hostname=mydb2.example.com port=50000 username=myusername password=mypassword database=mydb\n
    "},{"location":"databases/db2/#geniusrise_databases.db2.DB2.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_db2_spout:\nname: \"DB2\"\nmethod: \"fetch\"\nargs:\nhostname: \"mydb2.example.com\"\nport: 50000\nusername: \"myusername\"\npassword: \"mypassword\"\ndatabase: \"mydb\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/db2/#geniusrise_databases.db2.DB2.fetch","title":"fetch(hostname, port, username, password, database)","text":"

    \ud83d\udcd6 Fetch data from a DB2 database and save it in batch.

    Parameters:

    Name Type Description Default hostname str

    The DB2 hostname.

    required port int

    The DB2 port.

    required username str

    The DB2 username.

    required password str

    The DB2 password.

    required database str

    The DB2 database name.

    required

    Raises:

    Type Description Exception

    If unable to connect to the DB2 server or execute the command.

    "},{"location":"databases/documentdb/","title":"AWS DocumentDB","text":"

    Bases: Spout

    "},{"location":"databases/documentdb/#geniusrise_databases.documentdb.DocumentDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the DocumentDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/documentdb/#geniusrise_databases.documentdb.DocumentDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius DocumentDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=27017 user=myuser password=mypassword database=mydb collection=mycollection query=\"{}\" page_size=100\n
    "},{"location":"databases/documentdb/#geniusrise_databases.documentdb.DocumentDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_documentdb_spout:\nname: \"DocumentDB\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 27017\nuser: \"myuser\"\npassword: \"mypassword\"\ndatabase: \"mydb\"\ncollection: \"mycollection\"\nquery: \"{}\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/documentdb/#geniusrise_databases.documentdb.DocumentDB.fetch","title":"fetch(host, port, user, password, database, collection, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a DocumentDB database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The DocumentDB host.

    required port int

    The DocumentDB port.

    required user str

    The DocumentDB user.

    required password str

    The DocumentDB password.

    required database str

    The DocumentDB database name.

    required collection str

    The DocumentDB collection name.

    required query str

    The query to execute.

    required page_size int

    The number of documents to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the DocumentDB server or execute the query.

    "},{"location":"databases/dynamodb/","title":"AWS DynamoDB","text":"

    Bases: Spout

    "},{"location":"databases/dynamodb/#geniusrise_databases.dynamodb.DynamoDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the DynamoDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"databases/dynamodb/#geniusrise_databases.dynamodb.DynamoDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius DynamoDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args table_name=my_table page_size=100\n
    "},{"location":"databases/dynamodb/#geniusrise_databases.dynamodb.DynamoDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_dynamodb_spout:\nname: \"DynamoDB\"\nmethod: \"fetch\"\nargs:\ntable_name: \"my_table\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/dynamodb/#geniusrise_databases.dynamodb.DynamoDB.fetch","title":"fetch(table_name, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a DynamoDB table and save it in batch.

    Parameters:

    Name Type Description Default table_name str

    The DynamoDB table name.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the DynamoDB or fetch the data.

    "},{"location":"databases/elasticsearch/","title":"Elasticsearch","text":"

    Bases: Spout

    "},{"location":"databases/elasticsearch/#geniusrise_databases.elasticsearch.Elasticsearch.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Elasticsearch class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"databases/elasticsearch/#geniusrise_databases.elasticsearch.Elasticsearch.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Elasticsearch rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args hosts=localhost:9200 index=my_index query='{\"query\": {\"match_all\": {}}}' page_size=100\n
    "},{"location":"databases/elasticsearch/#geniusrise_databases.elasticsearch.Elasticsearch.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_elasticsearch_spout:\nname: \"Elasticsearch\"\nmethod: \"fetch\"\nargs:\nhosts: \"localhost:9200\"\nindex: \"my_index\"\nquery: '{\"query\": {\"match_all\": {}}}'\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/elasticsearch/#geniusrise_databases.elasticsearch.Elasticsearch.fetch","title":"fetch(hosts, index, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from an Elasticsearch index and save it in batch.

    Parameters:

    Name Type Description Default hosts str

    Comma-separated list of Elasticsearch hosts.

    required index str

    The Elasticsearch index to query.

    required query str

    The Elasticsearch query in JSON format.

    required page_size int

    The number of documents to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the Elasticsearch cluster or execute the query.

    "},{"location":"databases/firestore/","title":"Firestore","text":"

    Bases: Spout

    "},{"location":"databases/firestore/#geniusrise_databases.firestore.Firestore.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Firestore class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/firestore/#geniusrise_databases.firestore.Firestore.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Firestore rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args project_id=my-project collection_id=my-collection\n
    "},{"location":"databases/firestore/#geniusrise_databases.firestore.Firestore.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_firestore_spout:\nname: \"Firestore\"\nmethod: \"fetch\"\nargs:\nproject_id: \"my-project\"\ncollection_id: \"my-collection\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/firestore/#geniusrise_databases.firestore.Firestore.fetch","title":"fetch(project_id, collection_id)","text":"

    \ud83d\udcd6 Fetch data from a Firestore collection and save it in batch.

    Parameters:

    Name Type Description Default project_id str

    The Google Cloud project ID.

    required collection_id str

    The Firestore collection ID.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Firestore server or execute the query.

    "},{"location":"databases/graphite/","title":"Graphite","text":"

    Bases: Spout

    "},{"location":"databases/graphite/#geniusrise_databases.graphite.Graphite.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Graphite class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/graphite/#geniusrise_databases.graphite.Graphite.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Graphite rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args url=http://localhost:8080 target=stats_counts.myapp output_format=json from=-1h until=now\n
    "},{"location":"databases/graphite/#geniusrise_databases.graphite.Graphite.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_graphite_spout:\nname: \"Graphite\"\nmethod: \"fetch\"\nargs:\nurl: \"http://localhost:8080\"\ntarget: \"stats_counts.myapp\"\noutput_format: \"json\"\nfrom: \"-1h\"\nuntil: \"now\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/graphite/#geniusrise_databases.graphite.Graphite.fetch","title":"fetch(url, target, output_format='json', from_time='-1h', until='now')","text":"

    \ud83d\udcd6 Fetch data from a Graphite database and save it in batch.

    Parameters:

    Name Type Description Default url str

    The Graphite API URL.

    required target str

    The target metric to fetch.

    required output_format str

    The output format. Defaults to \"json\".

    'json' from_time str

    The start time for fetching data. Defaults to \"-1h\".

    '-1h' until str

    The end time for fetching data. Defaults to \"now\".

    'now'

    Raises:

    Type Description Exception

    If unable to connect to the Graphite server or fetch the data.

    "},{"location":"databases/hbase/","title":"HBase","text":"

    Bases: Spout

    "},{"location":"databases/hbase/#geniusrise_databases.hbase.HBase.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the HBase class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/hbase/#geniusrise_databases.hbase.HBase.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius HBase rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost table=my_table row_start=start row_stop=stop batch_size=100\n
    "},{"location":"databases/hbase/#geniusrise_databases.hbase.HBase.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_hbase_spout:\nname: \"HBase\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\ntable: \"my_table\"\nrow_start: \"start\"\nrow_stop: \"stop\"\nbatch_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/hbase/#geniusrise_databases.hbase.HBase.fetch","title":"fetch(host, table, row_start, row_stop, batch_size=100)","text":"

    \ud83d\udcd6 Fetch data from an HBase table and save it in batch.

    Parameters:

    Name Type Description Default host str

    The HBase host.

    required table str

    The HBase table name.

    required row_start str

    The row key to start scanning from.

    required row_stop str

    The row key to stop scanning at.

    required batch_size int

    The number of rows to fetch per batch. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the HBase server or execute the scan.

    "},{"location":"databases/influxdb/","title":"InfluxDB","text":"

    Bases: Spout

    "},{"location":"databases/influxdb/#geniusrise_databases.influxdb.InfluxDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the InfluxDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/influxdb/#geniusrise_databases.influxdb.InfluxDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius InfluxDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=8086 username=myusername password=mypassword database=mydatabase\n
    "},{"location":"databases/influxdb/#geniusrise_databases.influxdb.InfluxDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_influxdb_spout:\nname: \"InfluxDB\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 8086\nusername: \"myusername\"\npassword: \"mypassword\"\ndatabase: \"mydatabase\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/influxdb/#geniusrise_databases.influxdb.InfluxDB.fetch","title":"fetch(host, port, username, password, database)","text":"

    \ud83d\udcd6 Fetch data from an InfluxDB database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The InfluxDB host.

    required port int

    The InfluxDB port.

    required username str

    The InfluxDB username.

    required password str

    The InfluxDB password.

    required database str

    The InfluxDB database name.

    required

    Raises:

    Type Description Exception

    If unable to connect to the InfluxDB server or execute the query.

    "},{"location":"databases/kairosdb/","title":"KairosDB","text":"

    Bases: Spout

    "},{"location":"databases/kairosdb/#geniusrise_databases.kairosdb.KairosDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the KairosDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/kairosdb/#geniusrise_databases.kairosdb.KairosDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius KairosDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args url=http://mykairosdbhost:8080/api/v1/datapoints query=\"SELECT * FROM mymetric\"\n
    "},{"location":"databases/kairosdb/#geniusrise_databases.kairosdb.KairosDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_kairosdb_spout:\nname: \"KairosDB\"\nmethod: \"fetch\"\nargs:\nurl: \"http://mykairosdbhost:8080/api/v1/datapoints\"\nquery: \"SELECT * FROM mymetric\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/kairosdb/#geniusrise_databases.kairosdb.KairosDB.fetch","title":"fetch(url, query)","text":"

    \ud83d\udcd6 Fetch data from a KairosDB metric and save it in batch.

    Parameters:

    Name Type Description Default url str

    The URL of the KairosDB API endpoint.

    required query str

    The SQL query to execute.

    required

    Raises:

    Type Description Exception

    If unable to connect to the KairosDB server or execute the query.

    "},{"location":"databases/keyspaces/","title":"AWSKeyspaces","text":"

    Bases: Spout

    "},{"location":"databases/keyspaces/#geniusrise_databases.keyspaces.AWSKeyspaces.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the AWS Keyspaces class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/keyspaces/#geniusrise_databases.keyspaces.AWSKeyspaces.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius AWSKeyspaces rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args region_name=us-east-1 cluster_name=mycluster table_name=mytable\n
    "},{"location":"databases/keyspaces/#geniusrise_databases.keyspaces.AWSKeyspaces.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_aws_keyspaces_spout:\nname: \"AWSKeyspaces\"\nmethod: \"fetch\"\nargs:\nregion_name: \"us-east-1\"\ncluster_name: \"mycluster\"\ntable_name: \"mytable\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/keyspaces/#geniusrise_databases.keyspaces.AWSKeyspaces.fetch","title":"fetch(region_name, cluster_name, table_name)","text":"

    \ud83d\udcd6 Fetch data from an AWS Keyspaces table and save it in batch.

    Parameters:

    Name Type Description Default region_name str

    The AWS region name.

    required cluster_name str

    The AWS Keyspaces cluster name.

    required table_name str

    The name of the AWS Keyspaces table.

    required

    Raises:

    Type Description Exception

    If unable to connect to the AWS Keyspaces cluster or execute the query.

    "},{"location":"databases/ldap/","title":"LDAP","text":"

    Bases: Spout

    "},{"location":"databases/ldap/#geniusrise_databases.ldap.LDAP.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the LDAP class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/ldap/#geniusrise_databases.ldap.LDAP.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius LDAP rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args url=ldap://myldap.example.com:389 bind_dn=\"cn=admin,dc=example,dc=com\" bind_password=\"password\" search_base=\"dc=example,dc=com\" search_filter=\"(objectClass=person)\" attributes=[\"cn\", \"givenName\", \"sn\"]\n
    "},{"location":"databases/ldap/#geniusrise_databases.ldap.LDAP.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_ldap_spout:\nname: \"LDAP\"\nmethod: \"fetch\"\nargs:\nurl: \"ldap://myldap.example.com:389\"\nbind_dn: \"cn=admin,dc=example,dc=com\"\nbind_password: \"password\"\nsearch_base: \"dc=example,dc=com\"\nsearch_filter: \"(objectClass=person)\"\nattributes: [\"cn\", \"givenName\", \"sn\"]\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/ldap/#geniusrise_databases.ldap.LDAP.fetch","title":"fetch(url, bind_dn, bind_password, search_base, search_filter, attributes)","text":"

    \ud83d\udcd6 Fetch data from an LDAP server and save it in batch.

    Parameters:

    Name Type Description Default url str

    The LDAP URL.

    required bind_dn str

    The DN to bind as.

    required bind_password str

    The password for the DN.

    required search_base str

    The search base.

    required search_filter str

    The search filter.

    required attributes list

    The list of attributes to retrieve.

    required

    Raises:

    Type Description Exception

    If unable to connect to the LDAP server or execute the search.

    "},{"location":"databases/memsql/","title":"MemSQL","text":"

    Bases: Spout

    "},{"location":"databases/memsql/#geniusrise_databases.memsql.MemSQL.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the MemSQL class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/memsql/#geniusrise_databases.memsql.MemSQL.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius MemSQL rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=mymemsqlhost user=myuser password=<PASSWORD> database=mydatabase query=\"SELECT * FROM mytable\"\n
    "},{"location":"databases/memsql/#geniusrise_databases.memsql.MemSQL.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_memsql_spout:\nname: \"MemSQL\"\nmethod: \"fetch\"\nargs:\nhost: \"mymemsqlhost\"\nuser: \"myuser\"\npassword: \"<PASSWORD>\"\ndatabase: \"mydatabase\"\nquery: \"SELECT * FROM mytable\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/memsql/#geniusrise_databases.memsql.MemSQL.fetch","title":"fetch(host, user, password, database, query)","text":"

    \ud83d\udcd6 Fetch data from a MemSQL database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The MemSQL host.

    required user str

    The MemSQL user.

    required password str

    The MemSQL password.

    required database str

    The MemSQL database name.

    required query str

    The SQL query to execute.

    required

    Raises:

    Type Description Exception

    If unable to connect to the MemSQL server or execute the query.

    "},{"location":"databases/mongodb/","title":"MongoDB","text":"

    Bases: Spout

    "},{"location":"databases/mongodb/#geniusrise_databases.mongodb.MongoDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the MongoDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/mongodb/#geniusrise_databases.mongodb.MongoDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius MongoDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=27017 username=myusername password=mypassword database=mydatabase collection=mycollection\n
    "},{"location":"databases/mongodb/#geniusrise_databases.mongodb.MongoDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_mongodb_spout:\nname: \"MongoDB\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 27017\nusername: \"myusername\"\npassword: \"mypassword\"\ndatabase: \"mydatabase\"\ncollection: \"mycollection\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/mongodb/#geniusrise_databases.mongodb.MongoDB.fetch","title":"fetch(host, port, username, password, database, collection)","text":"

    \ud83d\udcd6 Fetch data from a MongoDB database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The MongoDB host.

    required port int

    The MongoDB port.

    required username str

    The MongoDB username.

    required password str

    The MongoDB password.

    required database str

    The MongoDB database name.

    required collection str

    The MongoDB collection name.

    required

    Raises:

    Type Description Exception

    If unable to connect to the MongoDB server or execute the query.

    "},{"location":"databases/mysql/","title":"MySQL","text":"

    Bases: Spout

    "},{"location":"databases/mysql/#geniusrise_databases.mysql.MySQL.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the MySQL class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/mysql/#geniusrise_databases.mysql.MySQL.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius MySQL rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=3306 user=root password=root database=mydb query=\"SELECT * FROM table\" page_size=100\n
    "},{"location":"databases/mysql/#geniusrise_databases.mysql.MySQL.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_mysql_spout:\nname: \"MySQL\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 3306\nuser: \"root\"\npassword: \"root\"\ndatabase: \"mydb\"\nquery: \"SELECT * FROM table\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/mysql/#geniusrise_databases.mysql.MySQL.fetch","title":"fetch(host, port, user, password, database, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a MySQL database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The MySQL host.

    required port int

    The MySQL port.

    required user str

    The MySQL user.

    required password str

    The MySQL password.

    required database str

    The MySQL database name.

    required query str

    The SQL query to execute.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the MySQL server or execute the query.

    "},{"location":"databases/neo4j/","title":"Neo4j","text":"

    Bases: Spout

    "},{"location":"databases/neo4j/#geniusrise_databases.neo4j.Neo4j.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Neo4j class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/neo4j/#geniusrise_databases.neo4j.Neo4j.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Neo4j rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=7687 username=myusername password=mypassword\n
    "},{"location":"databases/neo4j/#geniusrise_databases.neo4j.Neo4j.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_neo4j_spout:\nname: \"Neo4j\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 7687\nusername: \"myusername\"\npassword: \"mypassword\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/neo4j/#geniusrise_databases.neo4j.Neo4j.fetch","title":"fetch(host, port, username, password)","text":"

    \ud83d\udcd6 Fetch data from a Neo4j database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The Neo4j host.

    required port int

    The Neo4j port.

    required username str

    The Neo4j username.

    required password str

    The Neo4j password.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Neo4j server or execute the query.

    "},{"location":"databases/nuodb/","title":"NuoDB","text":"

    Bases: Spout

    "},{"location":"databases/nuodb/#geniusrise_databases.nuodb.NuoDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the NuoDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/nuodb/#geniusrise_databases.nuodb.NuoDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius NuoDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args url=http://mynuodbhost:8080/v1/statement query=\"SELECT * FROM mytable\"\n
    "},{"location":"databases/nuodb/#geniusrise_databases.nuodb.NuoDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_nuodb_spout:\nname: \"NuoDB\"\nmethod: \"fetch\"\nargs:\nurl: \"http://mynuodbhost:8080/v1/statement\"\nquery: \"SELECT * FROM mytable\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/nuodb/#geniusrise_databases.nuodb.NuoDB.fetch","title":"fetch(url, query)","text":"

    \ud83d\udcd6 Fetch data from a NuoDB table and save it in batch.

    Parameters:

    Name Type Description Default url str

    The URL of the NuoDB API endpoint.

    required query str

    The SQL query to execute.

    required

    Raises:

    Type Description Exception

    If unable to connect to the NuoDB server or execute the query.

    "},{"location":"databases/opentsdb/","title":"OpenTSDB","text":"

    Bases: Spout

    "},{"location":"databases/opentsdb/#geniusrise_databases.opentsdb.OpenTSDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the OpenTSDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/opentsdb/#geniusrise_databases.opentsdb.OpenTSDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius OpenTSDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=http://localhost:4242\n
    "},{"location":"databases/opentsdb/#geniusrise_databases.opentsdb.OpenTSDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_opentsdb_spout:\nname: \"OpenTSDB\"\nmethod: \"fetch\"\nargs:\nhost: \"http://localhost:4242\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/opentsdb/#geniusrise_databases.opentsdb.OpenTSDB.fetch","title":"fetch(host)","text":"

    \ud83d\udcd6 Fetch data from an OpenTSDB database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The URL of the OpenTSDB instance.

    required

    Raises:

    Type Description Exception

    If unable to connect to the OpenTSDB server or execute the query.

    "},{"location":"databases/oracle/","title":"Oracle","text":"

    Bases: Spout

    "},{"location":"databases/oracle/#geniusrise_databases.oracle.Oracle.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the OracleSQL class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/oracle/#geniusrise_databases.oracle.Oracle.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius OracleSQL rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args server=localhost port=1521 service_name=myservice user=myuser password=mypassword\n
    "},{"location":"databases/oracle/#geniusrise_databases.oracle.Oracle.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_oraclesql_spout:\nname: \"OracleSQL\"\nmethod: \"fetch\"\nargs:\nserver: \"localhost\"\nport: 1521\nservice_name: \"myservice\"\nuser: \"myuser\"\npassword: \"mypassword\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/oracle/#geniusrise_databases.oracle.Oracle.fetch","title":"fetch(server, port, service_name, user, password, query)","text":"

    \ud83d\udcd6 Fetch data from an Oracle SQL database and save it in batch.

    Parameters:

    Name Type Description Default server str

    The Oracle SQL server.

    required port int

    The Oracle SQL port.

    required service_name str

    The Oracle service name.

    required user str

    The Oracle user.

    required password str

    The Oracle password.

    required query str

    The SQL query to execute.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Oracle SQL server or execute the query.

    "},{"location":"databases/postgres/","title":"PostgreSQL","text":"

    Bases: Spout

    "},{"location":"databases/postgres/#geniusrise_databases.postgres.PostgreSQL.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the PostgreSQL class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/postgres/#geniusrise_databases.postgres.PostgreSQL.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius PostgreSQL rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=5432 user=postgres password=postgres database=mydb query=\"SELECT * FROM table\" page_size=100\n
    "},{"location":"databases/postgres/#geniusrise_databases.postgres.PostgreSQL.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_postgresql_spout:\nname: \"PostgreSQL\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 5432\nuser: \"postgres\"\npassword: \"postgres\"\ndatabase: \"mydb\"\nquery: \"SELECT * FROM table\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/postgres/#geniusrise_databases.postgres.PostgreSQL.fetch","title":"fetch(host, port, user, password, database, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a PostgreSQL database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The PostgreSQL host.

    required port int

    The PostgreSQL port.

    required user str

    The PostgreSQL user.

    required password str

    The PostgreSQL password.

    required database str

    The PostgreSQL database name.

    required query str

    The SQL query to execute.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the PostgreSQL server or execute the query.

    "},{"location":"databases/presto/","title":"Presto","text":"

    Bases: Spout

    "},{"location":"databases/presto/#geniusrise_databases.presto.Presto.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Presto class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/presto/#geniusrise_databases.presto.Presto.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Presto rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=mypresto.example.com username=myusername password=mypassword catalog=mycatalog schema=myschema table=mytable\n
    "},{"location":"databases/presto/#geniusrise_databases.presto.Presto.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_presto_spout:\nname: \"Presto\"\nmethod: \"fetch\"\nargs:\nhost: \"mypresto.example.com\"\nusername: \"myusername\"\npassword: \"mypassword\"\ncatalog: \"mycatalog\"\nschema: \"myschema\"\ntable: \"mytable\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/presto/#geniusrise_databases.presto.Presto.fetch","title":"fetch(host, username, password, catalog, schema, table)","text":"

    \ud83d\udcd6 Fetch data from a Presto table and save it in batch.

    Parameters:

    Name Type Description Default host str

    The Presto host.

    required username str

    The Presto username.

    required password str

    The Presto password.

    required catalog str

    The Presto catalog name.

    required schema str

    The Presto schema name.

    required table str

    The name of the Presto table.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Presto server or execute the command.

    "},{"location":"databases/redis/","title":"Redis","text":"

    Bases: Spout

    "},{"location":"databases/redis/#geniusrise_databases.redis.Redis.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Redis class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/redis/#geniusrise_databases.redis.Redis.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Redis rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=6379 password=mypassword database=0\n
    "},{"location":"databases/redis/#geniusrise_databases.redis.Redis.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_redis_spout:\nname: \"Redis\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 6379\npassword: \"mypassword\"\ndatabase: 0\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/redis/#geniusrise_databases.redis.Redis.fetch","title":"fetch(host, port, password, database)","text":"

    \ud83d\udcd6 Fetch data from a Redis database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The Redis host.

    required port int

    The Redis port.

    required password str

    The Redis password.

    required database int

    The Redis database number.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Redis server or execute the command.

    "},{"location":"databases/riak/","title":"Riak","text":"

    Bases: Spout

    "},{"location":"databases/riak/#geniusrise_databases.riak.Riak.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Riak class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/riak/#geniusrise_databases.riak.Riak.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Riak rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=8098\n
    "},{"location":"databases/riak/#geniusrise_databases.riak.Riak.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_riak_spout:\nname: \"Riak\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 8098\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/riak/#geniusrise_databases.riak.Riak.fetch","title":"fetch(host, port)","text":"

    \ud83d\udcd6 Fetch data from a Riak database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The Riak host.

    required port int

    The Riak port.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Riak server or execute the query.

    "},{"location":"databases/spanner/","title":"Couchbase","text":"

    Bases: Spout

    "},{"location":"databases/spanner/#geniusrise_databases.spanner.Spanner.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Spanner class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/spanner/#geniusrise_databases.spanner.Spanner.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Spanner rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args project_id=my_project_id instance_id=my_instance database_id=my_database table_id=my_table\n
    "},{"location":"databases/spanner/#geniusrise_databases.spanner.Spanner.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_spanner_spout:\nname: \"Spanner\"\nmethod: \"fetch\"\nargs:\nproject_id: \"my_project_id\"\ninstance_id: \"my_instance\"\ndatabase_id: \"my_database\"\ntable_id: \"my_table\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/spanner/#geniusrise_databases.spanner.Spanner.fetch","title":"fetch(project_id, instance_id, database_id, table_id)","text":"

    \ud83d\udcd6 Fetch data from a Spanner database and save it in batch.

    Parameters:

    Name Type Description Default project_id str

    The Google Cloud project ID.

    required instance_id str

    The Spanner instance ID.

    required database_id str

    The Spanner database ID.

    required table_id str

    The Spanner table ID.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Spanner database or execute the query.

    "},{"location":"databases/sql_server/","title":"SQLServer","text":"

    Bases: Spout

    "},{"location":"databases/sql_server/#geniusrise_databases.sql_server.SQLServer.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the SQLServer class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/sql_server/#geniusrise_databases.sql_server.SQLServer.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius SQLServer rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args server=localhost port=1433 user=myuser password=mypassword database=mydatabase query=\"SELECT * FROM mytable\"\n
    "},{"location":"databases/sql_server/#geniusrise_databases.sql_server.SQLServer.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_sqlserver_spout:\nname: \"SQLServer\"\nmethod: \"fetch\"\nargs:\nserver: \"localhost\"\nport: 1433\nuser: \"myuser\"\npassword: \"mypassword\"\ndatabase: \"mydatabase\"\nquery: \"SELECT * FROM mytable\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/sql_server/#geniusrise_databases.sql_server.SQLServer.fetch","title":"fetch(server, port, user, password, database, query)","text":"

    \ud83d\udcd6 Fetch data from a SQL Server database and save it in batch.

    Parameters:

    Name Type Description Default server str

    The SQL Server host.

    required port int

    The SQL Server port.

    required user str

    The SQL Server user.

    required password str

    The SQL Server password.

    required database str

    The SQL Server database name.

    required query str

    The SQL query to execute.

    required

    Raises:

    Type Description Exception

    If unable to connect to the SQL Server server or execute the query.

    "},{"location":"databases/sqlite/","title":"SQLite","text":"

    Bases: Spout

    "},{"location":"databases/sqlite/#geniusrise_databases.sqlite.SQLite.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the SQLite class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"databases/sqlite/#geniusrise_databases.sqlite.SQLite.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius SQLite rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args s3_bucket=my_s3_bucket s3_key=mydb.sqlite query=\"SELECT * FROM table\" page_size=100\n
    "},{"location":"databases/sqlite/#geniusrise_databases.sqlite.SQLite.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_sqlite_spout:\nname: \"SQLite\"\nmethod: \"fetch\"\nargs:\ns3_bucket: \"my_s3_bucket\"\ns3_key: \"mydb.sqlite\"\nquery: \"SELECT * FROM table\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/sqlite/#geniusrise_databases.sqlite.SQLite.fetch","title":"fetch(s3_bucket, s3_key, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from an SQLite database and save it in batch.

    Parameters:

    Name Type Description Default s3_bucket str

    The S3 bucket containing the SQLite database.

    required s3_key str

    The S3 key for the SQLite database.

    required query str

    The SQL query to execute.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the SQLite database or execute the query.

    "},{"location":"databases/sybase/","title":"Sybase","text":"

    Bases: Spout

    "},{"location":"databases/sybase/#geniusrise_databases.sybase.Sybase.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Sybase class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/sybase/#geniusrise_databases.sybase.Sybase.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Sybase rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=5000 user=sa password=secret database=mydb query=\"SELECT * FROM table\" page_size=100\n
    "},{"location":"databases/sybase/#geniusrise_databases.sybase.Sybase.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_sybase_spout:\nname: \"Sybase\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 5000\nuser: \"sa\"\npassword: \"secret\"\ndatabase: \"mydb\"\nquery: \"SELECT * FROM table\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/sybase/#geniusrise_databases.sybase.Sybase.fetch","title":"fetch(host, port, user, password, database, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a Sybase database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The Sybase host.

    required port int

    The Sybase port.

    required user str

    The Sybase user.

    required password str

    The Sybase password.

    required database str

    The Sybase database name.

    required query str

    The SQL query to execute.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the Sybase server or execute the query.

    "},{"location":"databases/teradata/","title":"Teradata","text":"

    Bases: Spout

    "},{"location":"databases/teradata/#geniusrise_databases.teradata.Teradata.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Teradata class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/teradata/#geniusrise_databases.teradata.Teradata.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Teradata rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=myteradata.example.com username=myusername password=mypassword database=mydb\n
    "},{"location":"databases/teradata/#geniusrise_databases.teradata.Teradata.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_teradata_spout:\nname: \"Teradata\"\nmethod: \"fetch\"\nargs:\nhost: \"myteradata.example.com\"\nusername: \"myusername\"\npassword: \"mypassword\"\ndatabase: \"mydb\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/teradata/#geniusrise_databases.teradata.Teradata.fetch","title":"fetch(host, username, password, database)","text":"

    \ud83d\udcd6 Fetch data from a Teradata database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The Teradata host.

    required username str

    The Teradata username.

    required password str

    The Teradata password.

    required database str

    The Teradata database name.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Teradata server or execute the command.

    "},{"location":"databases/tidb/","title":"TiDB","text":"

    Bases: Spout

    "},{"location":"databases/tidb/#geniusrise_databases.tidb.TiDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the TiDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"databases/tidb/#geniusrise_databases.tidb.TiDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius TiDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=4000 user=root password=root database=mydb query=\"SELECT * FROM table\" page_size=100\n
    "},{"location":"databases/tidb/#geniusrise_databases.tidb.TiDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_tidb_spout:\nname: \"TiDB\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 4000\nuser: \"root\"\npassword: \"root\"\ndatabase: \"mydb\"\nquery: \"SELECT * FROM table\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/tidb/#geniusrise_databases.tidb.TiDB.fetch","title":"fetch(host, port, user, password, database, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a TiDB database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The TiDB host.

    required port int

    The TiDB port.

    required user str

    The TiDB user.

    required password str

    The TiDB password.

    required database str

    The TiDB database name.

    required query str

    The SQL query to execute.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the TiDB server or execute the query.

    "},{"location":"databases/timescaledb/","title":"TimescaleDB","text":"

    Bases: Spout

    "},{"location":"databases/timescaledb/#geniusrise_databases.timescaledb.TimescaleDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the TimescaleDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"databases/timescaledb/#geniusrise_databases.timescaledb.TimescaleDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius TimescaleDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=5432 user=postgres password=postgres database=mydb query=\"SELECT * FROM hypertable\" page_size=100\n
    "},{"location":"databases/timescaledb/#geniusrise_databases.timescaledb.TimescaleDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_timescaledb_spout:\nname: \"TimescaleDB\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 5432\nuser: \"postgres\"\npassword: \"postgres\"\ndatabase: \"mydb\"\nquery: \"SELECT * FROM hypertable\"\npage_size: 100\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/timescaledb/#geniusrise_databases.timescaledb.TimescaleDB.fetch","title":"fetch(host, port, user, password, database, query, page_size=100)","text":"

    \ud83d\udcd6 Fetch data from a TimescaleDB hypertable and save it in batch.

    Parameters:

    Name Type Description Default host str

    The TimescaleDB host.

    required port int

    The TimescaleDB port.

    required user str

    The TimescaleDB user.

    required password str

    The TimescaleDB password.

    required database str

    The TimescaleDB database name.

    required query str

    The SQL query to execute.

    required page_size int

    The number of rows to fetch per page. Defaults to 100.

    100

    Raises:

    Type Description Exception

    If unable to connect to the TimescaleDB server or execute the query.

    "},{"location":"databases/vertica/","title":"Vertica","text":"

    Bases: Spout

    "},{"location":"databases/vertica/#geniusrise_databases.vertica.Vertica.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Vertica class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/vertica/#geniusrise_databases.vertica.Vertica.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Vertica rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--host my_host port=5433 user=my_user password=my_password database=my_database query=\"SELECT * FROM my_table\"\n
    "},{"location":"databases/vertica/#geniusrise_databases.vertica.Vertica.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_vertica_spout:\nname: \"Vertica\"\nmethod: \"fetch\"\nargs:\nhost: \"my_host\"\nport: 5433\nuser: \"my_user\"\npassword: \"my_password\"\ndatabase: \"my_database\"\nquery: \"SELECT * FROM my_table\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/vertica/#geniusrise_databases.vertica.Vertica.fetch","title":"fetch(host, port, user, password, database, query)","text":"

    \ud83d\udcd6 Fetch data from a Vertica database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The Vertica host.

    required port int

    The Vertica port.

    required user str

    The Vertica user.

    required password str

    The Vertica password.

    required database str

    The Vertica database name.

    required query str

    The SQL query to execute.

    required

    Raises:

    Type Description Exception

    If unable to connect to the Vertica server or execute the query.

    "},{"location":"databases/voltdb/","title":"VoltDB","text":"

    Bases: Spout

    "},{"location":"databases/voltdb/#geniusrise_databases.voltdb.VoltDB.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the VoltDB class.

    Parameters:

    Name Type Description Default output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"databases/voltdb/#geniusrise_databases.voltdb.VoltDB.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius VoltDB rise \\\nbatch \\\n--output_s3_bucket my_bucket \\\n--output_s3_folder s3/folder \\\nnone \\\nfetch \\\n--args host=localhost port=21212 username=myuser password=<PASSWORD>\n
    "},{"location":"databases/voltdb/#geniusrise_databases.voltdb.VoltDB.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_voltdb_spout:\nname: \"VoltDB\"\nmethod: \"fetch\"\nargs:\nhost: \"localhost\"\nport: 21212\nusername: \"myuser\"\npassword: \"<PASSWORD>\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/folder\"\n
    "},{"location":"databases/voltdb/#geniusrise_databases.voltdb.VoltDB.fetch","title":"fetch(host, port, username, password)","text":"

    \ud83d\udcd6 Fetch data from a VoltDB database and save it in batch.

    Parameters:

    Name Type Description Default host str

    The VoltDB host.

    required port int

    The VoltDB port.

    required username str

    The VoltDB username.

    required password str

    The VoltDB password.

    required

    Raises:

    Type Description Exception

    If unable to connect to the VoltDB server or execute the query.

    "},{"location":"guides/404/","title":"WIP","text":"

    Working on it! \ud83d\ude05

    "},{"location":"guides/architecture/","title":"Architecture","text":""},{"location":"guides/architecture/#introduction","title":"Introduction","text":"

    The Geniusrise framework is designed to provide a modular, scalable, and interoperable system for orchestrating machine learning workflows, particularly in the context of Large Language Models (LLMs). The architecture is built around the core concept of a Task, which represents a discrete unit of work. This document provides an overview of the architecture, detailing the primary components and their interactions.

    "},{"location":"guides/architecture/#system-overview","title":"System Overview","text":"

    The Geniusrise framework is composed of several key components:

    1. Tasks: The fundamental units of work.
    2. State Managers: Responsible for monitoring and managing the state of tasks.
    3. Data Managers: Oversee the input and output data associated with tasks.
    4. Model Managers: Handle model operations, ensuring efficient management.
    5. Runners: Wrappers for executing tasks on various platforms.
    6. Spouts and Bolts: Specialized tasks for data ingestion and processing.
    "},{"location":"guides/architecture/#tasks","title":"Tasks","text":"

    A task is the fundamental unit of work in the Geniusrise framework. It represents a specific operation or computation and can run for an arbitrary amount of time, performing any amount of work.

    109337f9-563a-4af4-b2e9-1404da62fcc4
    "},{"location":"guides/architecture/#state-managers","title":"State Managers","text":"

    State Managers play a pivotal role in maintaining the state of tasks. They ensure that the progress and status of tasks are tracked, especially in distributed environments. Geniusrise offers various types of State Managers:

    1. DynamoDBStateManager: Interfaces with Amazon DynamoDB.
    2. InMemoryStateManager: Maintains state within the application's memory.
    3. PostgresStateManager: Interfaces with PostgreSQL databases.
    4. RedisStateManager: Interfaces with Redis in-memory data structure store.

    State Managers store data in various locations, allowing organizations to connect dashboards to these storage systems for real-time monitoring and analytics. This centralized storage and reporting mechanism ensures that stakeholders have a unified view of task states.

    13ecf79d-3d42-49b9-bc1c-5a8653c5faae
    "},{"location":"guides/architecture/#data-managers","title":"Data Managers","text":"

    Data Managers are responsible for handling the input and output data for tasks. They implement various data operations methods that tasks can leverage to ingest or save data during their runs. Data Managers can be categorized based on their function and data processing type:

    1. BatchInputConfig: Manages batch input data.
    2. BatchOutputConfig: Manages batch output data.
    3. StreamingInputConfig: Manages streaming input data.
    4. StreamingOutputConfig: Manages streaming output data.

    Data Managers manage data partitioning for both batch and streaming data. By adhering to common data patterns, they enable the system's components to operate independently, fostering the creation of intricate networks of tasks. This independence, while allowing for flexibility and scalability, ensures that cascading failures in one component don't necessarily compromise the entire system.

    307acb9e-f87a-4e76-a1d2-783ad6dcca81
    "},{"location":"guides/architecture/#model-managers","title":"Model Managers","text":"

    Model Managers oversee model operations, ensuring that models are saved, loaded, and managed. They can be of two primary types:

    1. S3ModelManager: Interfaces with Amazon S3 for model storage.
    2. WANDBModelManager: Interfaces with Weights & Biases for model versioning.
    3. GitModelManager: Interfaces with Git repositories for versioning of models.
    ed40fcb8-ebac-4982-96c4-dc0ffbb0413e
    "},{"location":"guides/architecture/#spouts-and-bolts","title":"Spouts and Bolts","text":"

    At the heart of the Geniusrise framework are two primary component types: spouts and bolts.

    1. Spouts: These are tasks responsible for ingesting data from various sources. Depending on the output type, spouts can either produce streaming output or batch output.

      1. Batch: Runs periodically, Produces data as a batch output.
      2. Stream: Runs forever, produces data into a streaming output.
    2. Bolts: Bolts are tasks that take in data, process it, and produce output. They can be categorized based on their input and output types:

      1. Stream-Stream: Reads streaming data and produces streaming output.
      2. Stream-Batch: Reads streaming data and produces batch output.
      3. Batch-Stream: Reads batch data and produces streaming output.
      4. Batch-Batch: Reads batch data and produces batch output.
    0534137c-71a6-416b-b86d-1424007ce1f4
    "},{"location":"guides/architecture/#runners","title":"Runners","text":"

    Runners are the backbone of the Geniusrise framework, ensuring that tasks are executed seamlessly across various platforms. They encapsulate the environment and resources required for task execution, abstracting away the underlying complexities. Geniusrise offers the following runners:

    1. Local Runner: Executes tasks directly on a local machine, ideal for development and testing.
    2. Docker Runner: Runs tasks within Docker containers, ensuring a consistent and isolated environment.
    3. Kubernetes Runner: Deploys tasks on Kubernetes clusters, leveraging its scalability and orchestration capabilities.
    4. Airflow Runner: Integrates with Apache Airflow, allowing for complex workflow orchestration and scheduling.
    5. ECS Runner: Executes tasks on AWS ECS, providing a managed container service.
    6. Batch Runner: Optimized for batch computing workloads on platforms like AWS Batch.
    "},{"location":"guides/cli/","title":"CLI","text":"
    • DESCRIPTION
    • Spouts
      • Command: genius TestSpoutCtlSpout
      • Command: genius TestSpoutCtlSpout rise
      • Command: genius TestSpoutCtlSpout deploy
      • Command: genius TestSpoutCtlSpout help
    • Bolts
      • Command: genius TestBoltCtlBolt
      • Command: genius TestBoltCtlBolt rise
      • Command: genius TestBoltCtlBolt deploy
      • Command: genius TestBoltCtlBolt help
    • Deployment
      • Command: genius rise
      • Command: genius rise up
    • Kubernetes Pods
      • Command: genius pod
      • Command: genius pod status
      • Command: genius pod show
      • Command: genius pod describe
      • Command: genius pod logs
    • Kubernetes Deployments
      • Command: genius deployment
      • Command: genius deployment create
      • Command: genius deployment scale
      • Command: genius deployment describe
      • Command: genius deployment show
      • Command: genius deployment delete
      • Command: genius deployment status
    • Kubernetes Services
      • Command: genius service
      • Command: genius service create
      • Command: genius service delete
      • Command: genius service describe
      • Command: genius service show
    • Kubernetes Jobs
      • Command: genius job
      • Command: genius job create
      • Command: genius job delete
      • Command: genius job status
      • Kubernetes Cron Jobs
      • Command: genius cron_job
      • Command: genius cron_job create_cronjob
      • Command: genius cron_job delete_cronjob
      • Command: genius cron_job get_cronjob_status
    • Packaging
      • Command: genius docker package
    • Miscellaneous
      • Command: genius plugins
      • Command: genius list
    "},{"location":"guides/cli/#description","title":"DESCRIPTION","text":"

    Geniusrise

    POSITIONAL ARGUMENTS

    genius TestSpoutCtlSpout

    : Manage spout TestSpoutCtlSpout.

    genius TestBoltCtlBolt

    : Manage bolt TestBoltCtlBolt.

    genius rise

    : Manage spouts and bolts with a YAML file.

    genius docker

    : Package this application into a Docker image.

    genius pod

    : Manage spouts and bolts as kubernetes pod

    genius deployment

    : Manage spouts and bolts as kubernetes deployment

    genius service

    : Manage spouts and bolts as kubernetes service

    genius job

    : Manage spouts and bolts as kubernetes job

    genius cron_job

    : Manage spouts and bolts as kubernetes cron_job

    genius plugins

    : Print help for all spouts and bolts.

    genius list

    : List all discovered spouts and bolts.

    "},{"location":"guides/cli/#spouts","title":"Spouts","text":""},{"location":"guides/cli/#command-genius-testspoutctlspout","title":"Command: genius TestSpoutCtlSpout","text":"

    Usage: genius TestSpoutCtlSpout [-h] {rise,deploy,help} ...

    POSITIONAL ARGUMENTS genius TestSpoutCtlSpout

    genius TestSpoutCtlSpout rise

    : Run a spout locally.

    genius TestSpoutCtlSpout deploy

    : Run a spout remotely.

    genius TestSpoutCtlSpout help

    : Print help for the spout.

    "},{"location":"guides/cli/#command-genius-testspoutctlspout-rise","title":"Command: genius TestSpoutCtlSpout rise","text":"

    Usage: genius TestSpoutCtlSpout rise [-h] [--buffer_size BUFFER_SIZE] [--output_folder OUTPUT_FOLDER] [--output_kafka_topic OUTPUT_KAFKA_TOPIC] [--output_kafka_cluster_connection_string OUTPUT_KAFKA_CLUSTER_CONNECTION_STRING] [--output_s3_bucket OUTPUT_S3_BUCKET] [--output_s3_folder OUTPUT_S3_FOLDER] [--redis_host REDIS_HOST] [--redis_port REDIS_PORT] [--redis_db REDIS_DB] [--postgres_host POSTGRES_HOST] [--postgres_port POSTGRES_PORT] [--postgres_user POSTGRES_USER] [--postgres_password POSTGRES_PASSWORD] [--postgres_database POSTGRES_DATABASE] [--postgres_table POSTGRES_TABLE] [--dynamodb_table_name DYNAMODB_TABLE_NAME] [--dynamodb_region_name DYNAMODB_REGION_NAME] [--prometheus_gateway PROMETHEUS_GATEWAY] [--args ...] {batch,streaming,stream_to_batch} {none,redis,postgres,dynamodb,prometheus} method_name

    {batch,streaming,stream_to_batch}

    : Choose the type of output data: batch or streaming.

    {none,redis,postgres,dynamodb,prometheus}

    : Select the type of state manager: none, redis, postgres, or dynamodb.

    method_name

    : The name of the method to execute on the spout.

    Options genius TestSpoutCtlSpout rise

    --buffer_size BUFFER_SIZE: Specify the size of the buffer. --output_folder OUTPUT_FOLDER: Specify the directory where output files should be stored temporarily

    --output_kafka_topic OUTPUT_KAFKA_TOPIC: Kafka output topic for streaming spouts. --output_kafka_cluster_connection_string OUTPUT_KAFKA_CLUSTER_CONNECTION_STRING: Kafka connection string for streaming spouts. --output_s3_bucket OUTPUT_S3_BUCKET: Provide the name of the S3 bucket for output storage. --output_s3_folder OUTPUT_S3_FOLDER: Indicate the S3 folder for output storage. --redis_host REDIS_HOST: Enter the host address for the Redis server. --redis_port REDIS_PORT: Enter the port number for the Redis server. --redis_db REDIS_DB: Specify the Redis database to be used. --postgres_host POSTGRES_HOST: Enter the host address for the PostgreSQL server. --postgres_port POSTGRES_PORT: Enter the port number for the PostgreSQL server. --postgres_user POSTGRES_USER: Provide the username for the PostgreSQL server. --postgres_password POSTGRES_PASSWORD: Provide the password for the PostgreSQL server. --postgres_database POSTGRES_DATABASE: Specify the PostgreSQL database to be used. --postgres_table POSTGRES_TABLE: Specify the PostgreSQL table to be used. --dynamodb_table_name DYNAMODB_TABLE_NAME: Provide the name of the DynamoDB table. --dynamodb_region_name DYNAMODB_REGION_NAME: Specify the AWS region for DynamoDB. --prometheus_gateway PROMETHEUS_GATEWAY: Specify the prometheus gateway URL. --args ...: Additional keyword arguments to pass to the spout.

    "},{"location":"guides/cli/#command-genius-testspoutctlspout-deploy","title":"Command: genius TestSpoutCtlSpout deploy","text":"

    Usage: genius TestSpoutCtlSpout deploy [-h] [--buffer_size BUFFER_SIZE] [--output_folder OUTPUT_FOLDER] [--output_kafka_topic OUTPUT_KAFKA_TOPIC] [--output_kafka_cluster_connection_string OUTPUT_KAFKA_CLUSTER_CONNECTION_STRING] [--output_s3_bucket OUTPUT_S3_BUCKET] [--output_s3_folder OUTPUT_S3_FOLDER] [--redis_host REDIS_HOST] [--redis_port REDIS_PORT] [--redis_db REDIS_DB] [--postgres_host POSTGRES_HOST] [--postgres_port POSTGRES_PORT] [--postgres_user POSTGRES_USER] [--postgres_password POSTGRES_PASSWORD] [--postgres_database POSTGRES_DATABASE] [--postgres_table POSTGRES_TABLE] [--dynamodb_table_name DYNAMODB_TABLE_NAME] [--dynamodb_region_name DYNAMODB_REGION_NAME] [--prometheus_gateway PROMETHEUS_GATEWAY] [--k8s_kind {deployment,service,job,cron_job}] [--k8s_name K8S_NAME] [--k8s_image K8S_IMAGE] [--k8s_replicas K8S_REPLICAS] [--k8s_env_vars K8S_ENV_VARS] [--k8s_cpu K8S_CPU] [--k8s_memory K8S_MEMORY] [--k8s_storage K8S_STORAGE] [--k8s_gpu K8S_GPU] [--k8s_kube_config_path K8S_KUBE_CONFIG_PATH] [--k8s_api_key K8S_API_KEY] [--k8s_api_host K8S_API_HOST] [--k8s_verify_ssl K8S_VERIFY_SSL] [--k8s_ssl_ca_cert K8S_SSL_CA_CERT] [--k8s_cluster_name K8S_CLUSTER_NAME] [--k8s_context_name K8S_CONTEXT_NAME] [--k8s_namespace K8S_NAMESPACE] [--k8s_labels K8S_LABELS] [--k8s_annotations K8S_ANNOTATIONS] [--k8s_port K8S_PORT] [--k8s_target_port K8S_TARGET_PORT] [--k8s_schedule K8S_SCHEDULE] [--args ...] {batch,streaming,stream_to_batch} {none,redis,postgres,dynamodb,prometheus} {k8s} method_name

    {batch,streaming,stream_to_batch}

    : Choose the type of output data: batch or streaming.

    {none,redis,postgres,dynamodb,prometheus}

    : Select the type of state manager: none, redis, postgres, or dynamodb.

    {k8s}

    : Choose the type of deployment.

    method_name

    : The name of the method to execute on the spout.

    Options genius TestSpoutCtlSpout deploy

    --buffer_size BUFFER_SIZE: Specify the size of the buffer. --output_folder OUTPUT_FOLDER: Specify the directory where output files should be stored temporarily

    --output_kafka_topic OUTPUT_KAFKA_TOPIC: Kafka output topic for streaming spouts. --output_kafka_cluster_connection_string OUTPUT_KAFKA_CLUSTER_CONNECTION_STRING: Kafka connection string for streaming spouts. --output_s3_bucket OUTPUT_S3_BUCKET: Provide the name of the S3 bucket for output storage. --output_s3_folder OUTPUT_S3_FOLDER: Indicate the S3 folder for output storage. --redis_host REDIS_HOST: Enter the host address for the Redis server. --redis_port REDIS_PORT: Enter the port number for the Redis server. --redis_db REDIS_DB: Specify the Redis database to be used. --postgres_host POSTGRES_HOST: Enter the host address for the PostgreSQL server. --postgres_port POSTGRES_PORT: Enter the port number for the PostgreSQL server. --postgres_user POSTGRES_USER: Provide the username for the PostgreSQL server. --postgres_password POSTGRES_PASSWORD: Provide the password for the PostgreSQL server. --postgres_database POSTGRES_DATABASE: Specify the PostgreSQL database to be used. --postgres_table POSTGRES_TABLE: Specify the PostgreSQL table to be used. --dynamodb_table_name DYNAMODB_TABLE_NAME: Provide the name of the DynamoDB table. --dynamodb_region_name DYNAMODB_REGION_NAME: Specify the AWS region for DynamoDB. --prometheus_gateway PROMETHEUS_GATEWAY: Specify the prometheus gateway URL. --k8s_kind {deployment,service,job,cron_job}: Choose the type of kubernetes resource. --k8s_name K8S_NAME: Name of the Kubernetes resource. --k8s_image K8S_IMAGE: Docker image for the Kubernetes resource. --k8s_replicas K8S_REPLICAS: Number of replicas. --k8s_env_vars K8S_ENV_VARS: Environment variables as a JSON string. --k8s_cpu K8S_CPU: CPU requirements. --k8s_memory K8S_MEMORY: Memory requirements. --k8s_storage K8S_STORAGE: Storage requirements. --k8s_gpu K8S_GPU: GPU requirements. --k8s_kube_config_path K8S_KUBE_CONFIG_PATH: Name of the Kubernetes cluster local config. --k8s_api_key K8S_API_KEY: GPU requirements. --k8s_api_host K8S_API_HOST: GPU requirements. 
--k8s_verify_ssl K8S_VERIFY_SSL: GPU requirements. --k8s_ssl_ca_cert K8S_SSL_CA_CERT: GPU requirements. --k8s_cluster_name K8S_CLUSTER_NAME: Name of the Kubernetes cluster. --k8s_context_name K8S_CONTEXT_NAME: Name of the kubeconfig context. --k8s_namespace K8S_NAMESPACE: Kubernetes namespace. --k8s_labels K8S_LABELS: Labels for Kubernetes resources, as a JSON string. --k8s_annotations K8S_ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --k8s_port K8S_PORT: Port to run the spout on as a service. --k8s_target_port K8S_TARGET_PORT: Port to expose the spout on as a service. --k8s_schedule K8S_SCHEDULE: Schedule to run the spout on as a cron job. --args ...: Additional keyword arguments to pass to the spout.

    "},{"location":"guides/cli/#command-genius-testspoutctlspout-help","title":"Command: genius TestSpoutCtlSpout help","text":"

    Usage: genius TestSpoutCtlSpout help [-h] method

    method

    : The method to execute.

    "},{"location":"guides/cli/#bolts","title":"Bolts","text":""},{"location":"guides/cli/#command-genius-testboltctlbolt","title":"Command: genius TestBoltCtlBolt","text":"

    Usage: genius TestBoltCtlBolt [-h] {rise,deploy,help} ...

    POSITIONAL ARGUMENTS genius TestBoltCtlBolt

    genius TestBoltCtlBolt rise

    : Run a bolt locally.

    genius TestBoltCtlBolt deploy

    : Run a spout remotely.

    genius TestBoltCtlBolt help

    : Print help for the bolt.

    "},{"location":"guides/cli/#command-genius-testboltctlbolt-rise","title":"Command: genius TestBoltCtlBolt rise","text":"

    Usage: genius TestBoltCtlBolt rise [-h] [--buffer_size BUFFER_SIZE] [--input_folder INPUT_FOLDER] [--input_kafka_topic INPUT_KAFKA_TOPIC] [--input_kafka_cluster_connection_string INPUT_KAFKA_CLUSTER_CONNECTION_STRING] [--input_kafka_consumer_group_id INPUT_KAFKA_CONSUMER_GROUP_ID] [--input_s3_bucket INPUT_S3_BUCKET] [--input_s3_folder INPUT_S3_FOLDER] [--output_folder OUTPUT_FOLDER] [--output_kafka_topic OUTPUT_KAFKA_TOPIC] [--output_kafka_cluster_connection_string OUTPUT_KAFKA_CLUSTER_CONNECTION_STRING] [--output_s3_bucket OUTPUT_S3_BUCKET] [--output_s3_folder OUTPUT_S3_FOLDER] [--redis_host REDIS_HOST] [--redis_port REDIS_PORT] [--redis_db REDIS_DB] [--postgres_host POSTGRES_HOST] [--postgres_port POSTGRES_PORT] [--postgres_user POSTGRES_USER] [--postgres_password POSTGRES_PASSWORD] [--postgres_database POSTGRES_DATABASE] [--postgres_table POSTGRES_TABLE] [--dynamodb_table_name DYNAMODB_TABLE_NAME] [--dynamodb_region_name DYNAMODB_REGION_NAME] [--prometheus_gateway PROMETHEUS_GATEWAY] [--args ...] {batch,streaming,batch_to_stream,stream_to_batch} {batch,streaming,stream_to_batch} {none,redis,postgres,dynamodb,prometheus} method_name

    {batch,streaming,batch_to_stream,stream_to_batch}

    : Choose the type of input data: batch or streaming.

    {batch,streaming,stream_to_batch}

    : Choose the type of output data: batch or streaming.

    {none,redis,postgres,dynamodb,prometheus}

    : Select the type of state manager: none, redis, postgres, dynamodb, or prometheus.

    method_name

    : The name of the method to execute on the bolt.

    Options genius TestBoltCtlBolt rise

    --buffer_size BUFFER_SIZE: Specify the size of the buffer. --input_folder INPUT_FOLDER: Specify the directory where input files are stored temporarily

    --input_kafka_topic INPUT_KAFKA_TOPIC: Kafka input topic for streaming spouts. --input_kafka_cluster_connection_string INPUT_KAFKA_CLUSTER_CONNECTION_STRING: Kafka connection string for streaming spouts. --input_kafka_consumer_group_id INPUT_KAFKA_CONSUMER_GROUP_ID: Kafka consumer group id to use. --input_s3_bucket INPUT_S3_BUCKET: Provide the name of the S3 bucket for input storage. --input_s3_folder INPUT_S3_FOLDER: Indicate the S3 folder for input storage. --output_folder OUTPUT_FOLDER: Specify the directory where output files should be stored temporarily

    --output_kafka_topic OUTPUT_KAFKA_TOPIC: Kafka output topic for streaming spouts. --output_kafka_cluster_connection_string OUTPUT_KAFKA_CLUSTER_CONNECTION_STRING: Kafka connection string for streaming spouts. --output_s3_bucket OUTPUT_S3_BUCKET: Provide the name of the S3 bucket for output storage. --output_s3_folder OUTPUT_S3_FOLDER: Indicate the S3 folder for output storage. --redis_host REDIS_HOST: Enter the host address for the Redis server. --redis_port REDIS_PORT: Enter the port number for the Redis server. --redis_db REDIS_DB: Specify the Redis database to be used. --postgres_host POSTGRES_HOST: Enter the host address for the PostgreSQL server. --postgres_port POSTGRES_PORT: Enter the port number for the PostgreSQL server. --postgres_user POSTGRES_USER: Provide the username for the PostgreSQL server. --postgres_password POSTGRES_PASSWORD: Provide the password for the PostgreSQL server. --postgres_database POSTGRES_DATABASE: Specify the PostgreSQL database to be used. --postgres_table POSTGRES_TABLE: Specify the PostgreSQL table to be used. --dynamodb_table_name DYNAMODB_TABLE_NAME: Provide the name of the DynamoDB table. --dynamodb_region_name DYNAMODB_REGION_NAME: Specify the AWS region for DynamoDB. --prometheus_gateway PROMETHEUS_GATEWAY: Specify the prometheus gateway URL. --args ...: Additional keyword arguments to pass to the bolt.

    "},{"location":"guides/cli/#command-genius-testboltctlbolt-deploy","title":"Command: genius TestBoltCtlBolt deploy","text":"

    Usage: genius TestBoltCtlBolt deploy [-h] [--buffer_size BUFFER_SIZE] [--input_folder INPUT_FOLDER] [--input_kafka_topic INPUT_KAFKA_TOPIC] [--input_kafka_cluster_connection_string INPUT_KAFKA_CLUSTER_CONNECTION_STRING] [--input_kafka_consumer_group_id INPUT_KAFKA_CONSUMER_GROUP_ID] [--input_s3_bucket INPUT_S3_BUCKET] [--input_s3_folder INPUT_S3_FOLDER] [--output_folder OUTPUT_FOLDER] [--output_kafka_topic OUTPUT_KAFKA_TOPIC] [--output_kafka_cluster_connection_string OUTPUT_KAFKA_CLUSTER_CONNECTION_STRING] [--output_s3_bucket OUTPUT_S3_BUCKET] [--output_s3_folder OUTPUT_S3_FOLDER] [--redis_host REDIS_HOST] [--redis_port REDIS_PORT] [--redis_db REDIS_DB] [--postgres_host POSTGRES_HOST] [--postgres_port POSTGRES_PORT] [--postgres_user POSTGRES_USER] [--postgres_password POSTGRES_PASSWORD] [--postgres_database POSTGRES_DATABASE] [--postgres_table POSTGRES_TABLE] [--dynamodb_table_name DYNAMODB_TABLE_NAME] [--dynamodb_region_name DYNAMODB_REGION_NAME] [--prometheus_gateway PROMETHEUS_GATEWAY] [--k8s_kind {deployment,service,job,cron_job}] [--k8s_name K8S_NAME] [--k8s_image K8S_IMAGE] [--k8s_replicas K8S_REPLICAS] [--k8s_env_vars K8S_ENV_VARS] [--k8s_cpu K8S_CPU] [--k8s_memory K8S_MEMORY] [--k8s_storage K8S_STORAGE] [--k8s_gpu K8S_GPU] [--k8s_kube_config_path K8S_KUBE_CONFIG_PATH] [--k8s_api_key K8S_API_KEY] [--k8s_api_host K8S_API_HOST] [--k8s_verify_ssl K8S_VERIFY_SSL] [--k8s_ssl_ca_cert K8S_SSL_CA_CERT] [--k8s_cluster_name K8S_CLUSTER_NAME] [--k8s_context_name K8S_CONTEXT_NAME] [--k8s_namespace K8S_NAMESPACE] [--k8s_labels K8S_LABELS] [--k8s_annotations K8S_ANNOTATIONS] [--k8s_port K8S_PORT] [--k8s_target_port K8S_TARGET_PORT] [--k8s_schedule K8S_SCHEDULE] [--args ...] {batch,streaming,batch_to_stream,stream_to_batch} {batch,streaming,stream_to_batch} {none,redis,postgres,dynamodb,prometheus} {k8s} method_name

    {batch,streaming,batch_to_stream,stream_to_batch}

    : Choose the type of input data: batch or streaming.

    {batch,streaming,stream_to_batch}

    : Choose the type of output data: batch or streaming.

    {none,redis,postgres,dynamodb,prometheus}

    : Select the type of state manager: none, redis, postgres, dynamodb, or prometheus.

    {k8s}

    : Choose the type of deployment.

    method_name

    : The name of the method to execute on the bolt.

    Options genius TestBoltCtlBolt deploy

    --buffer_size BUFFER_SIZE: Specify the size of the buffer. --input_folder INPUT_FOLDER: Specify the directory where input files are stored temporarily

    --input_kafka_topic INPUT_KAFKA_TOPIC: Kafka input topic for streaming spouts. --input_kafka_cluster_connection_string INPUT_KAFKA_CLUSTER_CONNECTION_STRING: Kafka connection string for streaming spouts. --input_kafka_consumer_group_id INPUT_KAFKA_CONSUMER_GROUP_ID: Kafka consumer group id to use. --input_s3_bucket INPUT_S3_BUCKET: Provide the name of the S3 bucket for input storage. --input_s3_folder INPUT_S3_FOLDER: Indicate the S3 folder for input storage. --output_folder OUTPUT_FOLDER: Specify the directory where output files should be stored temporarily

    --output_kafka_topic OUTPUT_KAFKA_TOPIC: Kafka output topic for streaming spouts. --output_kafka_cluster_connection_string OUTPUT_KAFKA_CLUSTER_CONNECTION_STRING: Kafka connection string for streaming spouts. --output_s3_bucket OUTPUT_S3_BUCKET: Provide the name of the S3 bucket for output storage. --output_s3_folder OUTPUT_S3_FOLDER: Indicate the S3 folder for output storage. --redis_host REDIS_HOST: Enter the host address for the Redis server. --redis_port REDIS_PORT: Enter the port number for the Redis server. --redis_db REDIS_DB: Specify the Redis database to be used. --postgres_host POSTGRES_HOST: Enter the host address for the PostgreSQL server. --postgres_port POSTGRES_PORT: Enter the port number for the PostgreSQL server. --postgres_user POSTGRES_USER: Provide the username for the PostgreSQL server. --postgres_password POSTGRES_PASSWORD: Provide the password for the PostgreSQL server. --postgres_database POSTGRES_DATABASE: Specify the PostgreSQL database to be used. --postgres_table POSTGRES_TABLE: Specify the PostgreSQL table to be used. --dynamodb_table_name DYNAMODB_TABLE_NAME: Provide the name of the DynamoDB table. --dynamodb_region_name DYNAMODB_REGION_NAME: Specify the AWS region for DynamoDB. --prometheus_gateway PROMETHEUS_GATEWAY: Specify the prometheus gateway URL. --k8s_kind {deployment,service,job,cron_job}: Choose the type of kubernetes resource. --k8s_name K8S_NAME: Name of the Kubernetes resource. --k8s_image K8S_IMAGE: Docker image for the Kubernetes resource. --k8s_replicas K8S_REPLICAS: Number of replicas. --k8s_env_vars K8S_ENV_VARS: Environment variables as a JSON string. --k8s_cpu K8S_CPU: CPU requirements. --k8s_memory K8S_MEMORY: Memory requirements. --k8s_storage K8S_STORAGE: Storage requirements. --k8s_gpu K8S_GPU: GPU requirements. --k8s_kube_config_path K8S_KUBE_CONFIG_PATH: Path to the kubeconfig file for the Kubernetes cluster. --k8s_api_key K8S_API_KEY: API key for the Kubernetes cluster. --k8s_api_host K8S_API_HOST: API host for the Kubernetes cluster. 
--k8s_verify_ssl K8S_VERIFY_SSL: Whether to verify SSL certificates. --k8s_ssl_ca_cert K8S_SSL_CA_CERT: Path to the SSL CA certificate. --k8s_cluster_name K8S_CLUSTER_NAME: Name of the Kubernetes cluster. --k8s_context_name K8S_CONTEXT_NAME: Name of the kubeconfig context. --k8s_namespace K8S_NAMESPACE: Kubernetes namespace. --k8s_labels K8S_LABELS: Labels for Kubernetes resources, as a JSON string. --k8s_annotations K8S_ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --k8s_port K8S_PORT: Port to run the bolt on as a service. --k8s_target_port K8S_TARGET_PORT: Port to expose the bolt on as a service. --k8s_schedule K8S_SCHEDULE: Schedule to run the bolt on as a cron job. --args ...: Additional keyword arguments to pass to the bolt.

    "},{"location":"guides/cli/#command-genius-testboltctlbolt-help","title":"Command: genius TestBoltCtlBolt help","text":"

    Usage: genius TestBoltCtlBolt help [-h] method

    method

    : The method to execute.

    "},{"location":"guides/cli/#deployment","title":"Deployment","text":""},{"location":"guides/cli/#command-genius-rise","title":"Command: genius rise","text":"

    Usage: genius rise [-h] [--spout SPOUT] [--bolt BOLT] [--file FILE] {up} ...

    POSITIONAL ARGUMENTS genius rise

    genius rise up

    : Deploy according to the genius.yml file.

    "},{"location":"guides/cli/#command-genius-rise-up","title":"Command: genius rise up","text":"

    Usage: genius rise up [-h] [--spout SPOUT] [--bolt BOLT] [--file FILE]

    Options genius rise up

    --spout SPOUT: Name of the specific spout to run. --bolt BOLT: Name of the specific bolt to run. --file FILE: Path of the genius.yml file; defaults to the current directory.

    Options genius rise

    --spout SPOUT: Name of the specific spout to run. --bolt BOLT: Name of the specific bolt to run. --file FILE: Path of the genius.yml file; defaults to the current directory.

    "},{"location":"guides/cli/#kubernetes-pods","title":"Kubernetes Pods","text":""},{"location":"guides/cli/#command-genius-pod","title":"Command: genius pod","text":"

    usage: genius pod [-h] {status,show,describe,logs} ...

    POSITIONAL ARGUMENTS genius pod

    genius pod status

    : Get the status of the Kubernetes pod.

    genius pod show

    : List all pods.

    genius pod describe

    : Describe a pod.

    genius pod logs

    : Get the logs of a pod.

    "},{"location":"guides/cli/#command-genius-pod-status","title":"Command: genius pod status","text":"

    usage: genius pod status [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the Kubernetes pod.

    Options genius pod status

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-pod-show","title":"Command: genius pod show","text":"

    usage: genius pod show [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT]

    Options genius pod show

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-pod-describe","title":"Command: genius pod describe","text":"

    usage: genius pod describe [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the pod.

    Options genius pod describe

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-pod-logs","title":"Command: genius pod logs","text":"

    usage: genius pod logs [-h] [--follow FOLLOW] [--tail TAIL] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the pod.

    Options genius pod logs

    --follow FOLLOW: Whether to follow the logs. --tail TAIL: Number of lines to show from the end of the logs. --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#kubernetes-deployments","title":"Kubernetes Deployments","text":""},{"location":"guides/cli/#command-genius-deployment","title":"Command: genius deployment","text":"

    usage: genius deployment [-h] {create,scale,describe,show,delete,status} ...

    POSITIONAL ARGUMENTS genius deployment

    genius deployment create

    : Create a new deployment.

    genius deployment scale

    : Scale a deployment.

    genius deployment describe

    : Describe a deployment.

    genius deployment show

    : List all deployments.

    genius deployment delete

    : Delete a deployment.

    genius deployment status

    : Get the status of a deployment.

    "},{"location":"guides/cli/#command-genius-deployment-create","title":"Command: genius deployment create","text":"

    usage: genius deployment create [-h] [--replicas REPLICAS] [--env_vars ENV_VARS] [--cpu CPU] [--memory MEMORY] [--storage STORAGE] [--gpu GPU] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name image command

    name

    : Name of the deployment.

    image

    : Docker image for the deployment.

    command

    : Command to run in the container.

    Options genius deployment create

    --replicas REPLICAS: Number of replicas. --env_vars ENV_VARS: Environment variables as a JSON string. --cpu CPU: CPU requirements. --memory MEMORY: Memory requirements. --storage STORAGE: Storage requirements. --gpu GPU: GPU requirements. --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-deployment-scale","title":"Command: genius deployment scale","text":"

    usage: genius deployment scale [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name replicas

    name

    : Name of the deployment.

    replicas

    : Number of replicas.

    Options genius deployment scale

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-deployment-describe","title":"Command: genius deployment describe","text":"

    usage: genius deployment describe [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the deployment.

    Options genius deployment describe

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-deployment-show","title":"Command: genius deployment show","text":"

    usage: genius deployment show [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT]

    Options genius deployment show

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-deployment-delete","title":"Command: genius deployment delete","text":"

    usage: genius deployment delete [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the deployment.

    Options genius deployment delete

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-deployment-status","title":"Command: genius deployment status","text":"

    usage: genius deployment status [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the deployment.

    Options genius deployment status

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#kubernetes-services","title":"Kubernetes Services","text":""},{"location":"guides/cli/#command-genius-service","title":"Command: genius service","text":"

    usage: genius service [-h] {create,delete,describe,show} ...

    POSITIONAL ARGUMENTS genius service

    genius service create

    : Create a new service.

    genius service delete

    : Delete a service.

    genius service describe

    : Describe a service.

    genius service show

    : List all services.

    "},{"location":"guides/cli/#command-genius-service-create","title":"Command: genius service create","text":"

    usage: genius service create [-h] [--replicas REPLICAS] [--port PORT] [--target_port TARGET_PORT] [--env_vars ENV_VARS] [--cpu CPU] [--memory MEMORY] [--storage STORAGE] [--gpu GPU] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name image command

    name

    : Name of the service.

    image

    : Docker image for the service.

    command

    : Command to run in the container.

    Options genius service create

    --replicas REPLICAS: Number of replicas. --port PORT: Service port. --target_port TARGET_PORT: Container target port. --env_vars ENV_VARS: Environment variables as a JSON string. --cpu CPU: CPU requirements. --memory MEMORY: Memory requirements. --storage STORAGE: Storage requirements. --gpu GPU: GPU requirements. --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-service-delete","title":"Command: genius service delete","text":"

    usage: genius service delete [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the service.

    Options genius service delete

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-service-describe","title":"Command: genius service describe","text":"

    usage: genius service describe [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the service.

    Options genius service describe

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-service-show","title":"Command: genius service show","text":"

    usage: genius service show [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT]

    Options genius service show

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#kubernetes-jobs","title":"Kubernetes Jobs","text":""},{"location":"guides/cli/#command-genius-job","title":"Command: genius job","text":"

    usage: genius job [-h] {create,delete,status} ...

    POSITIONAL ARGUMENTS genius job

    genius job create

    : Create a new job.

    genius job delete

    : Delete a job.

    genius job status

    : Get the status of a job.

    "},{"location":"guides/cli/#command-genius-job-create","title":"Command: genius job create","text":"

    usage: genius job create [-h] [--env_vars ENV_VARS] [--cpu CPU] [--memory MEMORY] [--storage STORAGE] [--gpu GPU] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name image command

    name

    : Name of the job.

    image

    : Docker image for the job.

    command

    : Command to run in the container.

    Options genius job create

    --env_vars ENV_VARS: Environment variables as a JSON string. --cpu CPU: CPU requirements. --memory MEMORY: Memory requirements. --storage STORAGE: Storage requirements. --gpu GPU: GPU requirements. --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-job-delete","title":"Command: genius job delete","text":"

    usage: genius job delete [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the job.

    Options genius job delete

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-job-status","title":"Command: genius job status","text":"

    usage: genius job status [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the job.

    Options genius job status

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#kubernetes-cron-jobs","title":"Kubernetes Cron Jobs","text":""},{"location":"guides/cli/#command-genius-cron_job","title":"Command: genius cron_job","text":"

    usage: genius cron_job [-h] {create_cronjob,delete_cronjob,get_cronjob_status} ...

    POSITIONAL ARGUMENTS genius cron_job

    genius cron_job create_cronjob

    : Create a new cronjob.

    genius cron_job delete_cronjob

    : Delete a cronjob.

    genius cron_job get_cronjob_status

    : Get the status of a cronjob.

    "},{"location":"guides/cli/#command-genius-cron_job-create_cronjob","title":"Command: genius cron_job create_cronjob","text":"

    usage: genius cron_job create_cronjob [-h] [--env_vars ENV_VARS] [--cpu CPU] [--memory MEMORY] [--storage STORAGE] [--gpu GPU] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name image command schedule

    name

    : Name of the cronjob.

    image

    : Docker image for the cronjob.

    command

    : Command to run in the container.

    schedule

    : Cron schedule.

    Options genius cron_job create_cronjob

    --env_vars ENV_VARS: Environment variables as a JSON string. --cpu CPU: CPU requirements. --memory MEMORY: Memory requirements. --storage STORAGE: Storage requirements. --gpu GPU: GPU requirements. --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-cron_job-delete_cronjob","title":"Command: genius cron_job delete_cronjob","text":"

    usage: genius cron_job delete_cronjob [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the cronjob.

    Options genius cron_job delete_cronjob

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#command-genius-cron_job-get_cronjob_status","title":"Command: genius cron_job get_cronjob_status","text":"

    usage: genius cron_job get_cronjob_status [-h] [--kube_config_path KUBE_CONFIG_PATH] [--cluster_name CLUSTER_NAME] [--context_name CONTEXT_NAME] [--namespace NAMESPACE] [--labels LABELS] [--annotations ANNOTATIONS] [--api_key API_KEY] [--api_host API_HOST] [--verify_ssl VERIFY_SSL] [--ssl_ca_cert SSL_CA_CERT] name

    name

    : Name of the cronjob.

    Options genius cron_job get_cronjob_status

    --kube_config_path KUBE_CONFIG_PATH: Path to the kubeconfig file. --cluster_name CLUSTER_NAME: Name of the Kubernetes cluster. --context_name CONTEXT_NAME: Name of the kubeconfig context. --namespace NAMESPACE: Kubernetes namespace. --labels LABELS: Labels for Kubernetes resources, as a JSON string. --annotations ANNOTATIONS: Annotations for Kubernetes resources, as a JSON string. --api_key API_KEY: API key for Kubernetes cluster. --api_host API_HOST: API host for Kubernetes cluster. --verify_ssl VERIFY_SSL: Whether to verify SSL certificates. --ssl_ca_cert SSL_CA_CERT: Path to the SSL CA certificate.

    "},{"location":"guides/cli/#packaging","title":"Packaging","text":""},{"location":"guides/cli/#command-genius-docker-package","title":"Command: genius docker package","text":"

    Usage: genius docker package [-h] [--auth AUTH] [--base_image BASE_IMAGE] [--workdir WORKDIR] [--local_dir LOCAL_DIR] [--packages [PACKAGES ...]] [--os_packages [OS_PACKAGES ...]] [--env_vars ENV_VARS] image_name repository

    image_name

    : Name of the Docker image.

    repository

    : Container repository to upload to.

    Options genius docker package

    --auth AUTH: Authentication credentials as a JSON string. --base_image BASE_IMAGE: The base image to use for the Docker container. --workdir WORKDIR: The working directory in the Docker container. --local_dir LOCAL_DIR: The local directory to copy into the Docker container. --packages [PACKAGES ...]: List of Python packages to install in the Docker container. --os_packages [OS_PACKAGES ...]: List of OS packages to install in the Docker container. --env_vars ENV_VARS: Environment variables to set in the Docker container.

    "},{"location":"guides/cli/#miscellaneous","title":"Miscellaneous","text":""},{"location":"guides/cli/#command-genius-plugins","title":"Command: genius plugins","text":"

    Usage: genius plugins [-h] [spout_or_bolt]

    spout_or_bolt

    : The spout or bolt to print help for.

    "},{"location":"guides/cli/#command-genius-list","title":"Command: genius list","text":"

    Usage: genius list [-h] [--verbose]

    Options genius list

    --verbose: Print verbose output.

    "},{"location":"guides/concepts/","title":"Concepts","text":"

    The Geniusrise framework is built around loosely-coupled modules acting as a cohesive adhesive between distinct, modular components, much like how one would piece together Lego blocks. This design approach not only promotes flexibility but also ensures that each module or \"Lego block\" remains sufficiently independent. Such independence is crucial for diverse teams, each with its own unique infrastructure and requirements, to seamlessly build and manage their respective components.

    Geniusrise comes with a sizable set of plugins which implement various features and integrations. The independence and modularity of the design enable sharing of these building blocks in the community.

    "},{"location":"guides/concepts/#concepts_1","title":"Concepts","text":"
    1. Task: At its core, a task represents a discrete unit of work within the Geniusrise framework. Think of it as a singular action or operation that the system needs to execute. A task further manifests itself into a Bolt or a Spout as stated below.

    2. Components of a Task: Each task is equipped with three components:

  1. State Manager: This component is responsible for continuously monitoring and managing the task's state, ensuring that it progresses smoothly from initiation to completion, and for reporting errors and shipping logs to a central location.
  2. Data Manager: As the name suggests, the Data Manager oversees the input and output data associated with a task, ensuring data integrity and efficient data flow. It also ensures data sanity and enforces partition semantics and isolation.
      3. Runner: These are wrappers for executing a task on various platforms. Depending on the platform, the runner ensures that the task is executed seamlessly.
    3. Task Classification: Tasks within the Geniusrise framework can be broadly classified into two categories:

      • Spout: If a task's primary function is to ingest or bring in data, it's termed as a 'spout'.
      • Bolt: Tasks that don't primarily ingest data but perform other operations are termed 'bolts'.

    The beauty of the Geniusrise framework lies in its adaptability. Developers can script their workflow components once and have the freedom to deploy them across various platforms. To facilitate this, Geniusrise offers:

    1. Runners for Task Execution: Geniusrise is equipped with a diverse set of runners, each tailored for different platforms, ensuring that tasks can be executed almost anywhere:
      1. On your local machine for quick testing and development.
      2. Within Docker containers for isolated, reproducible environments.
      3. On Kubernetes clusters for scalable, cloud-native deployments.
      4. Using Apache Airflow for complex workflow orchestration. (Coming Soon).
      5. On AWS ECS for containerized application management. (Coming Soon).
      6. With AWS Batch for efficient batch computing workloads. (Coming Soon).
      7. With Docker Swarm clusters as an alternative orchestrator to Kubernetes. (Coming Soon).

    This document delves into the core components and concepts that make up the Geniusrise framework.

    "},{"location":"guides/concepts/#tradeoffs","title":"Tradeoffs","text":"

    Because of the very loose coupling of the components, the framework can be used to build very complex networks of independently running nodes, but it provides limited orchestration capability for patterns like synchronous pipelines. An external orchestrator like Airflow can be used in such cases to orchestrate Geniusrise components.

    "},{"location":"guides/deployment/","title":"Deployment","text":""},{"location":"guides/deployment/#introduction","title":"Introduction","text":"

    This guide provides comprehensive instructions on how to deploy and manage resources in a Kubernetes cluster using the Geniusrise platform. The guide covers the following functionalities:

    • Connecting to a Kubernetes cluster
    • Managing Pods
    • Managing Deployments
    • Managing Services
    • Managing Jobs
    • Managing Cron jobs
    "},{"location":"guides/deployment/#prerequisites","title":"Prerequisites","text":"
    • A working Kubernetes cluster
    • Kubeconfig file for cluster access
    • Python 3.x installed
    • Geniusrise CLI installed
    "},{"location":"guides/deployment/#connecting-to-a-kubernetes-cluster","title":"Connecting to a Kubernetes Cluster","text":"

    Before performing any operations, you need to connect to your Kubernetes cluster. You can do this in two ways:

    1. Using a kubeconfig file and context name
    2. Using an API key and API host
    "},{"location":"guides/deployment/#using-kubeconfig-and-context-name","title":"Using Kubeconfig and Context Name","text":"
    genius k8s <command> --kube_config_path /path/to/kubeconfig.yaml --context_name my-context\n
    "},{"location":"guides/deployment/#using-api-key-and-api-host","title":"Using API Key and API Host","text":"
    genius k8s <command> --api_key my-api-key --api_host https://api.k8s.my-cluster.com --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#managing-pods","title":"Managing Pods","text":""},{"location":"guides/deployment/#checking-pod-status","title":"Checking Pod Status","text":"

    To get the status of a specific pod:

    genius k8s status my-pod-name --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#listing-all-pods","title":"Listing All Pods","text":"

    To list all the pods in the current namespace:

    genius k8s show --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#describing-a-pod","title":"Describing a Pod","text":"

    To get detailed information about a specific pod:

    genius k8s describe my-pod-name --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#fetching-pod-logs","title":"Fetching Pod Logs","text":"

    To get the logs of a specific pod:

    genius k8s logs my-pod-name --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#managing-deployments","title":"Managing Deployments","text":""},{"location":"guides/deployment/#creating-a-new-deployment","title":"Creating a New Deployment","text":"

    To create a new deployment:

    genius deployment create --name my-deployment --image my-image --command \"echo hello\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#scaling-a-deployment","title":"Scaling a Deployment","text":"

    To scale a deployment:

    genius deployment scale --name my-deployment --replicas 3 --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#listing-all-deployments","title":"Listing All Deployments","text":"

    To list all deployments:

    genius deployment show\n
    "},{"location":"guides/deployment/#describing-a-deployment","title":"Describing a Deployment","text":"

    To describe a specific deployment:

    genius deployment describe my-deployment --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#deleting-a-deployment","title":"Deleting a Deployment","text":"

    To delete a deployment:

    genius deployment delete my-deployment --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#checking-deployment-status","title":"Checking Deployment Status","text":"

    To check the status of a deployment:

    genius deployment status my-deployment --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#advanced-features","title":"Advanced Features","text":""},{"location":"guides/deployment/#environment-variables","title":"Environment Variables","text":"

    You can pass environment variables to your pods and deployments like so:

    genius deployment create --name my-deployment --image my-image --command \"echo hello\" --env_vars '{\"MY_VAR\": \"value\"}' --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#resource-requirements","title":"Resource Requirements","text":"

    You can specify resource requirements for your pods and deployments:

    genius deployment create --name my-deployment --image my-image --command \"echo hello\" --cpu \"100m\" --memory \"256Mi\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#gpu-support","title":"GPU Support","text":"

    To allocate GPUs to your pods:

    genius deployment create --name my-deployment --image my-image --command \"echo hello\" --gpu \"1\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#managing-services","title":"Managing Services","text":""},{"location":"guides/deployment/#creating-a-new-service","title":"Creating a New Service","text":"

    To create a new service:

    genius service create --name example-service --image example-image --command \"echo hello\" --port 8080 --target_port 8080 --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#deleting-a-service","title":"Deleting a Service","text":"

    To delete a service:

    genius service delete --name example-service --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#describing-a-service","title":"Describing a Service","text":"

    To describe a specific service:

    genius service describe --name example-service --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#listing-all-services","title":"Listing All Services","text":"

    To list all services:

    genius service show --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#managing-jobs","title":"Managing Jobs","text":""},{"location":"guides/deployment/#creating-a-new-job","title":"Creating a New Job","text":"

    To create a new job:

    genius job create --name example-job --image example-image --command \"echo hello\" --cpu \"100m\" --memory \"256Mi\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#deleting-a-job","title":"Deleting a Job","text":"

    To delete a job:

    genius job delete --name example-job --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#checking-job-status","title":"Checking Job Status","text":"

    To check the status of a job:

    genius job status --name example-job --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#managing-cron-jobs","title":"Managing Cron Jobs","text":""},{"location":"guides/deployment/#creating-a-new-cron-job","title":"Creating a New Cron Job","text":"

    To create a new cron job, you can use the create_cronjob sub-command. You'll need to specify the name, Docker image, command to run, and the cron schedule.

    genius cronjob create_cronjob --name example-cronjob --image example-image --command \"echo hello\" --schedule \"*/5 * * * *\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#additional-options","title":"Additional Options","text":"
    • --env_vars: To set environment variables, pass them as a JSON string.
    • --cpu, --memory, --storage, --gpu: To set resource requirements.
    "},{"location":"guides/deployment/#deleting-a-cron-job","title":"Deleting a Cron Job","text":"

    To delete a cron job, use the delete_cronjob sub-command and specify the name of the cron job.

    genius cronjob delete_cronjob --name example-cronjob --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#checking-cron-job-status","title":"Checking Cron Job Status","text":"

    To check the status of a cron job, use the get_cronjob_status sub-command and specify the name of the cron job.

    genius cronjob get_cronjob_status --name example-cronjob --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#advanced-features-for-cron-jobs","title":"Advanced Features for Cron Jobs","text":""},{"location":"guides/deployment/#environment-variables_1","title":"Environment Variables","text":"

    You can pass environment variables to your cron jobs like so:

    genius cronjob create_cronjob --name example-cronjob --image example-image --command \"echo hello\" --schedule \"*/5 * * * *\" --env_vars '{\"MY_VAR\": \"value\"}' --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#resource-requirements_1","title":"Resource Requirements","text":"

    You can specify resource requirements for your cron jobs:

    genius cronjob create_cronjob --name example-cronjob --image example-image --command \"echo hello\" --schedule \"*/5 * * * *\" --cpu \"100m\" --memory \"256Mi\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/deployment/#gpu-support_1","title":"GPU Support","text":"

    To allocate GPUs to your cron jobs:

    genius cronjob create_cronjob --name example-cronjob --image example-image --command \"echo hello\" --schedule \"*/5 * * * *\" --gpu \"1\" --namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/dev_cycle/","title":"Dev Cycle","text":"

    This document describes one full local development cycle.

    Let's say we want to build a pipeline that tags medical terms in EHR documents.

    "},{"location":"guides/dev_cycle/#strategies","title":"Strategies","text":"

    Pre-requisites:

    1. SNOMED-CT: is a knowledge graph of standard medical terminology
    2. IHTSDO: a standards body for medical terminologies in a number of countries.
    3. UMLS: the Unified Medical Language System is a set of files and software that brings together many health and biomedical vocabularies and standards.
    "},{"location":"guides/dev_cycle/#strategy-1-named-entity-recognition","title":"Strategy 1: Named entity recognition","text":""},{"location":"guides/dev_cycle/#1-create-a-labelled-dataset","title":"1. Create a labelled dataset","text":"

    We need a corpus of documents with medical terms labeled. For example, we could use Wikipedia + Wikidata to build such a dataset, given entities in Wikipedia are linked and indexed in the Wikidata knowledge graph. Reference: Building a Massive Corpus for Named Entity Recognition using Free Open Data Sources. We could also annotate medical datasets like MIMIC-III with the SNOMED-CT-based MedCAT, a medical annotation tool built on the knowledge graph of medical terminology (SNOMED-CT), as it would be more pertinent to our use case; reference: DNER Clinical (named entity recognition) from free clinical text to Snomed-CT concept

    "},{"location":"guides/dev_cycle/#2-train-a-model-on-the-ner-dataset","title":"2. Train a model on the NER dataset","text":"

    We could choose a large language model and train the model on the NER fine-tuning task. The model would then be able to recognize and tag medical terms in any given text data.

    "},{"location":"guides/dev_cycle/#strategy-2-vector-knowledge-graph-search","title":"Strategy 2: Vector knowledge graph search","text":""},{"location":"guides/dev_cycle/#1-create-a-vectorized-knowledge-graph","title":"1. Create a vectorized knowledge graph","text":"

    We use an LLM to create a vectorized layer over SNOMED-CT. This layer can be used to semantically search for \"seed\" nodes in the graph. We can then use these seed nodes to traverse nodes a few hops adjacent to the seed nodes.

    "},{"location":"guides/dev_cycle/#2-retrieval-augmented-ner","title":"2. Retrieval Augmented NER","text":"

    We use the knowledge graph search results to not only annotate each node seen in the EHR document, but also add additional information about those nodes derived from their adjacent nodes. But first, we also need to make sure that we query the right information instead of simply taking vectorized chunks and throwing them at semantic search. We would need a \"traditional\" pipeline for this - lemmatization followed by POS tagging. We use both proper nouns and out-of-vocabulary words as search query terms.

    "},{"location":"guides/dev_cycle/#boilerplate","title":"Boilerplate","text":"

    To setup a local geniusrise project, simply use the geniusrise project creator script:

    curl -L https://geniusrise.new | bash # TODO: host this or create a template github repo\n
    "},{"location":"guides/dev_cycle/#existing-project","title":"Existing project","text":"

    If you wish to add geniusrise to an existing project:

    pip install geniusrise\npip freeze > requirements.txt\n
    "},{"location":"guides/dev_cycle/#from-scratch","title":"From scratch","text":"

    Here is how to set up from scratch:

    #!/bin/bash\n# Prompt for project details\nread -p \"Enter your project name: \" project_name\nread -p \"Enter your name: \" author_name\nread -p \"Enter your email: \" author_email\nread -p \"Enter your GitHub username: \" github_username\nread -p \"Enter a brief description of your project: \" project_description\n# Create project structure\nmkdir $project_name\ncd $project_name\nmkdir $project_name tests\n# Create basic files\ntouch README.md\ntouch requirements.txt\ntouch setup.py\ntouch Makefile\ntouch $project_name/__init__.py\ntouch tests/__init__.py\n# Populate README.md\necho \"# $project_name\" > README.md\necho \"\\n$project_description\" >> README.md\n# Populate setup.py\ncat <<EOL > setup.py\nfrom setuptools import setup, find_packages\nwith open(\"README.md\", \"r\", encoding=\"utf-8\") as fh:\nlong_description = fh.read()\nsetup(\nname='$project_name',\nversion='0.1.0',\npackages=find_packages(exclude=[\"tests\", \"tests.*\"]),\ninstall_requires=[],\npython_requires='>=3.10',\nauthor='$author_name',\nauthor_email='$author_email',\ndescription='$project_description',\nlong_description=long_description,\nlong_description_content_type='text/markdown',\nurl='https://github.com/$github_username/$project_name',\nclassifiers=[\n'Programming Language :: Python :: 3',\n'License :: OSI Approved :: MIT License',\n'Operating System :: OS Independent',\n],\n)\nEOL\n# Populate Makefile\ncat <<EOL > Makefile\nsetup:\n@pip install -r ./requirements.txt\ntest:\n@coverage run -m pytest -v ./tests\npublish:\n@python setup.py sdist bdist_wheel\n@twine upload dist/$project_name-\\$${VERSION}-* --verbose\nEOL\n# Set up the virtual environment and install necessary packages\nvirtualenv venv -p `which python3.10`\nsource venv/bin/activate\npip install twine setuptools pytest coverage geniusrise\npip freeze > requirements.txt\n# Fetch .pre-commit-config.yaml and .gitignore from geniusrise/geniusrise\ncurl -O 
https://raw.githubusercontent.com/geniusrise/geniusrise/master/.pre-commit-config.yaml\ncurl -O https://raw.githubusercontent.com/geniusrise/geniusrise/master/.gitignore\necho \"Project $project_name initialized!\"\n

    Create a install script out of this and execute it:

    touch install.sh\nchmod +x ./install.sh\n./install.sh\n
    "},{"location":"guides/dev_cycle/#preparing-the-knowledge-graph","title":"Preparing the knowledge graph","text":"

    Let's prepare the knowledge graph by vectorizing each node's knowledge into a vectorized flat memory. This is a periodic activity that one needs to do whenever a new version of SNOMED-CT is released (typically bi-annually).

    We use the international version of SNOMED-CT from https://www.nlm.nih.gov/healthit/snomedct/international.html.

    mkdir data\ncd data\n

    Go to UMLS or IHTSDO website, register, agree to the agreements and after approval, download the knowledge graph.

    Unzip the file

    unzip SnomedCT_InternationalRF2_PRODUCTION_20230901T120000Z.zip\n
    "},{"location":"guides/dev_cycle/#todo","title":"TODO \ud83d\ude22","text":"

    Need to document https://github.com/geniusrise/geniusrise-healthcare

    "},{"location":"guides/installation/","title":"Installation","text":"

    Geniusrise is composed of the core framework and various plugins that implement specific tasks. The core has to be installed first, and after that selected plugins can be installed as and when required.

    "},{"location":"guides/installation/#installing-geniusrise","title":"Installing Geniusrise","text":""},{"location":"guides/installation/#using-pip","title":"Using pip","text":"

    To install the core framework using pip in local env, simply run:

    pip install geniusrise\n

    Or if you wish to install at user level:

    pip install geniusrise --user\n

    Or on a global level (might conflict with your OS package manager):

    sudo pip install geniusrise\n

    To verify the installation, you can check whether the geniusrise binary exists in PATH:

    which genius\n\ngenius --help\n
    <!--

    "},{"location":"guides/installation/#docker","title":"Docker","text":"

    Geniusrise containers are available on Docker hub.

    docker run -it --rm geniusrise/geniusrise:latest\n``` -->\n\n## Installing Plugins\n---\n\nGeniusrise offers a variety of plugins that act as composable lego blocks. To install a specific plugin, use the following format:\n\n```bash\npip install geniusrise-<plugin-name>\n

    Replace <plugin-name> with the name of the desired plugin.

    Available plugins are:

    1. geniusrise-text: bolts for text models
    2. geniusrise-vision: bolts for vision models
    3. geniusrise-audio: bolts for audio models
    4. geniusrise-openai: bolts for openai
    5. geniusrise-listeners: spouts for streaming event listeners
    6. geniusrise-databases: spouts for databases

    Please visit https://github.com/geniusrise for a complete list of available plugins.

    "},{"location":"guides/installation/#using-conda","title":"Using Conda","text":"
    1. Activate the environment:
    conda activate your-env\n
    1. Install Geniusrise:
    pip install geniusrise\n
    "},{"location":"guides/installation/#using-poetry","title":"Using Poetry","text":"
    1. Add Geniusrise as a dependency:
    poetry add geniusrise\n

    For plugins:

    poetry add geniusrise-<plugin-name>\n
    "},{"location":"guides/installation/#development","title":"Development","text":"

    For development, you may want to install from the repo:

    git clone git@github.com:geniusrise/geniusrise.git\ncd geniusrise\nvirtualenv venv -p `which python3.10`\nsource venv/bin/activate\npip install -r ./requirements.txt\n\nmake install # installs in your local venv directory\n

    That's it! You've successfully installed Geniusrise and its plugins. \ud83c\udf89

    "},{"location":"guides/installation/#alternative-methods-todo","title":"Alternative Methods: TODO \ud83d\ude2d","text":""},{"location":"guides/installation/#using-package-managers","title":"Using package managers","text":"

    Geniusrise is also available as native packages for some Linux distributions.

    "},{"location":"guides/installation/#aur","title":"AUR","text":"

    Geniusrise is available on the AUR for arch and derived distros.

    yay -S geniusrise\n

    or directly from git master:

    yay -S geniusrise-git\n
    "},{"location":"guides/installation/#ppa","title":"PPA","text":"

    Geniusrise is also available on the PPA for debian-based distros.

    Coming soon \ud83d\ude22

    "},{"location":"guides/installation/#brew-cask","title":"Brew (cask)","text":"

    Coming soon \ud83d\ude22

    "},{"location":"guides/installation/#nix","title":"Nix","text":"

    Coming soon \ud83d\ude22

    "},{"location":"guides/kubernetes/","title":"Kubernetes Runner","text":""},{"location":"guides/kubernetes/#overview","title":"Overview","text":"

    This runner module enables running spouts or bolts on Kubernetes. It provides the ability to:

    1. create
    2. delete
    3. scale
    4. describe

    various Kubernetes resources like

    1. Pods
    2. Deployments
    3. Services
    "},{"location":"guides/kubernetes/#command-line-interface","title":"Command-Line Interface","text":"

    The following commands are available:

    1. create: Create a Kubernetes resource.
    2. delete: Delete a Kubernetes resource.
    3. status: Get the status of a Kubernetes resource.
    4. logs: Get logs of a Kubernetes resource.
    5. pod: Describe a Kubernetes pod.
    6. pods: List all pods.
    7. service: Describe a Kubernetes service.
    8. services: List all services.
    9. deployment: Describe a Kubernetes deployment.
    10. deployments: List all deployments.
    11. scale: Scale a Kubernetes deployment.
    "},{"location":"guides/kubernetes/#common-arguments","title":"Common Arguments","text":"

    These arguments are common to all commands:

    • --kube_config_path: Path to the kubeconfig file.
    • --cluster_name: Name of the Kubernetes cluster.
    • --context_name: Name of the kubeconfig context.
    • --namespace: Kubernetes namespace (default is \"default\").
    • --labels: Labels for Kubernetes resources, as a JSON string.
    • --annotations: Annotations for Kubernetes resources, as a JSON string.
    • --api_key: API key for Kubernetes cluster.
    • --api_host: API host for Kubernetes cluster.
    • --verify_ssl: Whether to verify SSL certificates (default is True).
    • --ssl_ca_cert: Path to the SSL CA certificate.
    "},{"location":"guides/kubernetes/#create_resource","title":"create_resource","text":"

    Create a Kubernetes resource.

    • name: Name of the resource.
    • image: Docker image for the resource.
    • command: Command to run in the container.
    • --registry_creds: Credentials for Docker registry, as a JSON string.
    • --is_service: Whether this is a service (default is False).
    • --replicas: Number of replicas (default is 1).
    • --port: Service port (default is 80).
    • --target_port: Container target port (default is 8080).
    • --env_vars: Environment variables, as a JSON string.

    Example:

    python script.py create_resource my_resource nginx \"nginx -g 'daemon off;'\" --replicas=3\n
    "},{"location":"guides/kubernetes/#delete_resource","title":"delete_resource","text":"

    Delete a Kubernetes resource.

    • name: Name of the resource.
    • --is_service: Whether this is a service (default is False).

    Example:

    python script.py delete_resource my_resource\n
    "},{"location":"guides/kubernetes/#get_status","title":"get_status","text":"

    Get the status of a Kubernetes resource.

    • name: Name of the resource.

    Example:

    python script.py get_status my_resource\n
    "},{"location":"guides/kubernetes/#get_logs","title":"get_logs","text":"

    Get logs of a Kubernetes resource.

    • name: Name of the resource.
    • --tail_lines: Number of lines to tail (default is 10).

    Example:

    python script.py get_logs my_resource --tail_lines=20\n
    "},{"location":"guides/kubernetes/#scale","title":"scale","text":"

    Scale a Kubernetes deployment.

    • name: Name of the deployment.
    • replicas: Number of replicas.

    Example:

    python script.py scale my_resource 5\n
    "},{"location":"guides/kubernetes/#list_pods-list_services-list_deployments","title":"list_pods, list_services, list_deployments","text":"

    List all pods, services, or deployments.

    Example:

    python script.py list_pods\n
    "},{"location":"guides/kubernetes/#describe_pod-describe_service-describe_deployment","title":"describe_pod, describe_service, describe_deployment","text":"

    Describe a pod, service, or deployment.

    • name: Name of the resource.

    Example:

    python script.py describe_pod my_pod\n
    "},{"location":"guides/kubernetes/#yaml-configuration","title":"YAML Configuration","text":"

    You can also use a YAML configuration file to specify the common arguments. The command-specific arguments will still come from the command line.

    Example YAML:

    deploy:\ntype: \"k8s\"\nargs:\nkube_config_path: \"\"\ncluster_name: \"geniusrise\"\ncontext_name: \"eks\"\nnamespace: \"geniusrise_k8s_test\"\nlabels: { \"tag1\": \"lol\", \"tag2\": \"lel\" }\nannotations: {}\napi_key:\napi_host: localhost\nverify_ssl: true\nssl_ca_cert:\n

    To use the YAML configuration, you can read it in your Python script and pass the arguments to the K8sResourceManager methods.

    Example:

    python script.py --config=my_config.yaml create_resource my_resource nginx \"nginx -g 'daemon off;'\" --replicas=3\n

    In this example, the --config=my_config.yaml would be used to read the common arguments from the YAML file, and the rest of the arguments would be taken from the command line.

    "},{"location":"guides/local/","title":"Local setup","text":"

    Let's create a workspace for local experimentation. We will not build anything here, just try to use whatever components are available. This is what a low-code workflow could look like.

    Lets create a workflow in which:

    1. A web server listens for all kinds of HTTP events.
      1. Clients send the following information to the server:
        1. HTTP request
        2. Response and response status code
      2. The server buffers events in batches of 1000 and uploads them on to s3.
    2. Train a small LLM model on the data to be used to predict whether the request was valid.

    A representation of the process using a sequence diagram:

    e451c317-1904-43cc-ad92-e5146e70eb14

    This model could be used to predict if a request will fail before serving it. It could also be used to classify requests as malicious etc.

    "},{"location":"guides/local/#install","title":"Install","text":"

    Let's start by installing geniusrise and its components in a local virtual environment.

    1. Create a directory:
    mkdir test\ncd test\n
    1. Create a virtualenv:
    virtualenv venv\nsource venv/bin/activate\n
    1. Install geniusrise
    pip install geniusrise\npip install geniusrise-listeners\npip install geniusrise-huggingface\n
    1. Save the installed package versions
    pip freeze > requirements.txt\n
    1. Verify if everything is installed:
    $ genius list\n\n+--------------------------------------------+-------+\n| Name                                       | Type  |\n+--------------------------------------------+-------+\n| TestSpoutCtlSpout                          | Spout |\n| Kafka                                      | Spout |\n| MQTT                                       | Spout |\n| Quic                                       | Spout |\n| RESTAPIPoll                                | Spout |\n| RabbitMQ                                   | Spout |\n| RedisPubSub                                | Spout |\n| RedisStream                                | Spout |\n| SNS                                        | Spout |\n| SQS                                        | Spout |\n| Udp                                        | Spout |\n| Webhook                                    | Spout |\n| Websocket                                  | Spout |\n| TestBoltCtlBolt                            | Bolt  |\n| HuggingFaceClassificationFineTuner         | Bolt  |\n| HuggingFaceCommonsenseReasoningFineTuner   | Bolt  |\n| HuggingFaceFineTuner                       | Bolt  |\n| HuggingFaceInstructionTuningFineTuner      | Bolt  |\n| HuggingFaceLanguageModelingFineTuner       | Bolt  |\n| HuggingFaceNamedEntityRecognitionFineTuner | Bolt  |\n| HuggingFaceQuestionAnsweringFineTuner      | Bolt  |\n| HuggingFaceSentimentAnalysisFineTuner      | Bolt  |\n| HuggingFaceSummarizationFineTuner          | Bolt  |\n| HuggingFaceTranslationFineTuner            | Bolt  |\n| NamedEntityRecognitionFineTuner            | Bolt  |\n| OpenAIClassificationFineTuner              | Bolt  |\n| OpenAICommonsenseReasoningFineTuner        | Bolt  |\n| OpenAIFineTuner                            | Bolt  |\n| OpenAIInstructionFineTuner                 | Bolt  |\n| OpenAILanguageModelFineTuner               | Bolt  |\n| OpenAIQuestionAnsweringFineTuner           | Bolt  |\n| OpenAISentimentAnalysisFineTuner           | Bolt  |\n| 
OpenAISummarizationFineTuner               | Bolt  |\n| OpenAITranslationFineTuner                 | Bolt  |\n+--------------------------------------------+-------+\n
    "},{"location":"guides/local/#input-data","title":"Input Data","text":"

    Lets start with the server which has to listen for HTTP events. We can use the Webhook listener for this purpose.

    Next, we have to ask ourselves 2 things:

    1. Where do we want the output?
    2. A: in s3 in batches (output = stream_to_batch)
    3. Do we want monitoring?
    4. A: no (state = none)

    Let's run the listener:

    genius Webhook rise \\\nstream_to_batch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder train \\\nnone \\\nlisten \\\n--args port=8080\n

    The server should be up with:

        \ud83d\ude80 Initialized Task with ID: Webhookaca9cb67-5c41-420c-9445-cf0015d9d866\n    [17/Sep/2023:14:00:18] ENGINE Bus STARTING\nCherryPy Checker:\nThe Application mounted at '' has an empty config.\n\n[17/Sep/2023:14:00:18] ENGINE Started monitor thread 'Autoreloader'.\n    [17/Sep/2023:14:00:18] ENGINE Serving on http://0.0.0.0:8080\n    [17/Sep/2023:14:00:18] ENGINE Bus STARTED\n
    "},{"location":"guides/local/#data","title":"Data","text":"

    Lets create some data for testing:

    while true; do\n# Generate a random customer ID\ncustomer_id=$(( RANDOM % 10000001 ))\n# Determine the status code based on the customer ID\nif [ $customer_id -gt 10000000 ]; then\nstatus_code=\"1\"\nelif [ $customer_id -le 10000 ]; then\nstatus_code=\"1\"\nelse\nstatus_code=\"0\"\nfi\n# Make the API call\ncurl --header \"Content-Type: application/json\" \\\n--request POST \\\n--data \"{\\\"text\\\":\\\"GET /api/v1/customer/$customer_id\\\",\\\"label\\\":\\\"$status_code\\\"}\" \\\nhttp://localhost:8080/application-1-tag-a-tag-b-whatever\ndone\n

    Verify that the data is being dumped in the right place with the correct format:

    $ aws s3 ls s3://geniusrise-test/train/\n\n2023-08-11 14:02:47      28700 DGtx4KjVZw5C2gfWmTVCmD.json\n2023-08-11 14:02:50      28700 UYXAvn8JC2yk6pMuAjKMPq.json\n

    The Webhook spout generates data like this:

    {'data': {'text': 'GET /api/v1/customer/28546', 'label': '401'},\n'endpoint': 'http://localhost:8080/application-1-tag-a-tag-b-whatever',\n'headers': {'Remote-Addr': '127.0.0.1',\n'Host': 'localhost:8080',\n'User-Agent': 'curl/8.1.2',\n'Accept': '*/*',\n'Content-Type': 'application/json',\n'Content-Length': '51'}}\n

    We need to extract the data field from this data before training. This can be done by passing a lambda lambda x: x['data'] to the fine tuning bolt.

    More info on other arguments can be found with:

    genius Webhook rise --help\n
    "},{"location":"guides/local/#fine-tuning","title":"Fine-tuning","text":"

    Now lets test the second leg of this, the model. Since we want to use the model for predicting the status code given the data, we will use classification as our task for fine-tuning the model.

    Lets use the bert-base-uncased model for now, as it is small enough to run on a CPU on a laptop. We also create a model on huggingface hub to store the model once it is trained: ixaxaar/geniusrise-api-status-code-prediction.

    genius HuggingFaceClassificationFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder train \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder api-prediction \\\nnone \\\nfine_tune \\\n--args \\\nmodel_name=\"bert-base-uncased\" \\\ntokenizer_name=\"bert-base-uncased\" \\\nnum_train_epochs=2 \\\nper_device_train_batch_size=64 \\\nmodel_class=BertForSequenceClassification \\\ntokenizer_class=BertTokenizer \\\ndata_masked=True \\\ndata_extractor_lambda=\"lambda x: x['data']\" \\\nhf_repo_id=ixaxaar/geniusrise-api-status-code-prediction \\\nhf_commit_message=\"initial local testing\" \\\nhf_create_pr=True \\\nhf_token=hf_lalala\n
        \ud83d\ude80 Initialized Task with ID: HuggingFaceClassificationFineTuner772627a0-43a5-4f9d-9b0f-4362d69ba08c\n    Found credentials in shared credentials file: ~/.aws/credentials\nSome weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\nYou should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n    Loading dataset from /tmp/tmp3h3wav4h/train\n    New labels detected, ignore if fine-tuning\nMap: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 300/300 [00:00<00:00, 4875.76 examples/s]\n{'train_runtime': 13.3748, 'train_samples_per_second': 44.861, 'train_steps_per_second': 22.43, 'train_loss': 0.6400579833984374, 'epoch': 2.0}\n100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 300/300 [00:13<00:00, 22.43it/s]\npytorch_model.bin: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588| 438M/438M [01:29<00:00, 4.88MB/s]\n    Successfully executed the bolt method: fine_tune \ud83d\udc4d\n

    You'll see a progress bar at the bottom, on completion, a pull request will appear on huggingface hub. Here is the model we trained: https://huggingface.co/ixaxaar/geniusrise-api-status-code-prediction.

    "},{"location":"guides/local/#packaging","title":"Packaging","text":"

    Finally, lets package this workflow so that we can run it again and again.

    Create a genius.yml file, similar to the cli commands:

    version: 1\nspouts:\nhttp_listener:\nname: Webhook\nmethod: listen\nargs:\nport: 8080\nstate:\ntype: none\noutput:\ntype: stream_to_batch\nargs:\nbucket: geniusrise-test\nfolder: train\nhttp_classifier:\nname: HuggingFaceClassificationFineTuner\nmethod: fine_tune\nargs:\nmodel_name: \"bert-base-uncased\"\ntokenizer_name: \"bert-base-uncased\"\nnum_train_epochs: 2\nper_device_train_batch_size: 2\nmodel_class: BertForSequenceClassification\ntokenizer_class: BertTokenizer\ndata_masked: True\ndata_extractor_lambda: \"lambda x: x['data']\"\nhf_repo_id: ixaxaar/geniusrise-api-status-code-prediction\nhf_commit_message: \"initial local testing\"\nhf_create_pr: True\nhf_token: hf_lalala\ninput:\ntype: spout\nargs:\nname: http_listener\noutput:\ntype: batch\nargs:\nbucket: geniusrise-test\nfolder: model\n

    Finally run them:

    genius rise\n

    Or run them individually:

    genius rise --spout all\ngenius rise --bolt all\n

    Package this entire workspace into a docker container and upload to ECR:

    genius docker package geniusrise ecr \\\n--auth '{\"aws_region\": \"ap-south-1\"}' \\\n--packages geniusrise-listeners geniusrise-huggingface\n
    "},{"location":"guides/local/#deployment","title":"Deployment","text":"

    Deploy the spout and bolt to Kubernetes. We could use the command line to deploy:

    genius Webhook deploy \\\nstream_to_batch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder train \\\nnone \\\nk8s \\\n--k8s_kind service \\\n--k8s_namespace geniusrise \\\n--k8s_cluster_name geniusrise-dev \\\n--k8s_context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev \\\n--k8s_name webhook \\\n--k8s_image \"genius-dev.dkr.ecr.ap-south-1.amazonaws.com/geniusrise\" \\\n--k8s_env_vars '{\"AWS_DEFAULT_REGION\": \"ap-south-1\", \"AWS_SECRET_ACCESS_KEY\": \"your-key\", \"AWS_ACCESS_KEY_ID\": \"your-secret\"}' \\\n--k8s_port 8080 \\\n--k8s_target_port 8080 \\\nlisten \\\n--args port=8080\n

    Or we could simply use the yaml we created in the previous step:

    genius rise up\n

    See the status of the deployment:

    # Find the pod id\ngenius pod show \\\n--namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev 2>&1 | grep Running\n\ngenius pod describe \\\nwebhook-75c4bff67d-hbhts \\\n--namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n\ngenius deployment describe \\\nwebhook \\\n--namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n\ngenius service describe \\\nwebhook \\\n--namespace geniusrise \\\n--context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise-dev\n
    "},{"location":"guides/packaging/","title":"Packaging","text":""},{"location":"guides/packaging/#overview","title":"Overview","text":"

    Geniusrise uses docker for packaging and delivering modules.

    "},{"location":"guides/packaging/#capabilities","title":"Capabilities","text":"
    • Docker Image Creation: Create Docker images with custom base images, working directories, and local directories.
    • Package Installation: Install both OS-level and Python packages during the Docker image creation.
    • Environment Variables: Set environment variables in the Docker container.
    • Multi-Repository Support: Upload Docker images to multiple types of container repositories.
    • Authentication: Supports various authentication methods for different container repositories.
    "},{"location":"guides/packaging/#command-line-interface","title":"Command-Line Interface","text":""},{"location":"guides/packaging/#syntax","title":"Syntax","text":"
    genius docker package <image_name> <repository> [options]\n
    "},{"location":"guides/packaging/#parameters","title":"Parameters","text":"
    • <image_name>: The name of the Docker image to build and upload.
    • <repository>: The container repository to upload to (e.g., \"ECR\", \"DockerHub\", \"Quay\", \"ACR\", \"GCR\").
    "},{"location":"guides/packaging/#options","title":"Options","text":"
    • --auth: Authentication credentials as a JSON string. Default is an empty JSON object.
    • --base_image: The base image to use for the Docker container. Default is \"nvidia/cuda:12.2.0-runtime-ubuntu20.04\".
    • --workdir: The working directory in the Docker container. Default is \"/app\".
    • --local_dir: The local directory to copy into the Docker container. Default is \".\".
    • --packages: List of Python packages to install in the Docker container. Default is an empty list.
    • --os_packages: List of OS packages to install in the Docker container. Default is an empty list.
    • --env_vars: Environment variables to set in the Docker container. Default is an empty dictionary.
    "},{"location":"guides/packaging/#authentication-details","title":"Authentication Details","text":"
    • ECR: {\"aws_region\": \"ap-south-1\", \"aws_secret_access_key\": \"aws_key\", \"aws_access_key_id\": \"aws_secret\"}
    • DockerHub: {\"dockerhub_username\": \"username\", \"dockerhub_password\": \"password\"}
    • ACR: {\"acr_username\": \"username\", \"acr_password\": \"password\", \"acr_login_server\": \"login_server\"}
    • GCR: {\"gcr_key_file_path\": \"/path/to/keyfile.json\", \"gcr_repository\": \"repository\"}
    • Quay: {\"quay_username\": \"username\", \"quay_password\": \"password\"}
    "},{"location":"guides/packaging/#examples","title":"Examples","text":""},{"location":"guides/packaging/#uploading-to-ecr-amazon-elastic-container-registry","title":"Uploading to ECR (Amazon Elastic Container Registry)","text":"
    genius docker package geniusrise ecr --auth '{\"aws_region\": \"ap-south-1\"}'\n
    "},{"location":"guides/packaging/#uploading-to-dockerhub","title":"Uploading to DockerHub","text":"
    genius docker package geniusrise dockerhub --auth '{\"dockerhub_username\": \"username\", \"dockerhub_password\": \"password\"}'\n
    "},{"location":"guides/packaging/#uploading-to-acr-azure-container-registry","title":"Uploading to ACR (Azure Container Registry)","text":"
    genius docker package geniusrise acr --auth '{\"acr_username\": \"username\", \"acr_password\": \"password\", \"acr_login_server\": \"login_server\"}'\n
    "},{"location":"guides/packaging/#uploading-to-gcr-google-container-registry","title":"Uploading to GCR (Google Container Registry)","text":"
    genius docker package geniusrise gcr --auth '{\"gcr_key_file_path\": \"/path/to/keyfile.json\", \"gcr_repository\": \"repository\"}'\n
    "},{"location":"guides/packaging/#uploading-to-quay","title":"Uploading to Quay","text":"
    genius docker package geniusrise quay --auth '{\"quay_username\": \"username\", \"quay_password\": \"password\"}'\n
    "},{"location":"guides/packaging/#uploading-with-custom-packages-and-os-packages","title":"Uploading with Custom Packages and OS Packages","text":"
    genius docker package geniusrise dockerhub \\\n--packages geniusrise-listeners geniusrise-databases geniusrise-huggingface geniusrise-openai \\\n--os_packages libmysqlclient-dev libldap2-dev libsasl2-dev libssl-dev\n
    "},{"location":"guides/packaging/#uploading-with-environment-variables","title":"Uploading with Environment Variables","text":"
    genius docker package geniusrise dockerhub --env_vars '{\"API_KEY\": \"123456\", \"ENV\": \"production\"}'\n
    "},{"location":"guides/packaging/#complex-examples","title":"Complex Examples","text":""},{"location":"guides/packaging/#1-uploading-to-ecr-with-custom-base-image-and-packages","title":"1. Uploading to ECR with Custom Base Image and Packages","text":"

    This example demonstrates how to upload a Docker image to ECR with a custom base image and additional Python packages.

    genius docker package my_custom_image ecr \\\n--auth '{\"aws_region\": \"us-west-2\", \"aws_secret_access_key\": \"aws_key\", \"aws_access_key_id\": \"aws_secret\"}' \\\n--base_image \"python:3.9-slim\" \\\n--packages \"numpy pandas scikit-learn\" \\\n--os_packages \"gcc g++\"\n
    "},{"location":"guides/packaging/#2-uploading-to-dockerhub-with-environment-variables-and-working-directory","title":"2. Uploading to DockerHub with Environment Variables and Working Directory","text":"

    This example shows how to upload a Docker image to DockerHub with custom environment variables and a specific working directory.

    genius docker package my_app dockerhub \\\n--auth '{\"dockerhub_username\": \"username\", \"dockerhub_password\": \"password\"}' \\\n--env_vars '{\"DEBUG\": \"True\", \"SECRET_KEY\": \"mysecret\"}' \\\n--workdir \"/my_app\"\n
    "},{"location":"guides/packaging/#3-uploading-to-acr-with-multiple-local-directories","title":"3. Uploading to ACR with Multiple Local Directories","text":"

    In this example, we upload a Docker image to Azure Container Registry (ACR) and specify multiple local directories to be copied into the Docker container.

    # First, create a Dockerfile that copies multiple directories\n# Then use the following command\ngenius docker package multi_dir_app acr \\\n--auth '{\"acr_username\": \"username\", \"acr_password\": \"password\", \"acr_login_server\": \"login_server\"}' \\\n--local_dir \"./app ./config\"\n
    "},{"location":"guides/packaging/#4-uploading-to-gcr-with-custom-base-image-packages-and-os-packages","title":"4. Uploading to GCR with Custom Base Image, Packages, and OS Packages","text":"

    This example demonstrates how to upload a Docker image to Google Container Registry (GCR) with a custom base image, Python packages, and OS packages.

    genius docker package my_ml_model gcr \\\n--auth '{\"gcr_key_file_path\": \"/path/to/keyfile.json\", \"gcr_repository\": \"repository\"}' \\\n--base_image \"tensorflow/tensorflow:latest-gpu\" \\\n--packages \"scipy keras\" \\\n--os_packages \"libsm6 libxext6 libxrender-dev\"\n
    "},{"location":"guides/packaging/#5-uploading-to-quay-with-all-customizations","title":"5. Uploading to Quay with All Customizations","text":"

    This example shows how to upload a Docker image to Quay with all available customizations like base image, working directory, local directory, Python packages, OS packages, and environment variables.

    genius docker package full_custom quay \\\n--auth '{\"quay_username\": \"username\", \"quay_password\": \"password\"}' \\\n--base_image \"alpine:latest\" \\\n--workdir \"/custom_app\" \\\n--local_dir \"./src\" \\\n--packages \"flask gunicorn\" \\\n--os_packages \"bash curl\" \\\n--env_vars '{\"FLASK_ENV\": \"production\", \"PORT\": \"8000\"}'\n
    "},{"location":"guides/pin/","title":"Building an AI pin","text":"

    Let's do an end-to-end project where we build an AI-pin to talk to a multi-modal language model.

    The system consists of two parts:

    1. Device: A low-power network device with camera, speaker and microphone
    2. Desktop: A central machine hosting the LLM, possibly a desktop computer running geniusrise
    c40377db-7b95-4b2b-86c5-2e4dccbb6e2c

    We start with a ESP32 based platform as there are many these days. Lets look at two of them:

    "},{"location":"guides/pin/#ttgo","title":"TTGO","text":"

    The TTGO T-Camera Plus is a unique ESP32 module featuring a built-in camera and display. It's designed for applications that require direct image capture and display capabilities without the need for external screens or cameras.

    • CPU: Dual-core Tensilica LX6 microprocessor up to 240 MHz
    • Memory: 520 KB SRAM, 4 MB PSRAM
    • Connectivity: Wi-Fi (802.11 b/g/n), Bluetooth (Classic and BLE)
    • Camera: OV2640 camera module, 2 Megapixels
    • Display: 1.3-inch OLED display
    • Extras: Fish-eye lens, optional MPU6050 module for motion sensing

    "},{"location":"guides/pin/#seeed-studio-xiao","title":"Seeed Studio XIAO","text":"

    Seeed Studio XIAO ESP32C3 is a mini but powerful module. It's part of the Seeed Studio XIAO series, known for its compact design and reliability in various IoT projects.

    • CPU: RISC-V single-core processor, up to 160 MHz
    • Memory: 400 KB SRAM, 4 MB Flash
    • Connectivity: Wi-Fi (802.11 b/g/n), Bluetooth 5 (LE)
    • I/O Pins: Rich set of peripherals including GPIOs, UART, SPI, I2C, and more.
    • Size: Ultra-small form factor suitable for wearable devices and compact projects

    "},{"location":"guides/pin/#peripherals","title":"Peripherals","text":"

    We used a bunch of these peripherals wherever the boards did not have them. We usually chose a platform with at least a screen and a camera included and added these peripherals to them.

    "},{"location":"guides/pin/#microphone","title":"Microphone","text":"
    • Model: INMP441 I2S
    • Features: High precision, omnidirectional, MEMS microphone module, Digital I2S interface
    • Usage: Ideal for high-quality audio input and voice command projects

    product-page.

    "},{"location":"guides/pin/#speaker","title":"Speaker","text":"
    • Model: SeeedStudio Grove Speaker
    • Features: Programmable, with built-in amplifier, capable of playing various tones and sounds
    • Usage: Suitable for projects requiring audio output like alarms, voice notifications, and music playback

    product-page

    "},{"location":"guides/pin/#touchscreen","title":"Touchscreen","text":"
    • Model: SeeedStudio Round Display for XIAO
    • Features: Touchscreen capability, round display, perfect for user interface projects
    • Usage: Excellent for compact and wearable devices requiring user interaction

    product-page

    "},{"location":"guides/pin/#connections","title":"Connections","text":"

    Now lets get connected. The following lists all connections, some soldering of headers may be required.

    "},{"location":"guides/pin/#seeed-studio-xiao-connections","title":"Seeed Studio XIAO Connections","text":"

    For the Seeed Studio XIAO, we'll connect a touchscreen display, an INMP441 I2S microphone, and a SeeedStudio Grove Speaker.

    "},{"location":"guides/pin/#touchscreen-display","title":"Touchscreen Display","text":"
    • Display Model: Seeed Studio Round Display for XIAO
    • Connection Type: SPI
    • Required Pins:
    • SCL (Serial Clock) to XIAO's SCL (GPIO18 for SPI clock)
    • SDA (Serial Data) to XIAO's SDA (GPIO19 for SPI MOSI)
    • RES (Reset) to any available GPIO pin (e.g., GPIO21) for display reset
    • DC (Data/Command) to any available GPIO pin (e.g., GPIO22) for data/command selection
    • CS (Chip Select) to any available GPIO pin (e.g., GPIO5) for SPI chip select

    Very easy to connect, xiao sits on the display.

    "},{"location":"guides/pin/#microphone-inmp441-i2s","title":"Microphone (INMP441 I2S)","text":"
    • Connection Type: I2S
    • Required Pins:
    • WS (Word Select/LRCLK) to GPIO23
    • SCK (Serial Clock) to GPIO18
    • SD (Serial Data) to GPIO19
    "},{"location":"guides/pin/#speaker-seeedstudio-grove","title":"Speaker (SeeedStudio Grove)","text":"
    • Connection Type: Digital I/O
    • Required Pins:
    • SIG to any PWM-capable GPIO pin (e.g., GPIO25) for audio signal
    • GND to GND
    "},{"location":"guides/pin/#ttgo-t-camera-plus-connections","title":"TTGO T-Camera Plus Connections","text":"

    For the TTGO T-Camera Plus, we're connecting an INMP441 I2S microphone and a SeeedStudio Grove Speaker since it already includes a camera and display.

    "},{"location":"guides/pin/#microphone-inmp441-i2s_1","title":"Microphone (INMP441 I2S)","text":"
    • Connection Type: I2S
    • Required Pins:
    • WS (Word Select/LRCLK) to GPIO32
    • SCK (Serial Clock) to GPIO14
    • SD (Serial Data) to GPIO27
    "},{"location":"guides/pin/#speaker-seeedstudio-grove_1","title":"Speaker (SeeedStudio Grove)","text":"
    • Connection Type: Digital I/O
    • Required Pins:
    • SIG to any PWM-capable GPIO pin (e.g., GPIO33) for audio signal
    • GND to GND
    "},{"location":"guides/pin/#general-tips","title":"General Tips","text":"
    • Power Supply: Ensure that all devices are powered appropriately. The XIAO and TTGO can be powered via USB or an external 3.3V power supply.
    • Common Ground: Make sure all components share a common ground connection.
    • Programming: Use the Arduino IDE or ESP-IDF for programming the ESP32 devices. Libraries specific to the peripherals (e.g., display, I2S microphone, and speaker) will be required.
    • I2S Library: For the INMP441 microphone, an I2S library suitable for ESP32 should be used to handle audio input.
    • Display Library: For the touchscreen display, a library compatible with the specific model will be needed for interfacing and graphics rendering.
    "},{"location":"guides/usage/","title":"Usage","text":"

    The easiest way to use geniusrise is to host an API over a desired model. Use one of the examples from text, vision or audio.

    "},{"location":"guides/usage/#run-on-local","title":"Run on Local","text":"

    Say, we are interested in running an API over a vision / multi-modal model such as bakLlava from huggingface:

    "},{"location":"guides/usage/#1-install-geniusrise-and-vision","title":"1. Install geniusrise and vision","text":"
    pip install torch\npip install geniusrise\npip install geniusrise-vision # vision multi-modal models\n# pip install geniusrise-text # text models, LLMs\n# pip install geniusrise-audio # audio models\n
    "},{"location":"guides/usage/#2-use-the-genius-cli-to-run-bakllava","title":"2. Use the genius cli to run bakLlava","text":"
    genius VisualQAAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"llava-hf/bakLlava-v1-hf\" \\\nmodel_class=\"LlavaForConditionalGeneration\" \\\nprocessor_class=\"AutoProcessor\" \\\ndevice_map=\"cuda:0\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\nmax_memory=None \\\ntorchscript=False \\\ncompile=False \\\nflash_attention=False \\\nbetter_transformers=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    "},{"location":"guides/usage/#3-test-the-api","title":"3. Test the API","text":"
    MY_IMAGE=/path/to/test/image\n\n(base64 -w 0 $MY_IMAGE | awk '{print \"{\\\"image_base64\\\": \\\"\"$0\"\\\", \\\"question\\\": \\\"<image>\\nUSER: Whats the content of the image?\\nASSISTANT:\\\", \\\"do_sample\\\": false, \\\"max_new_tokens\\\": 128}\"}' > /tmp/image_payload.json)\ncurl -X POST http://localhost:3000/api/v1/answer_question \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @/tmp/image_payload.json | jq\n
    "},{"location":"guides/usage/#4-save-your-work","title":"4. Save your work","text":"

    Save what you did to be replicated later as genius.yml file:

    version: '1'\nbolts:\nmy_bolt:\nname: VisualQAAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: 'llava-hf/bakLlava-v1-hf'\nmodel_class: 'LlavaForConditionalGeneration'\nprocessor_class: 'AutoProcessor'\ndevice_map: 'cuda:0'\nuse_cuda: True\nprecision: 'bfloat16'\nquantization: 0\nmax_memory: None\ntorchscript: False\ncompile: False\nflash_attention: False\nbetter_transformers: False\nendpoint: '*'\nport: 3000\ncors_domain: 'http://localhost:3000'\nusername: 'user'\npassword: 'password'\n

    To later re-run the same, simply navigate to the directory of this file and do:

    genius rise\n
    "},{"location":"guides/usage/#advanced-usage","title":"Advanced Usage","text":"

    For having a set of APIs, say for voice -> text -> text -> voice pipeline, create a genius.yml file like this:

    version: \"1\"\nbolts:\nspeech_to_text_bolt:\nname: SpeechToTextAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: openai/whisper-large-v3\nmodel_class: WhisperForConditionalGeneration\nprocessor_class: AutoProcessor\nuse_cuda: true\nprecision: float\nquantization: 0\ndevice_map: cuda:0\nmax_memory: null\ntorchscript: false\ncompile: false\nflash_attention: False\nbetter_transformers: False\nendpoint: \"0.0.0.0\"\nport: 3001\ncors_domain: http://localhost:3001\nusername: user\npassword: password\nchat_bolt:\nname: InstructionAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: TheBloke/Mistral-7B-Instruct-v0.1-GPTQ:gptq-4bit-32g-actorder_True\nmodel_class: AutoModelForCausalLM\ntokenizer_class: AutoTokenizer\nuse_cuda: true\nprecision: float16\nquantization: 0\ndevice_map: auto\nmax_memory: null\ntorchscript: false\ncompile: false\nflash_attention: False\nbetter_transformers: False\nawq_enabled: False\nendpoint: \"0.0.0.0\"\nport: 3002\ncors_domain: http://localhost:3002\nusername: user\npassword: password\ntext_to_speech_bolt:\nname: TextToSpeechAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\ninput_folder: ./input\noutput:\ntype: batch\nargs:\noutput_folder: ./output\nmethod: listen\nargs:\nmodel_name: suno/bark\nmodel_class: BarkModel\nprocessor_class: BarkProcessor\nuse_cuda: true\nprecision: float32\nquantization: 0\ndevice_map: cuda:0\nmax_memory: null\ntorchscript: false\ncompile: false\nflash_attention: False\nbetter_transformers: False\nendpoint: \"0.0.0.0\"\nport: 3003\ncors_domain: http://localhost:3003\nusername: user\npassword: password\n

    and run:

    genius rise\n

    (like docker-compose etc).

    then try it out:

    # Step 1: Transcribe audio file\nTRANSCRIPTION=$(echo $(base64 -w 0 sample.mp3) | awk '{print \"{\\\"audio_file\\\": \\\"\"$0\"\\\", \\\"model_sampling_rate\\\": 16000}\"}' | \\\ncurl -s -X POST http://localhost:3001/api/v1/transcribe \\\n-H \"Content-Type: application/json\" \\\n-u user:password \\\n-d @- | jq -r '.transcriptions.transcription')\necho \"Transcription: $TRANSCRIPTION\"\n# Step 2: Send a prompt to the text completion API\nPROMPT_JSON=$(jq -n --arg prompt \"$TRANSCRIPTION\" '{\"prompt\": $prompt, \"decoding_strategy\": \"generate\", \"max_new_tokens\": 100, \"do_sample\": true, \"pad_token_id\": 0}')\nCOMPLETION=$(echo $PROMPT_JSON | curl -s -X POST \"http://localhost:3002/api/v1/complete\" \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d @- | jq -r '.completion')\necho \"Completion: $COMPLETION\"\n# Step 3: Synthesize speech from text and play the output\nSYNTH_JSON=$(jq -n --arg text \"$COMPLETION\" '{\"text\": $text, \"output_type\": \"mp3\", \"voice_preset\": \"v2/en_speaker_6\"}')\ncurl -s -X POST \"http://localhost:3003/api/v1/synthesize\" \\\n-H \"Content-Type: application/json\" \\\n-u \"user:password\" \\\n-d \"$SYNTH_JSON\" | jq -r '.audio_file' | base64 -d > output.mp3\n\nvlc output.mp3 &>/dev/null\n
    "},{"location":"guides/usage/#run-on-remote","title":"Run on Remote","text":"

    If we are running on a remote machine instead, perhaps we want to use our own model stored in S3?

    genius VisualQAAPI rise \\\nbatch \\\n--input_s3_bucket my-s3-bucket \\\n--input_s3_folder model \\\nbatch \\\n--output_s3_bucket my-s3-bucket \\\n--output_s3_folder output-<partition/keys> \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"local\" \\\nmodel_class=\"LlavaForConditionalGeneration\" \\\nprocessor_class=\"AutoProcessor\" \\\ndevice_map=\"cuda:0\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\nmax_memory=None \\\ntorchscript=False \\\ncompile=False \\\nflash_attention=False \\\nbetter_transformers=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    or in YAML:

    version: '1'\nbolts:\nmy_bolt:\nname: VisualQAAPI\nstate:\ntype: none\ninput:\ntype: batch\nargs:\nbucket: my-s3-bucket\nfolder: model\noutput:\ntype: batch\nargs:\nbucket: my-s3-bucket\nfolder: output-<partition/keys>\nmethod: listen\nargs:\nmodel_name: 'llava-hf/bakLlava-v1-hf'\nmodel_class: 'LlavaForConditionalGeneration'\nprocessor_class: 'AutoProcessor'\ndevice_map: 'cuda:0'\nuse_cuda: True\nprecision: 'bfloat16'\nquantization: 0\nmax_memory: None\ntorchscript: False\ncompile: False\nflash_attention: False\nbetter_transformers: False\nendpoint: '*'\nport: 3000\ncors_domain: 'http://localhost:3000'\nusername: 'user'\npassword: 'password'\n
    "},{"location":"guides/usage/#docker-packaging","title":"Docker packaging","text":"

    Perhaps we also want to now use docker to package?

    Refer Packaging

    "},{"location":"guides/usage/#to-production","title":"To Production","text":"

    And finally deploy as a replicaset on a kubernetes cluster for going to prod!

    Refer Deployment

    "},{"location":"guides/usage/#observability","title":"Observability","text":"

    We have prometheus integrated, just integrate with your prometheus cluster! Prometheus runs on PROMETHEUS_PORT ENV variable or 8282 by default.

    "},{"location":"guides/yaml/","title":"YAML Structure and Operations","text":"

    The YAML file for Geniusrise is called Geniusfile.yaml and it has the following structure:

    version: 1\nspouts:\n<spout_name>:\nname: <spout_name>\nmethod: <method_name>\nargs:\n<key>: <value>\noutput:\ntype: <output_type>\nargs:\n<key>: <value>\nstate:\ntype: <state_type>\nargs:\n<key>: <value>\ndeploy:\ntype: <deploy_type>\nargs:\n<key>: <value>\nbolts:\n<bolt_name>:\nname: <bolt_name>\nmethod: <method_name>\nargs:\n<key>: <value>\ninput:\ntype: <input_type>\nargs:\n<key>: <value>\noutput:\ntype: <output_type>\nargs:\n<key>: <value>\nstate:\ntype: <state_type>\nargs:\n<key>: <value>\ndeploy:\ntype: <deploy_type>\nargs:\n<key>: <value>\n
    "},{"location":"guides/yaml/#example-yaml-files","title":"Example YAML Files","text":""},{"location":"guides/yaml/#example-1-basic-spout-and-bolt","title":"Example 1: Basic Spout and Bolt","text":"
    version: 1\nspouts:\nTestSpout:\nname: TestSpout\nmethod: listen\nargs:\nport: 8080\noutput:\ntype: batch\nargs:\nbucket: geniusrise-test\nfolder: train\nstate:\ntype: none\ndeploy:\ntype: k8s\nargs:\nkind: job\nname: coretest\nnamespace: geniusrise\nimage: \"geniusrise/geniusrise-core\"\nbolts:\nTestBolt:\nname: TestBolt\nmethod: process\nargs:\nfactor: 2\ninput:\ntype: batch\nargs:\nbucket: geniusrise-test\nfolder: train\noutput:\ntype: batch\nargs:\nbucket: geniusrise-test\nfolder: output\nstate:\ntype: none\ndeploy:\ntype: k8s\nargs:\nkind: job\nname: coretest\nnamespace: geniusrise\nimage: \"geniusrise/geniusrise-core\"\n
    "},{"location":"guides/yaml/#example-2-spout-with-redis-state","title":"Example 2: Spout with Redis State","text":"
    version: 1\nspouts:\nRedisSpout:\nname: RedisSpout\nmethod: listen\nargs:\nport: 8080\noutput:\ntype: streaming\nargs:\noutput_topic: geniusrise-stream\nkafka_servers: \"localhost:9092\"\nstate:\ntype: redis\nargs:\nredis_host: \"localhost\"\nredis_port: 6379\nredis_db: 0\ndeploy:\ntype: k8s\nargs:\nkind: service\nname: redisspout\nnamespace: geniusrise\nimage: \"geniusrise/geniusrise-core\"\n
    "},{"location":"guides/yaml/#example-3-bolt-with-postgres-state-and-ecs-deployment","title":"Example 3: Bolt with Postgres State and ECS Deployment","text":"
    version: 1\nbolts:\nPostgresBolt:\nname: PostgresBolt\nmethod: process\nargs:\nfactor: 2\ninput:\ntype: streaming\nargs:\ninput_topic: geniusrise-stream\nkafka_servers: \"localhost:9092\"\noutput:\ntype: batch\nargs:\nbucket: geniusrise-test\nfolder: output\nstate:\ntype: postgres\nargs:\npostgres_host: \"localhost\"\npostgres_port: 5432\npostgres_user: \"postgres\"\npostgres_password: \"password\"\npostgres_database: \"geniusrise\"\npostgres_table: \"state_table\"\ndeploy:\ntype: ecs\nargs:\nname: postgresbolt\naccount_id: \"123456789012\"\ncluster: \"geniusrise-cluster\"\nsubnet_ids: [\"subnet-abc123\", \"subnet-def456\"]\nsecurity_group_ids: [\"sg-abc123\"]\nlog_group: \"geniusrise-logs\"\nimage: \"geniusrise/geniusrise-core\"\n
    "},{"location":"guides/yaml/#example-4-spout-with-s3-state-and-lambda-deployment","title":"Example 4: Spout with S3 State and Lambda Deployment","text":"
    version: 1\nspouts:\nS3Spout:\nname: S3Spout\nmethod: listen\nargs:\ns3_bucket: geniusrise-data\ns3_prefix: input/\noutput:\ntype: streaming\nargs:\noutput_topic: geniusrise-s3-stream\nkafka_servers: \"localhost:9092\"\nstate:\ntype: s3\nargs:\nstate_bucket: geniusrise-state\nstate_prefix: s3spout/\ndeploy:\ntype: lambda\nargs:\nfunction_name: S3SpoutFunction\nrole_arn: arn:aws:iam::123456789012:role/execution_role\nruntime: python3.8\nhandler: s3spout.handler\n
    "},{"location":"guides/yaml/#example-5-bolt-with-dynamodb-state-and-fargate-deployment","title":"Example 5: Bolt with DynamoDB State and Fargate Deployment","text":"
    version: 1\nbolts:\nDynamoBolt:\nname: DynamoBolt\nmethod: process\nargs:\noperation: multiply\nfactor: 3\ninput:\ntype: streaming\nargs:\ninput_topic: geniusrise-s3-stream\nkafka_servers: \"localhost:9092\"\noutput:\ntype: batch\nargs:\nbucket: geniusrise-output\nfolder: dynamo/\nstate:\ntype: dynamodb\nargs:\ntable_name: DynamoStateTable\nregion: us-east-1\ndeploy:\ntype: fargate\nargs:\ncluster: geniusrise-fargate\ntask_definition: DynamoBoltTask\nlaunch_type: FARGATE\nsubnets: [\"subnet-xyz789\", \"subnet-uvw456\"]\n
    "},{"location":"guides/yaml/#example-6-spout-and-bolt-with-azure-blob-storage-and-azure-functions","title":"Example 6: Spout and Bolt with Azure Blob Storage and Azure Functions","text":"
    version: 1\nspouts:\nAzureBlobSpout:\nname: AzureBlobSpout\nmethod: listen\nargs:\ncontainer_name: geniusrise-input\nstorage_account: geniusriseaccount\nstorage_key: \"your_storage_key_here\"\noutput:\ntype: streaming\nargs:\noutput_topic: geniusrise-azure-stream\nkafka_servers: \"localhost:9092\"\nstate:\ntype: azure_blob\nargs:\ncontainer_name: geniusrise-state\nstorage_account: geniusriseaccount\nstorage_key: \"your_storage_key_here\"\ndeploy:\ntype: azure_function\nargs:\nfunction_name: AzureBlobSpoutFunction\nresource_group: geniusrise-rg\nstorage_account: geniusriseaccount\nplan: Consumption\nbolts:\nAzureBlobBolt:\nname: AzureBlobBolt\nmethod: process\nargs:\noperation: add\nvalue: 5\ninput:\ntype: streaming\nargs:\ninput_topic: geniusrise-azure-stream\nkafka_servers: \"localhost:9092\"\noutput:\ntype: azure_blob\nargs:\ncontainer_name: geniusrise-output\nstorage_account: geniusriseaccount\nstorage_key: \"your_storage_key_here\"\nstate:\ntype: azure_blob\nargs:\ncontainer_name: geniusrise-state\nstorage_account: geniusriseaccount\nstorage_key: \"your_storage_key_here\"\ndeploy:\ntype: azure_function\nargs:\nfunction_name: AzureBlobBoltFunction\nresource_group: geniusrise-rg\nstorage_account: geniusriseaccount\nplan: Consumption\n
    "},{"location":"guides/yaml/#running-and-deploying-yaml-files","title":"Running and Deploying YAML Files","text":"

    To run the YAML file:

    genius rise\n

    To deploy the YAML file:

    genius rise up\n
    "},{"location":"guides/yaml/#managing-kubernetes-deployments","title":"Managing Kubernetes Deployments","text":"

    You can manage Kubernetes deployments using the genius CLI. Here are some example commands:

    # Show pods in a namespace\ngenius pod show --namespace geniusrise --context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise\n\n# Scale a deployment\ngenius pod scale --namespace geniusrise --context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise --name testspout --replicas 3\n# Delete a deployment\ngenius pod delete --namespace geniusrise --context_name arn:aws:eks:us-east-1:genius-dev:cluster/geniusrise --name testspout\n
    "},{"location":"guides/yaml/#managing-ecs-deployments","title":"Managing ECS Deployments","text":"

    You can manage ECS deployments using the genius CLI. Here are some example commands:

    # Show tasks in a cluster\ngenius ecs show --cluster geniusrise-cluster --account_id 123456789012\n# Scale a service\ngenius ecs scale --cluster geniusrise-cluster --account_id 123456789012 --name postgresbolt --desired_count 3\n# Delete a service\ngenius ecs delete --cluster geniusrise-cluster --account_id 123456789012 --name postgresbolt\n
    "},{"location":"listeners/activemq/","title":"ActiveMQ","text":"

    Spout for ActiveMQ

    Bases: Spout

    "},{"location":"listeners/activemq/#activemq.ActiveMQ.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the ActiveMQ class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/activemq/#activemq.ActiveMQ.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ActiveMQ rise \\\nstreaming \\\n--output_kafka_topic activemq_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args host=localhost port=61613 destination=my_queue\n
    "},{"location":"listeners/activemq/#activemq.ActiveMQ.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_activemq_spout:\nname: \"ActiveMQ\"\nmethod: \"listen\"\nargs:\nhost: \"localhost\"\nport: 61613\ndestination: \"my_queue\"\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"activemq_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/activemq/#activemq.ActiveMQ.listen","title":"listen(host, port, destination, username=None, password=None)","text":"

    \ud83d\udcd6 Start listening for data from the ActiveMQ server.

    Parameters:

    Name Type Description Default host str

    The ActiveMQ server host.

    required port int

    The ActiveMQ server port.

    required destination str

    The ActiveMQ destination (queue or topic).

    required username Optional[str]

    The username for authentication. Defaults to None.

    None password Optional[str]

    The password for authentication. Defaults to None.

    None

    Raises:

    Type Description Exception

    If unable to connect to the ActiveMQ server.

    "},{"location":"listeners/amqp/","title":"ActiveMQ","text":"

    Spout for AMQP

    Bases: Spout

    "},{"location":"listeners/amqp/#amqp.RabbitMQ.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the RabbitMQ class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/amqp/#amqp.RabbitMQ.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius RabbitMQ rise \\\nstreaming \\\n--output_kafka_topic rabbitmq_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args queue_name=my_queue host=localhost\n
    "},{"location":"listeners/amqp/#amqp.RabbitMQ.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_rabbitmq_spout:\nname: \"RabbitMQ\"\nmethod: \"listen\"\nargs:\nqueue_name: \"my_queue\"\nhost: \"localhost\"\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"rabbitmq_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/amqp/#amqp.RabbitMQ.listen","title":"listen(queue_name, host='localhost', username=None, password=None)","text":"

    \ud83d\udcd6 Start listening for data from the RabbitMQ server.

    Parameters:

    Name Type Description Default queue_name str

    The RabbitMQ queue name to listen to.

    required host str

    The RabbitMQ server host. Defaults to \"localhost\".

    'localhost' username Optional[str]

    The username for authentication. Defaults to None.

    None password Optional[str]

    The password for authentication. Defaults to None.

    None

    Raises:

    Type Description Exception

    If unable to connect to the RabbitMQ server.

    "},{"location":"listeners/grpc/","title":"GRPC","text":"

    Spout for gRPC

    Bases: Spout

    "},{"location":"listeners/grpc/#grpc.Grpc.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Grpc class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/grpc/#grpc.Grpc.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Grpc rise \\\nstreaming \\\n--output_kafka_topic grpc_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args server_address=localhost:50051 request_data=my_request syntax=proto3\n
    "},{"location":"listeners/grpc/#grpc.Grpc.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_grpc_spout:\nname: \"Grpc\"\nmethod: \"listen\"\nargs:\nserver_address: \"localhost:50051\"\nrequest_data: \"my_request\"\nsyntax: \"proto3\"\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"grpc_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/grpc/#grpc.Grpc.listen","title":"listen(server_address, request_data, syntax, certificate=None, client_key=None, client_cert=None)","text":"

    \ud83d\udcd6 Start listening for data from the gRPC server.

    Parameters:

    Name Type Description Default server_address str

    The address of the gRPC server.

    required request_data str

    Data to send in the request.

    required syntax str

    The syntax to be used (e.g., \"proto3\").

    required certificate Optional[str]

    Optional server certificate for SSL/TLS.

    None client_key Optional[str]

    Optional client key for SSL/TLS.

    None client_cert Optional[str]

    Optional client certificate for SSL/TLS.

    None

    Raises:

    Type Description grpc.RpcError

    If there is an error while processing gRPC messages.

    "},{"location":"listeners/http_polling/","title":"HTTP polling","text":"

    Spout for HTTP polling

    Bases: Spout

    "},{"location":"listeners/http_polling/#http_polling.RESTAPIPoll.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the RESTAPIPoll class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/http_polling/#http_polling.RESTAPIPoll.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius RESTAPIPoll rise \\\nstreaming \\\n--output_kafka_topic restapi_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args url=https://api.example.com method=GET interval=60\n
    "},{"location":"listeners/http_polling/#http_polling.RESTAPIPoll.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_restapi_poll:\nname: \"RESTAPIPoll\"\nmethod: \"listen\"\nargs:\nurl: \"https://api.example.com\"\nmethod: \"GET\"\ninterval: 60\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"restapi_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/http_polling/#http_polling.RESTAPIPoll.listen","title":"listen(url, method, interval=60, body=None, headers=None, params=None)","text":"

    Start polling the REST API for data.

    Parameters:

    Name Type Description Default url str

    The API endpoint.

    required method str

    The HTTP method (GET, POST, etc.).

    required interval int

    The polling interval in seconds. Defaults to 60.

    60 body Optional[Dict]

    The request body. Defaults to None.

    None headers Optional[Dict[str, str]]

    The request headers. Defaults to None.

    None params Optional[Dict[str, str]]

    The request query parameters. Defaults to None.

    None"},{"location":"listeners/http_polling/#http_polling.RESTAPIPoll.poll_api","title":"poll_api(url, method, body=None, headers=None, params=None)","text":"

    \ud83d\udcd6 Start polling the REST API for data.

    Parameters:

    Name Type Description Default url str

    The API endpoint.

    required method str

    The HTTP method (GET, POST, etc.).

    required interval int

    The polling interval in seconds.

    required body Optional[Dict]

    The request body. Defaults to None.

    None headers Optional[Dict[str, str]]

    The request headers. Defaults to None.

    None params Optional[Dict[str, str]]

    The request query parameters. Defaults to None.

    None

    Raises:

    Type Description Exception

    If unable to connect to the REST API server.

    "},{"location":"listeners/kafka/","title":"Kafka","text":"

    Spout for Kafka

    Bases: Spout

    "},{"location":"listeners/kafka/#kafka.Kafka.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Kafka class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/kafka/#kafka.Kafka.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Kafka rise \\\nstreaming \\\n--output_kafka_topic kafka_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args topic=my_topic group_id=my_group\n
    "},{"location":"listeners/kafka/#kafka.Kafka.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_kafka_spout:\nname: \"Kafka\"\nmethod: \"listen\"\nargs:\ntopic: \"my_topic\"\ngroup_id: \"my_group\"\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"kafka_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/kafka/#kafka.Kafka.listen","title":"listen(topic, group_id, bootstrap_servers='localhost:9092', username=None, password=None)","text":"

    \ud83d\udcd6 Start listening for data from the Kafka topic.

    Parameters:

    Name Type Description Default topic str

    The Kafka topic to listen to.

    required group_id str

    The Kafka consumer group ID.

    required bootstrap_servers str

    The Kafka bootstrap servers. Defaults to \"localhost:9092\".

    'localhost:9092' username Optional[str]

    The username for SASL/PLAIN authentication. Defaults to None.

    None password Optional[str]

    The password for SASL/PLAIN authentication. Defaults to None.

    None

    Raises:

    Type Description Exception

    If unable to connect to the Kafka server.

    "},{"location":"listeners/kinesis/","title":"Kinesis","text":"

    Spout for Kinesis

    Bases: Spout

    "},{"location":"listeners/kinesis/#kinesis.Kinesis.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Kinesis class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/kinesis/#kinesis.Kinesis.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Kinesis rise \\\nstreaming \\\n--output_kafka_topic kinesis_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args stream_name=my_stream shard_id=shardId-000000000000\n
    "},{"location":"listeners/kinesis/#kinesis.Kinesis.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_kinesis_spout:\nname: \"Kinesis\"\nmethod: \"listen\"\nargs:\nstream_name: \"my_stream\"\nshard_id: \"shardId-000000000000\"\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"kinesis_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/kinesis/#kinesis.Kinesis.listen","title":"listen(stream_name, shard_id='shardId-000000000000', region_name=None, aws_access_key_id=None, aws_secret_access_key=None)","text":"

    \ud83d\udcd6 Start listening for data from the Kinesis stream.

    Parameters:

    Name Type Description Default stream_name str

    The name of the Kinesis stream.

    required shard_id str

    The shard ID to read from. Defaults to \"shardId-000000000000\".

    'shardId-000000000000' region_name str

    The AWS region name.

    None aws_access_key_id str

    AWS access key ID for authentication.

    None aws_secret_access_key str

    AWS secret access key for authentication.

    None

    Raises:

    Type Description Exception

    If there is an error while processing Kinesis records.

    "},{"location":"listeners/mqtt/","title":"MQTT","text":"

    Spout for MQTT

    Bases: Spout

    "},{"location":"listeners/mqtt/#mqtt.MQTT.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the MQTT class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/mqtt/#mqtt.MQTT.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius MQTT rise \\\nstreaming \\\n--output_kafka_topic mqtt_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args host=localhost port=1883 topic=my_topic\n
    "},{"location":"listeners/mqtt/#mqtt.MQTT.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_mqtt_spout:\nname: \"MQTT\"\nmethod: \"listen\"\nargs:\nhost: \"localhost\"\nport: 1883\ntopic: \"my_topic\"\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"mqtt_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/mqtt/#mqtt.MQTT.listen","title":"listen(host='localhost', port=1883, topic='#', username=None, password=None)","text":"

    Start listening for data from the MQTT broker.

    Parameters:

    Name Type Description Default host str

    The MQTT broker host. Defaults to \"localhost\".

    'localhost' port int

    The MQTT broker port. Defaults to 1883.

    1883 topic str

    The MQTT topic to subscribe to. Defaults to \"#\".

    '#' username Optional[str]

    The username for authentication. Defaults to None.

    None password Optional[str]

    The password for authentication. Defaults to None.

    None"},{"location":"listeners/quic/","title":"Quic","text":"

    Spout for Quic

    Bases: Spout

    "},{"location":"listeners/quic/#quic.Quic.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Quic class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/quic/#quic.Quic.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Quic rise \\\nstreaming \\\n--output_kafka_topic quic_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args cert_path=/path/to/cert.pem key_path=/path/to/key.pem host=localhost port=4433\n
    "},{"location":"listeners/quic/#quic.Quic.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_quic_spout:\nname: \"Quic\"\nmethod: \"listen\"\nargs:\ncert_path: \"/path/to/cert.pem\"\nkey_path: \"/path/to/key.pem\"\nhost: \"localhost\"\nport: 4433\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"quic_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/quic/#quic.Quic.handle_stream_data","title":"handle_stream_data(data, stream_id) async","text":"

    Handle incoming stream data.

    :param data: The incoming data. :param stream_id: The ID of the stream.

    "},{"location":"listeners/quic/#quic.Quic.listen","title":"listen(cert_path, key_path, host='localhost', port=4433)","text":"

    \ud83d\udcd6 Start listening for data from the QUIC server.

    Parameters:

    Name Type Description Default cert_path str

    Path to the certificate file.

    required key_path str

    Path to the private key file.

    required host str

    Hostname to listen on. Defaults to \"localhost\".

    'localhost' port int

    Port to listen on. Defaults to 4433.

    4433

    Raises:

    Type Description Exception

    If unable to start the QUIC server.

    "},{"location":"listeners/redis_pubsub/","title":"Redis pubsub","text":"

    Spout for Redis pubsub

    Bases: Spout

    "},{"location":"listeners/redis_pubsub/#redis_pubsub.RedisPubSub.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the RedisPubSub class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/redis_pubsub/#redis_pubsub.RedisPubSub.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius RedisPubSub rise \\\nstreaming \\\n--output_kafka_topic redis_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args channel=my_channel host=localhost port=6379 db=0\n
    "},{"location":"listeners/redis_pubsub/#redis_pubsub.RedisPubSub.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_redis_spout:\nname: \"RedisPubSub\"\nmethod: \"listen\"\nargs:\nchannel: \"my_channel\"\nhost: \"localhost\"\nport: 6379\ndb: 0\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"redis_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/redis_pubsub/#redis_pubsub.RedisPubSub.listen","title":"listen(channel, host='localhost', port=6379, db=0, password=None)","text":"

    \ud83d\udcd6 Start listening for data from the Redis Pub/Sub channel.

    Parameters:

    Name Type Description Default channel str

    The Redis Pub/Sub channel to listen to.

    required host str

    The Redis server host. Defaults to \"localhost\".

    'localhost' port int

    The Redis server port. Defaults to 6379.

    6379 db int

    The Redis database index. Defaults to 0.

    0 password Optional[str]

    The password for authentication. Defaults to None.

    None

    Raises:

    Type Description Exception

    If unable to connect to the Redis server.

    "},{"location":"listeners/redis_streams/","title":"Redis streams","text":"

    Spout for Redis streams

    Bases: Spout

    "},{"location":"listeners/redis_streams/#redis_streams.RedisStream.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the RedisStream class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/redis_streams/#redis_streams.RedisStream.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius RedisStream rise \\\nstreaming \\\n--output_kafka_topic redis_stream_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args stream_key=my_stream host=localhost port=6379 db=0\n
    "},{"location":"listeners/redis_streams/#redis_streams.RedisStream.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_redis_stream:\nname: \"RedisStream\"\nmethod: \"listen\"\nargs:\nstream_key: \"my_stream\"\nhost: \"localhost\"\nport: 6379\ndb: 0\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"redis_stream_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/redis_streams/#redis_streams.RedisStream.listen","title":"listen(stream_key, host='localhost', port=6379, db=0, password=None)","text":"

    \ud83d\udcd6 Start the asyncio event loop to listen for data from the Redis stream.

    Parameters:

    Name Type Description Default stream_key str

    The Redis stream key to listen to.

    required host str

    The Redis server host. Defaults to \"localhost\".

    'localhost' port int

    The Redis server port. Defaults to 6379.

    6379 db int

    The Redis database index. Defaults to 0.

    0 password Optional[str]

    The password for authentication. Defaults to None.

    None"},{"location":"listeners/sns/","title":"SNS","text":"

    Spout for SNS

    Bases: Spout

    "},{"location":"listeners/sns/#sns.SNS.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the SNS class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/sns/#sns.SNS.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius SNS rise \\\nstreaming \\\n--output_kafka_topic sns_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten\n
    "},{"location":"listeners/sns/#sns.SNS.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_sns_spout:\nname: \"SNS\"\nmethod: \"listen\"\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"sns_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/sns/#sns.SNS.listen","title":"listen()","text":"

    \ud83d\udcd6 Start the asyncio event loop to listen for data from AWS SNS.

    "},{"location":"listeners/socket.io/","title":"Socket.io","text":"

    Spout for socket.io

    Bases: Spout

    "},{"location":"listeners/socket.io/#socketio.SocketIo.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the SocketIo class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/socket.io/#socketio.SocketIo.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius SocketIo rise \\\nstreaming \\\n--output_kafka_topic socketio_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args url=http://localhost:3000 namespace=/chat\n
    "},{"location":"listeners/socket.io/#socketio.SocketIo.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_socketio_spout:\nname: \"SocketIo\"\nmethod: \"listen\"\nargs:\nurl: \"http://localhost:3000\"\nnamespace: \"/chat\"\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"socketio_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/socket.io/#socketio.SocketIo.listen","title":"listen(url, namespace=None, event='message', auth=None)","text":"

    \ud83d\udcd6 Start listening for data from the Socket.io server.

    Parameters:

    Name Type Description Default url str

    The Socket.io server URL.

    required namespace Optional[str]

    The Socket.io namespace. Defaults to None.

    None event str

    The Socket.io event to listen to. Defaults to \"message\".

    'message' auth Optional[dict]

    Authentication dictionary. Defaults to None.

    None

    Raises:

    Type Description Exception

    If unable to connect to the Socket.io server.

    "},{"location":"listeners/sqs/","title":"SQS","text":"

    Spout for SQS

    Bases: Spout

    "},{"location":"listeners/sqs/#sqs.SQS.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the SQS class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/sqs/#sqs.SQS.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius SQS rise \\\nstreaming \\\n--output_kafka_topic sqs_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args queue_url=https://sqs.us-east-1.amazonaws.com/123456789012/my-queue batch_size=10 batch_interval=10\n
    "},{"location":"listeners/sqs/#sqs.SQS.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_sqs_spout:\nname: \"SQS\"\nmethod: \"listen\"\nargs:\nqueue_url: \"https://sqs.us-east-1.amazonaws.com/123456789012/my-queue\"\nbatch_size: 10\nbatch_interval: 10\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"sqs_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/sqs/#sqs.SQS.listen","title":"listen(queue_url, batch_size=10, batch_interval=10)","text":"

    \ud83d\udcd6 Start listening for new messages in the SQS queue.

    Parameters:

    Name Type Description Default queue_url str

    The URL of the SQS queue to listen to.

    required batch_size int

    The maximum number of messages to receive in each batch. Defaults to 10.

    10 batch_interval int

    The time in seconds to wait for a new message if the queue is empty. Defaults to 10.

    10

    Raises:

    Type Description Exception

    If unable to connect to the SQS service.

    "},{"location":"listeners/udp/","title":"UDP","text":"

    Spout for UDP

    Bases: Spout

    "},{"location":"listeners/udp/#udp.Udp.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Udp class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/udp/#udp.Udp.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Udp rise \\\nstreaming \\\n--output_kafka_topic udp_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args host=localhost port=12345\n
    "},{"location":"listeners/udp/#udp.Udp.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_udp_spout:\nname: \"Udp\"\nmethod: \"listen\"\nargs:\nhost: \"localhost\"\nport: 12345\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"udp_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/udp/#udp.Udp.listen","title":"listen(host='localhost', port=12345)","text":"

    \ud83d\udcd6 Start listening for data from the UDP server.

    Parameters:

    Name Type Description Default host str

    The UDP server host. Defaults to \"localhost\".

    'localhost' port int

    The UDP server port. Defaults to 12345.

    12345

    Raises:

    Type Description Exception

    If unable to connect to the UDP server.

    "},{"location":"listeners/webhook/","title":"Webhook","text":"

    Spout for Webhook

    Bases: Spout

    "},{"location":"listeners/webhook/#webhook.Webhook.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Webhook class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/webhook/#webhook.Webhook.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Webhook rise \\\nstreaming \\\n--output_kafka_topic webhook_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args endpoint=* port=3000\n
    "},{"location":"listeners/webhook/#webhook.Webhook.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_webhook_spout:\nname: \"Webhook\"\nmethod: \"listen\"\nargs:\nendpoint: \"*\"\nport: 3000\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"webhook_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/webhook/#webhook.Webhook.listen","title":"listen(endpoint='*', port=3000, username=None, password=None)","text":"

    \ud83d\udcd6 Start listening for data from the webhook.

    Parameters:

    Name Type Description Default endpoint str

    The webhook endpoint to listen to. Defaults to \"*\".

    '*' port int

    The port to listen on. Defaults to 3000.

    3000 username Optional[str]

    The username for basic authentication. Defaults to None.

    None password Optional[str]

    The password for basic authentication. Defaults to None.

    None

    Raises:

    Type Description Exception

    If unable to start the CherryPy server.

    "},{"location":"listeners/websocket/","title":"Websocket","text":"

    Spout for Websocket

    Bases: Spout

    "},{"location":"listeners/websocket/#websocket.Websocket.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the Websocket class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/websocket/#websocket.Websocket.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius Websocket rise \\\nstreaming \\\n--output_kafka_topic websocket_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args host=localhost port=8765\n
    "},{"location":"listeners/websocket/#websocket.Websocket.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_websocket_spout:\nname: \"Websocket\"\nmethod: \"listen\"\nargs:\nhost: \"localhost\"\nport: 8765\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"websocket_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/websocket/#websocket.Websocket.__listen","title":"__listen(host, port) async","text":"

    Start listening for data from the WebSocket server.

    "},{"location":"listeners/websocket/#websocket.Websocket.listen","title":"listen(host='localhost', port=8765)","text":"

    \ud83d\udcd6 Start the WebSocket server.

    Parameters:

    Name Type Description Default host str

    The WebSocket server host. Defaults to \"localhost\".

    'localhost' port int

    The WebSocket server port. Defaults to 8765.

    8765

    Raises:

    Type Description Exception

    If unable to start the WebSocket server.

    "},{"location":"listeners/websocket/#websocket.Websocket.receive_message","title":"receive_message(websocket, path) async","text":"

    Receive a message from a WebSocket client and save it along with metadata.

    Parameters:

    Name Type Description Default websocket

    WebSocket client connection.

    required path

    WebSocket path.

    required"},{"location":"listeners/zeromq/","title":"ZeroMQ","text":"

    Spout for ZeroMQ

    Bases: Spout

    "},{"location":"listeners/zeromq/#zeromq.ZeroMQ.__init__","title":"__init__(output, state, **kwargs)","text":"

    Initialize the ZeroMQ class.

    Parameters:

    Name Type Description Default output StreamingOutput

    An instance of the StreamingOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"listeners/zeromq/#zeromq.ZeroMQ.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ZeroMQ rise \\\nstreaming \\\n--output_kafka_topic zmq_test \\\n--output_kafka_cluster_connection_string localhost:9094 \\\nnone \\\nlisten \\\n--args endpoint=tcp://localhost:5555 topic=my_topic syntax=json\n
    "},{"location":"listeners/zeromq/#zeromq.ZeroMQ.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nmy_zmq_spout:\nname: \"ZeroMQ\"\nmethod: \"listen\"\nargs:\nendpoint: \"tcp://localhost:5555\"\ntopic: \"my_topic\"\nsyntax: \"json\"\noutput:\ntype: \"streaming\"\nargs:\noutput_topic: \"zmq_test\"\nkafka_servers: \"localhost:9094\"\n
    "},{"location":"listeners/zeromq/#zeromq.ZeroMQ.listen","title":"listen(endpoint, topic, syntax, socket_type='SUB')","text":"

    \ud83d\udcd6 Start listening for data from the ZeroMQ server.

    Parameters:

    Name Type Description Default endpoint str

    The endpoint to connect to (e.g., \"tcp://localhost:5555\").

    required topic str

    The topic to subscribe to.

    required syntax str

    The syntax to be used (e.g., \"json\").

    required socket_type Optional[str]

    The type of ZeroMQ socket (default is \"SUB\").

    'SUB'

    Raises:

    Type Description Exception

    If unable to connect to the ZeroMQ server or process messages.

    "},{"location":"ocr/ConvertImage/","title":"Convert Images","text":"

    Bases: Bolt

    "},{"location":"ocr/ConvertImage/#geniusrise_ocr.readers.image.ConvertImage.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The ConvertImage class is designed to convert images from one format to another. It takes an input folder containing images and an output format as arguments. The class iterates through each image file in the specified folder and converts it to the desired format. Additional options like quality and subsampling can be specified for lossy formats like 'JPG'.

    Parameters:

    Name Type Description Default input BatchInput

    An instance of the BatchInput class for reading the data.

    required output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/ConvertImage/#geniusrise_ocr.readers.image.ConvertImage.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ConvertImage rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess \\\n--args input_folder=/path/to/image/folder output_format=PNG quality=95 subsampling=0\n
    "},{"location":"ocr/ConvertImage/#geniusrise_ocr.readers.image.ConvertImage.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nconvert_images:\nname: \"ConvertImage\"\nmethod: \"process\"\nargs:\noutput_format: \"PNG\"\nquality: 95\nsubsampling: 0\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\n
    "},{"location":"ocr/ConvertImage/#geniusrise_ocr.readers.image.ConvertImage.process","title":"process(output_format, quality=None, subsampling=0)","text":"

    \ud83d\udcd6 Convert images in the given input folder to the specified output format.

    Parameters:

    Name Type Description Default output_format str

    The format to convert images to ('PNG' or 'JPG').

    required quality Optional[int]

    The quality of the output image for lossy formats like 'JPG'. Defaults to None.

    None subsampling Optional[int]

    The subsampling factor for JPEG compression. Defaults to 0.

    0

    This method iterates through each image file in the specified folder, reads the image, and converts it to the specified output format. Additional parameters like quality and subsampling can be set for lossy formats.

    "},{"location":"ocr/FineTunePix2Struct/","title":"Fine-tune pix2struct","text":"

    Bases: Bolt

    "},{"location":"ocr/FineTunePix2Struct/#geniusrise_ocr.ocr.pix2struct.fine_tune.FineTunePix2Struct.__init__","title":"__init__(input, output, state, model_name='google/pix2struct-large', **kwargs)","text":"

    The FineTunePix2Struct class is designed to fine-tune the Pix2Struct model on a custom OCR dataset. It supports three popular OCR dataset formats: COCO, ICDAR, and SynthText.

    Parameters:

    Name Type Description Default input BatchInput

    An instance of the BatchInput class for reading the data.

    required output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required model_name str

    The name of the Pix2Struct model to use. Default is \"google/pix2struct-large\".

    'google/pix2struct-large' **kwargs

    Additional keyword arguments.

    {} Dataset Formats
    • COCO: Assumes a folder structure with an 'annotations.json' file containing image and text annotations.
    • ICDAR: Assumes a folder structure with 'Images' and 'Annotations' folders containing image files and XML annotation files respectively.
    • SynthText: Assumes a folder with image files and corresponding '.txt' files containing ground truth text.
    "},{"location":"ocr/FineTunePix2Struct/#geniusrise_ocr.ocr.pix2struct.fine_tune.FineTunePix2Struct.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius FineTunePix2Struct rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess \\\n--args epochs=3 batch_size=32 learning_rate=0.001 dataset_format=coco use_cuda=true\n
    "},{"location":"ocr/FineTunePix2Struct/#geniusrise_ocr.ocr.pix2struct.fine_tune.FineTunePix2Struct.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nfine_tune_pix2struct:\nname: \"FineTunePix2Struct\"\nmethod: \"process\"\nargs:\nepochs: 3\nbatch_size: 32\nlearning_rate: 0.001\ndataset_format: coco\nuse_cuda: true\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\n
    "},{"location":"ocr/FineTunePix2Struct/#geniusrise_ocr.ocr.pix2struct.fine_tune.FineTunePix2Struct.process","title":"process(epochs, batch_size, learning_rate, dataset_format, use_cuda=False)","text":"

    \ud83d\udcd6 Fine-tune the Pix2Struct model on a custom OCR dataset.

    Parameters:

    Name Type Description Default epochs int

    Number of training epochs.

    required batch_size int

    Batch size for training.

    required learning_rate float

    Learning rate for the optimizer.

    required dataset_format str

    Format of the OCR dataset. Supported formats are \"coco\", \"icdar\", and \"synthtext\".

    required use_cuda bool

    Whether to use CUDA for training. Default is False.

    False

    This method fine-tunes the Pix2Struct model using the images and annotations in the dataset specified by dataset_format. The fine-tuned model is saved to the specified output path.

    "},{"location":"ocr/FineTuneTROCR/","title":"OCR API using trocr","text":"

    Bases: Bolt

    "},{"location":"ocr/FineTuneTROCR/#geniusrise_ocr.ocr.trocr.fine_tune.FineTuneTROCR.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The FineTuneTROCR class is designed to fine-tune the TROCR model on a custom OCR dataset. It supports three popular OCR dataset formats: COCO, ICDAR, and SynthText.

    Parameters:

    Name Type Description Default input BatchInput

    An instance of the BatchInput class for reading the data.

    required output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {} Dataset Formats
    • COCO: Assumes a folder structure with an 'annotations.json' file containing image and text annotations.
    • ICDAR: Assumes a folder structure with 'Images' and 'Annotations' folders containing image files and XML annotation files respectively.
    • SynthText: Assumes a folder with image files and corresponding '.txt' files containing ground truth text.
    "},{"location":"ocr/FineTuneTROCR/#geniusrise_ocr.ocr.trocr.fine_tune.FineTuneTROCR.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius FineTuneTROCR rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess \\\n--args epochs=3 batch_size=32 learning_rate=0.001 dataset_format=coco use_cuda=true\n
    "},{"location":"ocr/FineTuneTROCR/#geniusrise_ocr.ocr.trocr.fine_tune.FineTuneTROCR.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nfine_tune_trocr:\nname: \"FineTuneTROCR\"\nmethod: \"process\"\nargs:\nepochs: 3\nbatch_size: 32\nlearning_rate: 0.001\ndataset_format: coco\nuse_cuda: true\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\n
    "},{"location":"ocr/FineTuneTROCR/#geniusrise_ocr.ocr.trocr.fine_tune.FineTuneTROCR.process","title":"process(epochs, batch_size, learning_rate, dataset_format, use_cuda=False)","text":"

    \ud83d\udcd6 Fine-tune the TROCR model on a custom OCR dataset.

    Parameters:

    Name Type Description Default epochs int

    Number of training epochs.

    required batch_size int

    Batch size for training.

    required learning_rate float

    Learning rate for the optimizer.

    required dataset_format str

    Format of the OCR dataset. Supported formats are \"coco\", \"icdar\", and \"synthtext\".

    required use_cuda bool

    Whether to use CUDA for training. Default is False.

    False

    This method fine-tunes the TROCR model using the images and annotations in the dataset specified by dataset_format. The fine-tuned model is saved to the specified output path.

    "},{"location":"ocr/ImageClassPredictor/","title":"Predict image classes","text":"

    Bases: Bolt

    "},{"location":"ocr/ImageClassPredictor/#geniusrise_ocr.classification.predict.ImageClassPredictor.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The ImageClassPredictor class classifies images using a pre-trained PyTorch model. It assumes that the input.input_folder contains sub-folders of images to be classified. The classified images are saved in output.output_folder, organized by their predicted labels.

    Parameters:

    Name Type Description Default input BatchInput

    Instance of BatchInput for reading data.

    required output BatchOutput

    Instance of BatchOutput for saving data.

    required state State

    Instance of State for maintaining state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/ImageClassPredictor/#geniusrise_ocr.classification.predict.ImageClassPredictor.__init__--command-line-invocation-with-geniusrise","title":"Command Line Invocation with geniusrise","text":"
    genius ImageClassPredictor rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\npredict \\\n--args classes='{\"0\": \"cat\", \"1\": \"dog\"}' model_path=/path/to/model.pth\n
    "},{"location":"ocr/ImageClassPredictor/#geniusrise_ocr.classification.predict.ImageClassPredictor.__init__--yaml-configuration-with-geniusrise","title":"YAML Configuration with geniusrise","text":"
    version: \"1\"\nspouts:\nimage_classification:\nname: \"ImageClassPredictor\"\nmethod: \"predict\"\nargs:\nclasses: '{\"0\": \"cat\", \"1\": \"dog\"}'\nmodel_path: \"/path/to/model.pth\"\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\n
    "},{"location":"ocr/ImageClassPredictor/#geniusrise_ocr.classification.predict.ImageClassPredictor.get_label","title":"get_label(class_idx)","text":"

    \ud83d\udcd6 Get the label corresponding to the class index.

    Parameters:

    Name Type Description Default class_idx int

    The class index.

    required

    Returns:

    Name Type Description str str

    The label corresponding to the class index.

    This method returns the label that corresponds to a given class index based on the classes dictionary.

    "},{"location":"ocr/ImageClassPredictor/#geniusrise_ocr.classification.predict.ImageClassPredictor.predict","title":"predict(classes, model_path, use_cuda=False)","text":"

    \ud83d\udcd6 Classify images in the input sub-folders using a pre-trained PyTorch model.

    Parameters:

    Name Type Description Default classes str

    JSON string mapping class indices to labels.

    required model_path str

    Path to the pre-trained PyTorch model.

    required use_cuda bool

    Whether to use CUDA for model inference. Default is False.

    False

    This method iterates through each image file in the specified sub-folders, applies the model, and classifies the image. The classified images are then saved in an output folder, organized by their predicted labels.

    "},{"location":"ocr/ParseCBZCBR/","title":"Parse CBZCBR files","text":"

    Bases: Bolt

    "},{"location":"ocr/ParseCBZCBR/#geniusrise_ocr.readers.cbz_cbr.ParseCBZCBR.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The ParseCBZCBR class is designed to process CBZ and CBR files, which are commonly used for comic books. It takes an input folder containing CBZ/CBR files as an argument and iterates through each file. For each file, it extracts the images and saves them in a designated output folder.

    Parameters:

    Name Type Description Default input BatchInput

    An instance of the BatchInput class for reading the data.

    required output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/ParseCBZCBR/#geniusrise_ocr.readers.cbz_cbr.ParseCBZCBR.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ParseCBZCBR rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess\n
    "},{"location":"ocr/ParseCBZCBR/#geniusrise_ocr.readers.cbz_cbr.ParseCBZCBR.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nparse_cbzcbr:\nname: \"ParseCBZCBR\"\nmethod: \"process\"\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\n
    "},{"location":"ocr/ParseCBZCBR/#geniusrise_ocr.readers.cbz_cbr.ParseCBZCBR.process","title":"process(input_folder=None)","text":"

    \ud83d\udcd6 Process CBZ and CBR files in the given input folder and extract images.

    Parameters:

    Name Type Description Default input_folder str

    The folder containing CBZ/CBR files to process.

    None

    This method iterates through each CBZ/CBR file in the specified folder and extracts the images.

    "},{"location":"ocr/ParseDjvu/","title":"Parse Djvu files","text":"

    Bases: Bolt

    "},{"location":"ocr/ParseDjvu/#geniusrise_ocr.readers.djvu.ParseDjvu.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The ParseDjvu class is designed to process DJVU files and classify them as either text-based or image-based. It takes an input folder containing DJVU files as an argument and iterates through each file. For each DJVU, it samples a few pages to determine the type of content it primarily contains. If the DJVU is text-based, the class extracts the text from each page and saves it as a JSON file. If the DJVU is image-based, it converts each page to a PNG image and saves them in a designated output folder.

    Parameters:

    Name Type Description Default input BatchInput

    An instance of the BatchInput class for reading the data.

    required output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/ParseDjvu/#geniusrise_ocr.readers.djvu.ParseDjvu.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ParseDjvu rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess\n
    "},{"location":"ocr/ParseDjvu/#geniusrise_ocr.readers.djvu.ParseDjvu.process","title":"process(input_folder=None)","text":"

    \ud83d\udcd6 Process DJVU files in the given input folder and classify them as text-based or image-based.

    Parameters:

    Name Type Description Default input_folder str

    The folder containing DJVU files to process.

    None

    This method iterates through each DJVU file in the specified folder, reads a sample of pages, and determines whether the DJVU is text-based or image-based. It then delegates further processing to _process_text_djvu or _process_image_djvu based on this determination.

    "},{"location":"ocr/ParseEpub/","title":"Parse Epub files","text":"

    Bases: Bolt

    "},{"location":"ocr/ParseEpub/#geniusrise_ocr.readers.epub.ParseEpub.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The ParseEpub class is designed to process EPUB files and classify them as either text-based or image-based. It takes an input folder containing EPUB files as an argument and iterates through each file. For each EPUB, it samples a few items to determine the type of content it primarily contains. If the EPUB is text-based, the class extracts the text from each item and saves it as a JSON file. If the EPUB is image-based, it saves the images in a designated output folder.

    Parameters:

    Name Type Description Default input BatchInput

    An instance of the BatchInput class for reading the data.

    required output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/ParseEpub/#geniusrise_ocr.readers.epub.ParseEpub.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ParseEpub rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess\n
    "},{"location":"ocr/ParseEpub/#geniusrise_ocr.readers.epub.ParseEpub.process","title":"process(input_folder=None)","text":"

    \ud83d\udcd6 Process EPUB files in the given input folder and classify them as text-based or image-based.

    Parameters:

    Name Type Description Default input_folder str

    The folder containing EPUB files to process.

    None

    This method iterates through each EPUB file in the specified folder, reads a sample of items, and determines whether the EPUB is text-based or image-based. It then delegates further processing to _process_text_epub or _process_image_epub based on this determination.

    "},{"location":"ocr/ParseMOBI/","title":"Parse MOBI files","text":"

    Bases: Bolt

    "},{"location":"ocr/ParseMOBI/#geniusrise_ocr.readers.mobi.ParseMOBI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The ParseMOBI class is designed to process MOBI files. It takes an input folder containing MOBI files as an argument and iterates through each file. For each file, it extracts the images and saves them in a designated output folder.

    Parameters:

    Name Type Description Default input BatchInput

    An instance of the BatchInput class for reading the data.

    required output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/ParseMOBI/#geniusrise_ocr.readers.mobi.ParseMOBI.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ParseMOBI rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess\n
    "},{"location":"ocr/ParseMOBI/#geniusrise_ocr.readers.mobi.ParseMOBI.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nparse_mobi:\nname: \"ParseMOBI\"\nmethod: \"process\"\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\n
    "},{"location":"ocr/ParseMOBI/#geniusrise_ocr.readers.mobi.ParseMOBI.process","title":"process(input_folder=None)","text":"

    \ud83d\udcd6 Process MOBI files in the given input folder and extract images.

    Parameters:

    Name Type Description Default input_folder str

    The folder containing MOBI files to process.

    None

    This method iterates through each MOBI file in the specified folder and extracts the images.

    "},{"location":"ocr/ParsePdf/","title":"Parse PDF files","text":"

    Bases: Bolt

    "},{"location":"ocr/ParsePdf/#geniusrise_ocr.readers.pdf.ParsePdf.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The ParsePdf class is designed to process PDF files and classify them as either text-based or image-based. It takes an input folder containing PDF files as an argument and iterates through each file. For each PDF, it samples a few pages to determine the type of content it primarily contains. If the PDF is text-based, the class extracts the text from each page and saves it as a JSON file. If the PDF is image-based, it converts each page to a PNG image and saves them in a designated output folder.

    Args:\n    input (BatchInput): An instance of the BatchInput class for reading the data.\n    output (BatchOutput): An instance of the BatchOutput class for saving the data.\n    state (State): An instance of the State class for maintaining the state.\n    **kwargs: Additional keyword arguments.\n
    "},{"location":"ocr/ParsePdf/#geniusrise_ocr.readers.pdf.ParsePdf.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ParsePdf rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess\n
    "},{"location":"ocr/ParsePdf/#geniusrise_ocr.readers.pdf.ParsePdf.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nparse_pdfs:\nname: \"ParsePdf\"\nmethod: \"process\"\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\n
    "},{"location":"ocr/ParsePdf/#geniusrise_ocr.readers.pdf.ParsePdf.process","title":"process(input_folder=None)","text":"

    \ud83d\udcd6 Process PDF files in the given input folder and classify them as text-based or image-based.

    Parameters:

    Name Type Description Default input_folder str

    The folder containing PDF files to process.

    None

    This method iterates through each PDF file in the specified folder, reads a sample of pages, and determines whether the PDF is text-based or image-based. It then delegates further processing to _process_text_pdf or _process_image_pdf based on this determination.

    "},{"location":"ocr/ParsePostScript/","title":"Parse PostScript files","text":"

    Bases: Bolt

    "},{"location":"ocr/ParsePostScript/#geniusrise_ocr.readers.postscript.ParsePostScript.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The ParsePostScript class is designed to process PostScript files and classify them as either text-based or image-based. It takes an input folder containing PostScript files as an argument and iterates through each file. For each PostScript file, it converts it to PDF and samples a few pages to determine the type of content it primarily contains. If the PostScript is text-based, the class extracts the text from each page and saves it as a JSON file. If the PostScript is image-based, it converts each page to a PNG image and saves them in a designated output folder.

    Parameters:

    Name Type Description Default input BatchInput

    An instance of the BatchInput class for reading the data.

    required output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/ParsePostScript/#geniusrise_ocr.readers.postscript.ParsePostScript.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ParsePostScript rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess\n
    "},{"location":"ocr/ParsePostScript/#geniusrise_ocr.readers.postscript.ParsePostScript.process","title":"process(input_folder=None)","text":"

    \ud83d\udcd6 Process PostScript files in the given input folder and classify them as text-based or image-based.

    Parameters:

    Name Type Description Default input_folder str

    The folder containing PostScript files to process.

    None

    This method iterates through each PostScript file in the specified folder, converts it to PDF, reads a sample of pages, and determines whether the PostScript is text-based or image-based. It then delegates further processing to _process_text_ps or _process_image_ps based on this determination.

    "},{"location":"ocr/ParseXPS/","title":"Parse XPS files","text":"

    Bases: Bolt

    "},{"location":"ocr/ParseXPS/#geniusrise_ocr.readers.xps.ParseXPS.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The ParseXPS class is designed to process XPS files. It takes an input folder containing XPS files as an argument and iterates through each file. For each file, it extracts the images and saves them in a designated output folder.

    Parameters:

    Name Type Description Default input BatchInput

    An instance of the BatchInput class for reading the data.

    required output BatchOutput

    An instance of the BatchOutput class for saving the data.

    required state State

    An instance of the State class for maintaining the state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/ParseXPS/#geniusrise_ocr.readers.xps.ParseXPS.__init__--using-geniusrise-to-invoke-via-command-line","title":"Using geniusrise to invoke via command line","text":"
    genius ParseXPS rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess\n
    "},{"location":"ocr/ParseXPS/#geniusrise_ocr.readers.xps.ParseXPS.__init__--using-geniusrise-to-invoke-via-yaml-file","title":"Using geniusrise to invoke via YAML file","text":"
    version: \"1\"\nspouts:\nparse_xps:\nname: \"ParseXPS\"\nmethod: \"process\"\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\n
    "},{"location":"ocr/ParseXPS/#geniusrise_ocr.readers.xps.ParseXPS.process","title":"process(input_folder=None)","text":"

    \ud83d\udcd6 Process XPS files in the given input folder and extract images.

    Parameters:

    Name Type Description Default input_folder str

    The folder containing XPS files to process.

    None

    This method iterates through each XPS file in the specified folder and extracts the images.

    "},{"location":"ocr/Pix2StructImageOCR/","title":"OCR using pix2struct","text":"

    Bases: Bolt

    "},{"location":"ocr/Pix2StructImageOCR/#geniusrise_ocr.ocr.pix2struct.bulk.Pix2StructImageOCR.__init__","title":"__init__(input, output, state, model_name='google/pix2struct-large', **kwargs)","text":"

    The Pix2StructImageOCR class performs OCR on images using Google's Pix2Struct model. It expects the input.input_folder to contain the images for OCR and saves the OCR results as JSON files in output.output_folder.

    Parameters:

    Name Type Description Default input BatchInput

    Instance of BatchInput for reading data.

    required output BatchOutput

    Instance of BatchOutput for saving data.

    required state State

    Instance of State for maintaining state.

    required model_name str

    The name of the Pix2Struct model to use. Default is \"google/pix2struct-large\".

    'google/pix2struct-large' **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/Pix2StructImageOCR/#geniusrise_ocr.ocr.pix2struct.bulk.Pix2StructImageOCR.__init__--command-line-invocation-with-geniusrise","title":"Command Line Invocation with geniusrise","text":"
    genius Pix2StructImageOCR rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess\n
    "},{"location":"ocr/Pix2StructImageOCR/#geniusrise_ocr.ocr.pix2struct.bulk.Pix2StructImageOCR.__init__--yaml-configuration-with-geniusrise","title":"YAML Configuration with geniusrise","text":"
    version: \"1\"\nspouts:\nocr_processing:\nname: \"Pix2StructImageOCR\"\nmethod: \"process\"\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\nuse_cuda: true\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\nuse_cuda: true\n
    "},{"location":"ocr/Pix2StructImageOCR/#geniusrise_ocr.ocr.pix2struct.bulk.Pix2StructImageOCR.process","title":"process(use_cuda=True)","text":"

    \ud83d\udcd6 Perform OCR on images in the input folder and save the OCR results as JSON files in the output folder.

    Parameters:

    Name Type Description Default use_cuda bool

    Whether to use CUDA for model inference. Default is True.

    True"},{"location":"ocr/Pix2StructImageOCRAPI/","title":"OCR API using pix2struct","text":"

    Bases: Bolt

    "},{"location":"ocr/Pix2StructImageOCRAPI/#geniusrise_ocr.ocr.pix2struct.api.Pix2StructImageOCRAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The Pix2StructImageOCRAPI class performs OCR on images using Google's Pix2Struct model. The class exposes an API endpoint for OCR on single images. The endpoint is accessible at /api/v1/ocr. The API takes a POST request with a JSON payload containing a base64 encoded image under the key image_base64. It returns a JSON response containing the OCR result under the key ocr_text.

    Parameters:

    Name Type Description Default input BatchInput

    Instance of BatchInput for reading data.

    required output BatchOutput

    Instance of BatchOutput for saving data.

    required state State

    Instance of State for maintaining state.

    required model_name str

    The name of the Pix2Struct model to use. Default is \"google/pix2struct-large\".

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/Pix2StructImageOCRAPI/#geniusrise_ocr.ocr.pix2struct.api.Pix2StructImageOCRAPI.__init__--command-line-invocation-with-geniusrise","title":"Command Line Invocation with geniusrise","text":"
    genius Pix2StructImageOCRAPI rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nlisten \\\n--args endpoint=* port=3000 cors_domain=* use_cuda=True\n
    "},{"location":"ocr/Pix2StructImageOCRAPI/#geniusrise_ocr.ocr.pix2struct.api.Pix2StructImageOCRAPI.__init__--yaml-configuration-with-geniusrise","title":"YAML Configuration with geniusrise","text":"
    version: \"1\"\nspouts:\nocr_processing:\nname: \"Pix2StructImageOCRAPI\"\nmethod: \"listen\"\nargs:\nendpoint: \"*\"\nport: 3000\ncors_domain: \"*\"\nuse_cuda: true\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\nuse_cuda: true\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\nuse_cuda: true\n
    "},{"location":"ocr/TROCRImageOCR/","title":"OCR using trocr","text":"

    Bases: Bolt

    "},{"location":"ocr/TROCRImageOCR/#geniusrise_ocr.ocr.trocr.bulk.TROCRImageOCR.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The TROCRImageOCR class performs OCR (Optical Character Recognition) on images using Microsoft's TROCR model. It expects the input.input_folder to contain the images for OCR and saves the OCR results as JSON files in output.output_folder.

    Parameters:

    Name Type Description Default input BatchInput

    Instance of BatchInput for reading data.

    required output BatchOutput

    Instance of BatchOutput for saving data.

    required state State

    Instance of State for maintaining state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/TROCRImageOCR/#geniusrise_ocr.ocr.trocr.bulk.TROCRImageOCR.__init__--command-line-invocation-with-geniusrise","title":"Command Line Invocation with geniusrise","text":"
    genius TROCRImageOCR rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess\n
    "},{"location":"ocr/TROCRImageOCR/#geniusrise_ocr.ocr.trocr.bulk.TROCRImageOCR.__init__--yaml-configuration-with-geniusrise","title":"YAML Configuration with geniusrise","text":"
    version: \"1\"\nspouts:\nocr_processing:\nname: \"TROCRImageOCR\"\nmethod: \"process\"\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\nuse_cuda: true\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\nuse_cuda: true\n
    "},{"location":"ocr/TROCRImageOCR/#geniusrise_ocr.ocr.trocr.bulk.TROCRImageOCR.process","title":"process(kind='printed', use_cuda=True)","text":"

    \ud83d\udcd6 Perform OCR on images in the input folder and save the OCR results as JSON files in the output folder.

    This method iterates through each image file in input.input_folder, performs OCR using the TROCR model, and saves the OCR results as JSON files in output.output_folder.

    Parameters:

    Name Type Description Default kind str

    The kind of TROCR model to use. Default is \"printed\". Options are \"printed\" or \"handwritten\".

    'printed' use_cuda bool

    Whether to use CUDA for model inference. Default is True.

    True"},{"location":"ocr/TROCRImageOCRAPI/","title":"OCR API using trocr","text":"

    Bases: Bolt

    "},{"location":"ocr/TROCRImageOCRAPI/#geniusrise_ocr.ocr.trocr.api.TROCRImageOCRAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The TROCRImageOCRAPI class performs OCR (Optical Character Recognition) on images using Microsoft's TROCR model. The class exposes an API endpoint for OCR on single images. The endpoint is accessible at /api/v1/ocr. The API takes a POST request with a JSON payload containing a base64 encoded image under the key image_base64. It returns a JSON response containing the OCR result under the key ocr_text.

    Parameters:

    Name Type Description Default input BatchInput

    Instance of BatchInput for reading data.

    required output BatchOutput

    Instance of BatchOutput for saving data.

    required state State

    Instance of State for maintaining state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/TROCRImageOCRAPI/#geniusrise_ocr.ocr.trocr.api.TROCRImageOCRAPI.__init__--command-line-invocation-with-geniusrise","title":"Command Line Invocation with geniusrise","text":"
    genius TROCRImageOCRAPI rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nlisten \\\n--args endpoint=* port=3000 cors_domain=* kind=handwritten use_cuda=True\n
    "},{"location":"ocr/TROCRImageOCRAPI/#geniusrise_ocr.ocr.trocr.api.TROCRImageOCRAPI.__init__--yaml-configuration-with-geniusrise","title":"YAML Configuration with geniusrise","text":"
    version: \"1\"\nspouts:\nocr_processing:\nname: \"TROCRImageOCRAPI\"\nmethod: \"listen\"\nargs:\nendpoint: \"*\"\nport: 3000\ncors_domain: \"*\"\nkind: handwritten\nuse_cuda: true\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\nuse_cuda: true\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\nuse_cuda: true\n
    "},{"location":"ocr/TROCRImageOCRAPI/#geniusrise_ocr.ocr.trocr.api.TROCRImageOCRAPI.__init__--api-example","title":"API Example","text":"
    curl -X POST \"http://localhost:3000/api/v1/ocr\" -H \"Content-Type: application/json\" -d '{\"image_base64\": \"your_base64_encoded_image_here\"}'\n
    "},{"location":"ocr/TROCRImageOCRAPI/#geniusrise_ocr.ocr.trocr.api.TROCRImageOCRAPI.preprocess_and_detect_boxes","title":"preprocess_and_detect_boxes(image)","text":"

    Preprocess the image and detect text bounding boxes using the EAST model.

    Parameters:

    Name Type Description Default image Image.Image

    PIL Image object.

    required

    Returns:

    Type Description List[Tuple[int, int, int, int]]

    List[Tuple[int, int, int, int]]: List of bounding boxes (x, y, w, h).

    "},{"location":"ocr/TrainImageClassifier/","title":"Train image classifier","text":"

    Bases: Bolt

    "},{"location":"ocr/TrainImageClassifier/#geniusrise_ocr.classification.train.TrainImageClassifier.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    The TrainImageClassifier class trains an image classifier using a ResNet-152 model. It assumes that the input.input_folder contains sub-folders named 'train' and 'test'. Each of these sub-folders should contain class-specific folders with images. The trained model is saved as 'model.pth' in output.output_folder.

    Parameters:

    Name Type Description Default input BatchInput

    Instance of BatchInput for reading data.

    required output BatchOutput

    Instance of BatchOutput for saving data.

    required state State

    Instance of State for maintaining state.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"ocr/TrainImageClassifier/#geniusrise_ocr.classification.train.TrainImageClassifier.__init__--command-line-invocation-with-geniusrise","title":"Command Line Invocation with geniusrise","text":"
    genius TrainImageClassifier rise \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/input \\\nbatch \\\n--bucket my_bucket \\\n--s3_folder s3/output \\\nnone \\\nprocess \\\n--args num_classes=4 epochs=10 batch_size=32 learning_rate=0.001\n
    "},{"location":"ocr/TrainImageClassifier/#geniusrise_ocr.classification.train.TrainImageClassifier.__init__--yaml-configuration-with-geniusrise","title":"YAML Configuration with geniusrise","text":"
    version: \"1\"\nspouts:\nimage_training:\nname: \"TrainImageClassifier\"\nmethod: \"process\"\nargs:\nnum_classes: 4\nepochs: 10\nbatch_size: 32\nlearning_rate: 0.001\ninput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/input\"\noutput:\ntype: \"batch\"\nargs:\nbucket: \"my_bucket\"\ns3_folder: \"s3/output\"\n
    "},{"location":"ocr/TrainImageClassifier/#geniusrise_ocr.classification.train.TrainImageClassifier.process","title":"process(num_classes=4, epochs=10, batch_size=32, learning_rate=0.001, use_cuda=False)","text":"

    \ud83d\udcd6 Train an image classifier using a ResNet-152 model.

    Parameters:

    Name Type Description Default num_classes int

    Number of classes of the images.

    4 epochs int

    Number of training epochs. Default is 10.

    10 batch_size int

    Batch size for training. Default is 32.

    32 learning_rate float

    Learning rate for the optimizer. Default is 0.001.

    0.001 use_cuda bool

    Whether to use CUDA for model training. Default is False.

    False

    This method trains a ResNet-152 model using the images in the 'train' and 'test' sub-folders of input.input_folder. Each of these sub-folders should contain class-specific folders with images. The trained model is saved as 'model.pth' in output.output_folder.

    "},{"location":"text/api/base/","title":"Base Fine Tuner","text":"

    Bases: TextBulk

    A class representing a Hugging Face API for generating text using a pre-trained language model.

    Attributes:

    Name Type Description model Any

    The pre-trained language model.

    tokenizer Any

    The tokenizer used to preprocess input text.

    model_name str

    The name of the pre-trained language model.

    model_revision Optional[str]

    The revision of the pre-trained language model.

    tokenizer_name str

    The name of the tokenizer used to preprocess input text.

    tokenizer_revision Optional[str]

    The revision of the tokenizer used to preprocess input text.

    model_class str

    The name of the class of the pre-trained language model.

    tokenizer_class str

    The name of the class of the tokenizer used to preprocess input text.

    use_cuda bool

    Whether to use a GPU for inference.

    quantization int

    The level of quantization to use for the pre-trained language model.

    precision str

    The precision to use for the pre-trained language model.

    device_map str | Dict | None

    The mapping of devices to use for inference.

    max_memory Dict[int, str]

    The maximum memory to use for inference.

    torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model.

    model_args Any

    Additional arguments to pass to the pre-trained language model.

    Methods

    text(**kwargs: Any) -> Dict[str, Any]: Generates text based on the given prompt and decoding strategy.

    listen(model_name: str, model_class: str = \"AutoModelForCausalLM\", tokenizer_class: str = \"AutoTokenizer\", use_cuda: bool = False, precision: str = \"float16\", quantization: int = 0, device_map: str | Dict | None = \"auto\", max_memory={0: \"24GB\"}, torchscript: bool = True, endpoint: str = \"\", port: int = 3000, cors_domain: str = \"http://localhost:3000\", username: Optional[str] = None, password: Optional[str] = None, *model_args: Any) -> None: Starts a CherryPy server to listen for requests to generate text.

    "},{"location":"text/api/base/#geniusrise_text.base.api.TextAPI.__init__","title":"__init__(input, output, state)","text":"

    Initializes a new instance of the TextAPI class.

    Parameters:

    Name Type Description Default input BatchInput

    The input data to process.

    required output BatchOutput

    The output data to process.

    required state State

    The state of the API.

    required"},{"location":"text/api/base/#geniusrise_text.base.api.TextAPI.listen","title":"listen(model_name, model_class='AutoModelForCausalLM', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, concurrent_queries=False, use_vllm=False, use_llama_cpp=False, vllm_tokenizer_mode='auto', vllm_download_dir=None, vllm_load_format='auto', vllm_seed=42, vllm_max_model_len=1024, vllm_enforce_eager=False, vllm_max_context_len_to_capture=8192, vllm_block_size=16, vllm_gpu_memory_utilization=0.9, vllm_swap_space=4, vllm_sliding_window=None, vllm_pipeline_parallel_size=1, vllm_tensor_parallel_size=1, vllm_worker_use_ray=False, vllm_max_parallel_loading_workers=None, vllm_disable_custom_all_reduce=False, vllm_max_num_batched_tokens=None, vllm_max_num_seqs=64, vllm_max_paddings=512, vllm_max_lora_rank=None, vllm_max_loras=None, vllm_max_cpu_loras=None, vllm_lora_extra_vocab_size=0, vllm_placement_group=None, vllm_log_stats=False, llama_cpp_filename=None, llama_cpp_n_gpu_layers=0, llama_cpp_split_mode=llama_cpp.LLAMA_SPLIT_LAYER, llama_cpp_tensor_split=None, llama_cpp_vocab_only=False, llama_cpp_use_mmap=True, llama_cpp_use_mlock=False, llama_cpp_kv_overrides=None, llama_cpp_seed=llama_cpp.LLAMA_DEFAULT_SEED, llama_cpp_n_ctx=2048, llama_cpp_n_batch=512, llama_cpp_n_threads=None, llama_cpp_n_threads_batch=None, llama_cpp_rope_scaling_type=llama_cpp.LLAMA_ROPE_SCALING_UNSPECIFIED, llama_cpp_rope_freq_base=0.0, llama_cpp_rope_freq_scale=0.0, llama_cpp_yarn_ext_factor=-1.0, llama_cpp_yarn_attn_factor=1.0, llama_cpp_yarn_beta_fast=32.0, llama_cpp_yarn_beta_slow=1.0, llama_cpp_yarn_orig_ctx=0, llama_cpp_mul_mat_q=True, llama_cpp_logits_all=False, llama_cpp_embedding=False, llama_cpp_offload_kqv=True, llama_cpp_last_n_tokens_size=64, llama_cpp_lora_base=None, llama_cpp_lora_scale=1.0, llama_cpp_lora_path=None, llama_cpp_numa=False, 
llama_cpp_chat_format=None, llama_cpp_draft_model=None, llama_cpp_verbose=True, endpoint='*', port=3000, cors_domain='http://localhost:3000', username=None, password=None, **model_args)","text":"

    Starts a CherryPy server to listen for requests to generate text.

    Parameters:

    Name Type Description Default model_name str

    Name or identifier of the pre-trained model to be used.

    required model_class str

    Class name of the model to be used from the transformers library.

    'AutoModelForCausalLM' tokenizer_class str

    Class name of the tokenizer to be used from the transformers library.

    'AutoTokenizer' use_cuda bool

    Flag to enable CUDA for GPU acceleration.

    False precision str

    Specifies the precision configuration for PyTorch tensors, e.g., \"float16\".

    'float16' quantization int

    Level of model quantization to reduce model size and inference time.

    0 device_map Union[str, Dict, None]

    Maps model layers to specific devices for distributed inference.

    'auto' max_memory Dict[int, str]

    Maximum memory allocation for the model on each device.

    {0: '24GB'} torchscript bool

    Enables the use of TorchScript for model optimization.

    False compile bool

    Enables model compilation for further optimization.

    False awq_enabled bool

    Enables Adaptive Weight Quantization (AWQ) for model optimization.

    False flash_attention bool

    Utilizes Flash Attention optimizations for faster processing.

    False concurrent_queries bool

    Allows the server to handle multiple requests concurrently if True.

    False use_vllm bool

    Flag to use Very Large Language Models (VLLM) integration.

    False use_llama_cpp bool

    Flag to use llama.cpp integration for language model inference.

    False llama_cpp_filename Optional[str]

    The filename of the model file for llama.cpp.

    None llama_cpp_n_gpu_layers int

    Number of layers to offload to GPU in llama.cpp configuration.

    0 llama_cpp_split_mode int

    Defines how the model is split across multiple GPUs in llama.cpp.

    llama_cpp.LLAMA_SPLIT_LAYER llama_cpp_tensor_split Optional[List[float]]

    Custom tensor split configuration for llama.cpp.

    None llama_cpp_vocab_only bool

    Loads only the vocabulary part of the model in llama.cpp.

    False llama_cpp_use_mmap bool

    Enables memory-mapped files for model loading in llama.cpp.

    True llama_cpp_use_mlock bool

    Locks the model in RAM to prevent swapping in llama.cpp.

    False llama_cpp_kv_overrides Optional[Dict[str, Union[bool, int, float]]]

    Key-value pairs for overriding default llama.cpp model parameters.

    None llama_cpp_seed int

    Seed for random number generation in llama.cpp.

    llama_cpp.LLAMA_DEFAULT_SEED llama_cpp_n_ctx int

    The number of context tokens for the model in llama.cpp.

    2048 llama_cpp_n_batch int

    Batch size for processing prompts in llama.cpp.

    512 llama_cpp_n_threads Optional[int]

    Number of threads for generation in llama.cpp.

    None llama_cpp_n_threads_batch Optional[int]

    Number of threads for batch processing in llama.cpp.

    None llama_cpp_rope_scaling_type Optional[int]

    Specifies the RoPE (Rotary Positional Embeddings) scaling type in llama.cpp.

    llama_cpp.LLAMA_ROPE_SCALING_UNSPECIFIED llama_cpp_rope_freq_base float

    Base frequency for RoPE in llama.cpp.

    0.0 llama_cpp_rope_freq_scale float

    Frequency scaling factor for RoPE in llama.cpp.

    0.0 llama_cpp_yarn_ext_factor float

    Extrapolation mix factor for YaRN in llama.cpp.

    -1.0 llama_cpp_yarn_attn_factor float

    Attention factor for YaRN in llama.cpp.

    1.0 llama_cpp_yarn_beta_fast float

    Beta fast parameter for YaRN in llama.cpp.

    32.0 llama_cpp_yarn_beta_slow float

    Beta slow parameter for YaRN in llama.cpp.

    1.0 llama_cpp_yarn_orig_ctx int

    Original context size for YaRN in llama.cpp.

    0 llama_cpp_mul_mat_q bool

    Flag to enable matrix multiplication for queries in llama.cpp.

    True llama_cpp_logits_all bool

    Returns logits for all tokens when set to True in llama.cpp.

    False llama_cpp_embedding bool

    Enables embedding mode only in llama.cpp.

    False llama_cpp_offload_kqv bool

    Offloads K, Q, V matrices to GPU in llama.cpp.

    True llama_cpp_last_n_tokens_size int

    Size for the last_n_tokens buffer in llama.cpp.

    64 llama_cpp_lora_base Optional[str]

    Base model path for LoRA adjustments in llama.cpp.

    None llama_cpp_lora_scale float

    Scale factor for LoRA adjustments in llama.cpp.

    1.0 llama_cpp_lora_path Optional[str]

    Path to LoRA adjustments file in llama.cpp.

    None llama_cpp_numa Union[bool, int]

    NUMA configuration for llama.cpp.

    False llama_cpp_chat_format Optional[str]

    Specifies the chat format for llama.cpp.

    None llama_cpp_draft_model Optional[llama_cpp.LlamaDraftModel]

    Draft model for speculative decoding in llama.cpp.

    None endpoint str

    Network interface to bind the server to.

    '*' port int

    Port number to listen on for incoming requests.

    3000 cors_domain str

    Specifies the domain to allow for Cross-Origin Resource Sharing (CORS).

    'http://localhost:3000' username Optional[str]

    Username for basic authentication, if required.

    None password Optional[str]

    Password for basic authentication, if required.

    None **model_args Any

    Additional arguments to pass to the pre-trained language model or llama.cpp configuration.

    {}"},{"location":"text/api/base/#geniusrise_text.base.api.TextAPI.text","title":"text(**kwargs)","text":"

    Generates text based on the given prompt and decoding strategy.

    Parameters:

    Name Type Description Default **kwargs Any

    Additional arguments to pass to the pre-trained language model.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the prompt, arguments, and generated text.

    "},{"location":"text/api/base/#geniusrise_text.base.api.TextAPI.validate_password","title":"validate_password(realm, username, password)","text":"

    Validate the username and password against expected values.

    Parameters:

    Name Type Description Default realm str

    The authentication realm.

    required username str

    The provided username.

    required password str

    The provided password.

    required

    Returns:

    Name Type Description bool

    True if credentials are valid, False otherwise.

    "},{"location":"text/api/classification/","title":"Classification","text":"

    Bases: TextAPI

    TextClassificationAPI leveraging Hugging Face's transformers for text classification tasks. This API provides an interface to classify text into various categories like sentiment, topic, intent, etc.

    Attributes:

    Name Type Description model AutoModelForSequenceClassification

    A Hugging Face model for sequence classification.

    tokenizer AutoTokenizer

    A tokenizer for preprocessing text.

    hf_pipeline Pipeline

    A Hugging Face pipeline for text classification.

    Methods

    classify(self): Classifies text using the model and tokenizer. classification_pipeline(self): Classifies text using the Hugging Face pipeline. initialize_pipeline(self): Lazy initialization of the classification pipeline.

    Example CLI Usage:

    genius TextClassificationAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id cardiffnlp/twitter-roberta-base-hate-multiclass-latest-lol \\\nlisten \\\n--args \\\nmodel_name=\"cardiffnlp/twitter-roberta-base-hate-multiclass-latest\" \\\nmodel_class=\"AutoModelForSequenceClassification\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    "},{"location":"text/api/classification/#geniusrise_text.classification.api.TextClassificationAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the TextClassificationAPI with the necessary configurations for input, output, and state management.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the API.

    required **kwargs

    Additional keyword arguments for extended functionality.

    {}"},{"location":"text/api/classification/#geniusrise_text.classification.api.TextClassificationAPI.classification_pipeline","title":"classification_pipeline()","text":"

    Accepts text input and returns classification results using the Hugging Face pipeline.

    This method uses the Hugging Face pipeline for efficient and robust text classification. It's suitable for various classification tasks such as sentiment analysis, topic classification, and intent recognition.

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the original input text and the classification results.

    Example CURL Request for text classification:

    /usr/bin/curl -X POST localhost:3000/api/v1/classification_pipeline             -H \"Content-Type: application/json\"             -d '{\"text\": \"The movie was fantastic, with great acting and plot.\"}' | jq\n

    "},{"location":"text/api/classification/#geniusrise_text.classification.api.TextClassificationAPI.classify","title":"classify()","text":"

    Accepts text input and returns classification results. The method uses the model and tokenizer to classify the text and provide the likelihood of each class label.

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the original input text and the classification scores for each label.

    Example CURL Request for text classification:

    /usr/bin/curl -X POST localhost:3000/api/v1/classify             -H \"Content-Type: application/json\"             -d '{\n        \"text\": \"tata sons lost a major contract to its rival mahindra motors\"\n    }' | jq\n

    "},{"location":"text/api/classification/#geniusrise_text.classification.api.TextClassificationAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the Hugging Face pipeline for classification.

    "},{"location":"text/api/instruction_tuning/","title":"Instruction Tuning","text":"

    Bases: TextAPI

    InstructionAPI is designed for generating text based on prompts using instruction-tuned language models. It serves as an interface to Hugging Face's pre-trained instruction-tuned models, providing a flexible API for various text generation tasks. It can be used in scenarios ranging from generating creative content to providing instructions or answers based on the prompts.

    Attributes:

    Name Type Description model Any

    The loaded instruction-tuned language model.

    tokenizer Any

    The tokenizer for processing text suitable for the model.

    Methods

    complete(**kwargs: Any) -> Dict[str, Any]: Generates text based on the given prompt and decoding strategy.

    listen(**model_args: Any) -> None: Starts a server to listen for text generation requests.

    CLI Usage Example:

    genius InstructionAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"TheBloke/Mistral-7B-OpenOrca-AWQ\" \\\nmodel_class=\"AutoModelForCausalLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float16\" \\\nquantization=0 \\\ndevice_map=\"auto\" \\\nmax_memory=None \\\ntorchscript=False \\\nawq_enabled=True \\\nflash_attention=True \\\nendpoint=\"*\" \\\nport=3001 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    Or using VLLM:

    genius InstructionAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id mistralai/Mistral-7B-Instruct-v0.1 \\\nlisten \\\n--args \\\nmodel_name=\"mistralai/Mistral-7B-Instruct-v0.1\" \\\nmodel_class=\"AutoModelForCausalLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\ndevice_map=\"auto\" \\\nmax_memory=None \\\ntorchscript=False \\\nuse_vllm=True \\\nvllm_enforce_eager=True \\\nvllm_max_model_len=1024 \\\nconcurrent_queries=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    or using llama.cpp:

    genius InstructionAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"TheBloke/Mistral-7B-Instruct-v0.2-GGUF\" \\\nmodel_class=\"AutoModelForCausalLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nuse_llama_cpp=True \\\nllama_cpp_filename=\"mistral-7b-instruct-v0.2.Q4_K_M.gguf\" \\\nllama_cpp_n_gpu_layers=35 \\\nllama_cpp_n_ctx=32768 \\\nconcurrent_queries=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    "},{"location":"text/api/instruction_tuning/#geniusrise_text.instruction.api.InstructionAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes a new instance of the InstructionAPI class, setting up the necessary configurations for input, output, and state.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    The state of the API.

    required **kwargs Any

    Additional keyword arguments for extended functionality.

    {}"},{"location":"text/api/instruction_tuning/#geniusrise_text.instruction.api.InstructionAPI.chat","title":"chat(**kwargs)","text":"

    Handles chat interaction using the Hugging Face pipeline. This method enables conversational text generation, simulating a chat-like interaction based on user and system prompts.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments containing 'user_prompt' and 'system_prompt'.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the user prompt, system prompt, and chat interaction results.

    Example CURL Request for chat interaction:

    /usr/bin/curl -X POST localhost:3001/api/v1/chat             -H \"Content-Type: application/json\"             -d '{\n        \"user_prompt\": \"What is the capital of France?\",\n        \"system_prompt\": \"The capital of France is\"\n    }' | jq\n

    "},{"location":"text/api/instruction_tuning/#geniusrise_text.instruction.api.InstructionAPI.chat_llama_cpp","title":"chat_llama_cpp(**kwargs)","text":"

    Handles POST requests to generate chat completions using the llama.cpp engine. This method accepts various parameters for customizing the chat completion request, including messages, sampling settings, and more.

    Parameters:

    Name Type Description Default messages List[Dict[str, str]]

    The chat messages for generating a response.

    required functions Optional[List[Dict]]

    A list of functions to use for the chat completion (advanced usage).

    required function_call Optional[Dict]

    A function call to use for the chat completion (advanced usage).

    required tools Optional[List[Dict]]

    A list of tools to use for the chat completion (advanced usage).

    required tool_choice Optional[Dict]

    A tool choice option for the chat completion (advanced usage).

    required temperature float

    The temperature to use for sampling, controlling randomness.

    required top_p float

    The nucleus sampling's top-p parameter, controlling diversity.

    required top_k int

    The top-k sampling parameter, limiting the token selection pool.

    required min_p float

    The minimum probability threshold for sampling.

    required typical_p float

    The typical-p parameter for locally typical sampling.

    required stream bool

    Flag to stream the results.

    required stop Optional[Union[str, List[str]]]

    Tokens or sequences where generation should stop.

    required seed Optional[int]

    Seed for random number generation to ensure reproducibility.

    required response_format Optional[Dict]

    Specifies the format of the generated response.

    required max_tokens Optional[int]

    Maximum number of tokens to generate.

    required presence_penalty float

    Penalty for token presence to discourage repetition.

    required frequency_penalty float

    Penalty for token frequency to discourage common tokens.

    required repeat_penalty float

    Penalty applied to tokens that are repeated.

    required tfs_z float

    Tail-free sampling parameter to adjust the likelihood of tail tokens.

    required mirostat_mode int

    Mirostat sampling mode for dynamic adjustments.

    required mirostat_tau float

    Tau parameter for mirostat sampling, controlling deviation.

    required mirostat_eta float

    Eta parameter for mirostat sampling, controlling adjustment speed.

    required model Optional[str]

    Specifies the model to use for generation.

    required logits_processor Optional[List]

    List of logits processors for advanced generation control.

    required grammar Optional[Dict]

    Specifies grammar rules for the generated text.

    required logit_bias Optional[Dict[str, float]]

    Adjustments to the logits of specified tokens.

    required logprobs Optional[bool]

    Whether to include log probabilities in the output.

    required top_logprobs Optional[int]

    Number of top log probabilities to include.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the chat completion response or an error message.

    Example CURL Request:

    curl -X POST \"http://localhost:3000/api/v1/chat_llama_cpp\"             -H \"Content-Type: application/json\"             -d '{\n        \"messages\": [\n            {\"role\": \"user\", \"content\": \"What is the capital of France?\"},\n            {\"role\": \"system\", \"content\": \"The capital of France is\"}\n        ],\n        \"temperature\": 0.2,\n        \"top_p\": 0.95,\n        \"top_k\": 40,\n        \"max_tokens\": 50,\n    }'\n

    "},{"location":"text/api/instruction_tuning/#geniusrise_text.instruction.api.InstructionAPI.chat_vllm","title":"chat_vllm(**kwargs)","text":"

    Handles POST requests to generate chat completions using the VLLM (Versatile Language Learning Model) engine. This method accepts various parameters for customizing the chat completion request, including message content, generation settings, and more.

    Parameters:

    Name Type Description Default messages List[Dict[str, str]]

    The chat messages for generating a response. Each message should include a 'role' (either 'user' or 'system') and 'content'.

    required temperature float

    The sampling temperature. Defaults to 0.7. Higher values generate more random completions.

    required top_p float

    The nucleus sampling probability. Defaults to 1.0. A smaller value leads to higher diversity.

    required n int

    The number of completions to generate. Defaults to 1.

    required max_tokens int

    The maximum number of tokens to generate. Controls the length of the generated response.

    required stop Union[str, List[str]]

    Sequence(s) where the generation should stop. Can be a single string or a list of strings.

    required stream bool

    Whether to stream the response. Streaming may be useful for long completions.

    required presence_penalty float

    Adjusts the likelihood of tokens based on their presence in the conversation so far. Defaults to 0.0.

    required frequency_penalty float

    Adjusts the likelihood of tokens based on their frequency in the conversation so far. Defaults to 0.0.

    required logit_bias Dict[str, float]

    Adjustments to the logits of specified tokens, identified by token IDs as keys and adjustment values as values.

    required user str

    An identifier for the user making the request. Can be used for logging or customization.

    required best_of int

    Generates 'n' completions server-side and returns the best one. Higher values incur more computation cost.

    required top_k int

    Filters the generated tokens to the top-k tokens with the highest probabilities. Defaults to -1, which disables top-k filtering.

    required ignore_eos bool

    Whether to ignore the end-of-sentence token in generation. Useful for more fluid continuations.

    required use_beam_search bool

    Whether to use beam search instead of sampling for generation. Beam search can produce more coherent results.

    required stop_token_ids List[int]

    List of token IDs that should cause generation to stop.

    required skip_special_tokens bool

    Whether to skip special tokens (like padding or end-of-sequence tokens) in the output.

    required spaces_between_special_tokens bool

    Whether to insert spaces between special tokens in the output.

    required add_generation_prompt bool

    Whether to prepend the generation prompt to the output.

    required echo bool

    Whether to include the input prompt in the output.

    required repetition_penalty float

    Penalty applied to tokens that have been generated previously. Defaults to 1.0, which applies no penalty.

    required min_p float

    Sets a minimum threshold for token probabilities. Tokens with probabilities below this threshold are filtered out.

    required include_stop_str_in_output bool

    Whether to include the stop string(s) in the output.

    required length_penalty float

    Exponential penalty to the length for beam search. Only relevant if use_beam_search is True.

    required

    Returns:

    Dict[str, Any]: A dictionary with the chat completion response or an error message.

    Example CURL Request:

    curl -X POST \"http://localhost:3000/api/v1/chat_vllm\"             -H \"Content-Type: application/json\"             -d '{\n        \"messages\": [\n            {\"role\": \"user\", \"content\": \"Whats the weather like in London?\"}\n        ],\n        \"temperature\": 0.7,\n        \"top_p\": 1.0,\n        \"n\": 1,\n        \"max_tokens\": 50,\n        \"stream\": false,\n        \"presence_penalty\": 0.0,\n        \"frequency_penalty\": 0.0,\n        \"logit_bias\": {},\n        \"user\": \"example_user\"\n    }'\n
    This request asks the VLLM engine to generate a completion for the provided chat context, with specified generation settings.

    "},{"location":"text/api/instruction_tuning/#geniusrise_text.instruction.api.InstructionAPI.complete","title":"complete(**kwargs)","text":"
        Handles POST requests to generate text based on the given prompt and decoding strategy. It uses the pre-trained\n    model specified in the setup to generate a completion for the input prompt.\n\n    Args:\n        **kwargs (Any): Arbitrary keyword arguments containing the 'prompt' and other parameters for text generation.\n\n    Returns:\n        Dict[str, Any]: A dictionary containing the original prompt and the generated completion.\n\n    Example CURL Requests:\n    ```bash\n    /usr/bin/curl -X POST localhost:3001/api/v1/complete             -H \"Content-Type: application/json\"             -d '{\n            \"prompt\": \"<|system|>\n

    <|end|> <|user|> How do I sort a list in Python?<|end|> <|assistant|>\", \"decoding_strategy\": \"generate\", \"max_new_tokens\": 100, \"do_sample\": true, \"temperature\": 0.7, \"top_k\": 50, \"top_p\": 0.95 }' | jq ```

    "},{"location":"text/api/instruction_tuning/#geniusrise_text.instruction.api.InstructionAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the Hugging Face pipeline for chat interaction.

    "},{"location":"text/api/language_model/","title":"Language Model","text":"

    Bases: TextAPI

    LanguageModelAPI is a class for interacting with pre-trained language models to generate text. It allows for customizable text generation via a CherryPy web server, handling requests and generating responses using a specified language model. This class is part of the GeniusRise ecosystem for facilitating NLP tasks.

    Attributes:

    Name Type Description model Any

    The loaded language model used for text generation.

    tokenizer Any

    The tokenizer corresponding to the language model, used for processing input text.

    Methods

    complete(**kwargs: Any) -> Dict[str, Any]: Generates text based on provided prompts and model parameters.

    CLI Usage Example:

    genius LanguageModelAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id mistralai/Mistral-7B-v0.1-lol \\\nlisten \\\n--args \\\nmodel_name=\"mistralai/Mistral-7B-v0.1\" \\\nmodel_class=\"AutoModelForCausalLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float16\" \\\nquantization=0 \\\ndevice_map=\"auto\" \\\nmax_memory=None \\\ntorchscript=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    or using VLLM:

    genius LanguageModelAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id mistralai/Mistral-7B-v0.1 \\\nlisten \\\n--args \\\nmodel_name=\"mistralai/Mistral-7B-v0.1\" \\\nmodel_class=\"AutoModelForCausalLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nuse_vllm=True \\\nvllm_enforce_eager=True \\\nvllm_max_model_len=2048 \\\nconcurrent_queries=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    or using llama.cpp:

    genius LanguageModelAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"TheBloke/Mistral-7B-v0.1-GGUF\" \\\nmodel_class=\"AutoModelForCausalLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nuse_llama_cpp=True \\\nllama_cpp_filename=\"mistral-7b-v0.1.Q4_K_M.gguf\" \\\nllama_cpp_n_gpu_layers=35 \\\nllama_cpp_n_ctx=32768 \\\nconcurrent_queries=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    "},{"location":"text/api/language_model/#geniusrise_text.language_model.api.LanguageModelAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the LanguageModelAPI with configurations for the input, output, and state management, along with any additional model-specific parameters.

    Parameters:

    Name Type Description Default input BatchInput

    The configuration for input data handling.

    required output BatchOutput

    The configuration for output data handling.

    required state State

    The state management for the API.

    required **kwargs Any

    Additional keyword arguments for model configuration and API setup.

    {}"},{"location":"text/api/language_model/#geniusrise_text.language_model.api.LanguageModelAPI.complete","title":"complete(**kwargs)","text":"

    Handles POST requests to generate text based on a given prompt and model-specific parameters. This method is exposed as a web endpoint through CherryPy and returns a JSON response containing the original prompt, the generated text, and any additional returned information from the model.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments containing the prompt, and any additional parameters

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary with the original prompt, generated text, and other model-specific information.

    Example CURL Request:

    /usr/bin/curl -X POST localhost:3000/api/v1/complete \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"prompt\": \"Below is an instruction that describes a task. Write a response that appropriately completes the request.\\n\\n### Instruction:\\nWrite a PRD for Oauth auth using keycloak\\n\\n### Response:\",\n        \"decoding_strategy\": \"generate\",\n        \"max_new_tokens\": 1024,\n        \"do_sample\": true\n    }' | jq\n

    "},{"location":"text/api/language_model/#geniusrise_text.language_model.api.LanguageModelAPI.complete_llama_cpp","title":"complete_llama_cpp(**kwargs)","text":"

    Handles POST requests to generate chat completions using the llama.cpp engine. This method accepts various parameters for customizing the chat completion request, including messages, sampling settings, and more.

    Parameters:

    Name Type Description Default prompt

    The prompt to generate text from.

    required suffix

    A suffix to append to the generated text. If None, no suffix is appended.

    required max_tokens

    The maximum number of tokens to generate. If max_tokens <= 0 or None, the maximum number of tokens to generate is unlimited and depends on n_ctx.

    required temperature

    The temperature to use for sampling.

    required top_p

    The top-p value to use for nucleus sampling. Nucleus sampling described in academic paper \"The Curious Case of Neural Text Degeneration\" https://arxiv.org/abs/1904.09751

    required min_p

    The min-p value to use for minimum p sampling. Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841

    required typical_p

    The typical-p value to use for sampling. Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.

    required logprobs

    The number of logprobs to return. If None, no logprobs are returned.

    required echo

    Whether to echo the prompt.

    required stop

    A list of strings to stop generation when encountered.

    required frequency_penalty

    The penalty to apply to tokens based on their frequency in the prompt.

    required presence_penalty

    The penalty to apply to tokens based on their presence in the prompt.

    required repeat_penalty

    The penalty to apply to repeated tokens.

    required top_k

    The top-k value to use for sampling. Top-K sampling described in academic paper \"The Curious Case of Neural Text Degeneration\" https://arxiv.org/abs/1904.09751

    required stream

    Whether to stream the results.

    required seed

    The seed to use for sampling.

    required tfs_z

    The tail-free sampling parameter. Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.

    required mirostat_mode

    The mirostat sampling mode.

    required mirostat_tau

    The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.

    required mirostat_eta

    The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates.

    required model

    The name to use for the model in the completion object.

    required stopping_criteria

    A list of stopping criteria to use.

    required logits_processor

    A list of logits processors to use.

    required grammar

    A grammar to use for constrained sampling.

    required logit_bias

    A logit bias to use.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the chat completion response or an error message.

    Example CURL Request:

    curl -X POST \"http://localhost:3001/api/v1/complete_llama_cpp\"             -H \"Content-Type: application/json\"             -d '{\n        \"prompt\": \"Whats the weather like in London?\",\n        \"temperature\": 0.7,\n        \"top_p\": 0.95,\n        \"top_k\": 40,\n        \"max_tokens\": 50,\n        \"repeat_penalty\": 1.1\n    }'\n

    "},{"location":"text/api/language_model/#geniusrise_text.language_model.api.LanguageModelAPI.complete_vllm","title":"complete_vllm(**kwargs)","text":"

    Handles POST requests to generate chat completions using the VLLM (Versatile Language Learning Model) engine. This method accepts various parameters for customizing the chat completion request, including message content, generation settings, and more.

    • **kwargs (Any): Arbitrary keyword arguments. Expects data in JSON format containing any of the following keys:
      • messages (Union[str, List[Dict[str, str]]]): The messages for the chat context.
      • temperature (float, optional): The sampling temperature. Defaults to 0.7.
      • top_p (float, optional): The nucleus sampling probability. Defaults to 1.0.
      • n (int, optional): The number of completions to generate. Defaults to 1.
      • max_tokens (int, optional): The maximum number of tokens to generate.
      • stop (Union[str, List[str]], optional): Stop sequence to end generation.
      • stream (bool, optional): Whether to stream the response. Defaults to False.
      • presence_penalty (float, optional): The presence penalty. Defaults to 0.0.
      • frequency_penalty (float, optional): The frequency penalty. Defaults to 0.0.
      • logit_bias (Dict[str, float], optional): Adjustments to the logits of specified tokens.
      • user (str, optional): An identifier for the user making the request.
      • (Additional model-specific parameters)

    Returns:

    Dict[str, Any]: A dictionary with the chat completion response or an error message.

    Example CURL Request:

    curl -v -X POST \"http://localhost:3000/api/v1/complete_vllm\"             -H \"Content-Type: application/json\"             -u \"user:password\"             -d '{\n        \"messages\": [\"Whats the weather like in London?\"],\n        \"temperature\": 0.7,\n        \"top_p\": 1.0,\n        \"n\": 1,\n        \"max_tokens\": 50,\n        \"stream\": false,\n        \"presence_penalty\": 0.0,\n        \"frequency_penalty\": 0.0,\n        \"logit_bias\": {},\n        \"user\": \"example_user\"\n    }'\n
    This request asks the VLLM engine to generate a completion for the provided chat context, with specified generation settings.

    "},{"location":"text/api/ner/","title":"Named Entity Recognition","text":"

    Bases: TextAPI

    NamedEntityRecognitionAPI serves a Named Entity Recognition (NER) model using the Hugging Face transformers library. It is designed to recognize and classify named entities in text into predefined categories such as the names of persons, organizations, locations, expressions of times, quantities, monetary values, percentages, etc.

    Attributes:

    Name Type Description model Any

    The loaded NER model, typically a Hugging Face transformer model specialized for token classification.

    tokenizer Any

    The tokenizer for preprocessing text compatible with the loaded model.

    Example CLI Usage:

    genius NamedEntityRecognitionAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id dslim/bert-large-NER-lol \\\nlisten \\\n--args \\\nmodel_name=\"dslim/bert-large-NER\" \\\nmodel_class=\"AutoModelForTokenClassification\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\nendpoint=\"0.0.0.0\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    "},{"location":"text/api/ner/#geniusrise_text.ner.api.NamedEntityRecognitionAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the NamedEntityRecognitionAPI class.

    Parameters:

    Name Type Description Default input BatchInput

    The input data.

    required output BatchOutput

    The output data.

    required state State

    The state data.

    required **kwargs Any

    Additional keyword arguments.

    {}"},{"location":"text/api/ner/#geniusrise_text.ner.api.NamedEntityRecognitionAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the NER Hugging Face pipeline.

    "},{"location":"text/api/ner/#geniusrise_text.ner.api.NamedEntityRecognitionAPI.ner_pipeline","title":"ner_pipeline(**kwargs)","text":"

    Recognizes named entities in the input text using the Hugging Face pipeline.

    This method leverages a pre-trained NER model to identify and classify entities in text into categories such as names, organizations, locations, etc. It's suitable for processing various types of text content.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'text' for the input text.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the original input text and a list of recognized entities.

    Example CURL Request for NER:

    curl -X POST localhost:3000/api/v1/ner_pipeline             -H \"Content-Type: application/json\"             -d '{\"text\": \"John Doe works at OpenAI in San Francisco.\"}' | jq\n

    "},{"location":"text/api/ner/#geniusrise_text.ner.api.NamedEntityRecognitionAPI.recognize_entities","title":"recognize_entities(**kwargs)","text":"

    Endpoint for recognizing named entities in the input text using the loaded NER model.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'text' for the input text.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the original input text and a list of recognized entities with their respective types.

    Example CURL Requests:

    curl -X POST localhost:3000/api/v1/recognize_entities \\\n-H \"Content-Type: application/json\" \\\n-d '{\"text\": \"John Doe works at OpenAI in San Francisco.\"}' | jq\n

    curl -X POST localhost:3000/api/v1/recognize_entities \\\n-H \"Content-Type: application/json\" \\\n-d '{\"text\": \"Alice is going to visit the Eiffel Tower in Paris next summer.\"}' | jq\n
    "},{"location":"text/api/nli/","title":"Natural Language Inference","text":"

    Bases: TextAPI

    Represents a Natural Language Inference (NLI) API leveraging Hugging Face's transformer models. This class is capable of handling various NLI tasks such as entailment, classification, similarity checking, and more. Utilizes CherryPy for exposing API endpoints that can be interacted with via standard HTTP requests.

    Attributes:

    Name Type Description model AutoModelForSequenceClassification

    The loaded Hugging Face model for sequence classification tasks.

    tokenizer AutoTokenizer

    The tokenizer corresponding to the model, used for processing input text.

    CLI Usage Example: For interacting with the NLI API, you would typically start the server using a command similar to one listed in the provided examples. After the server is running, you can use CURL commands to interact with the different endpoints.

    Example:

    genius NLIAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id \"MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7-lol\" \\\nlisten \\\n--args \\\nmodel_name=\"MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7\" \\\nmodel_class=\"AutoModelForSequenceClassification\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    "},{"location":"text/api/nli/#geniusrise_text.nli.api.NLIAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the NLIAPI with configurations for handling input, output, and state management.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the API.

    required **kwargs Any

    Additional keyword arguments for extended functionality.

    {}"},{"location":"text/api/nli/#geniusrise_text.nli.api.NLIAPI.classify","title":"classify(**kwargs)","text":"

    Endpoint for classifying the input text into one of the provided candidate labels using zero-shot classification.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'text' and 'candidate_labels'.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the input text, candidate labels, and classification scores.

    Example CURL Request:

    curl -X POST localhost:3000/api/v1/classify \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"text\": \"The new movie is a thrilling adventure in space\",\n        \"candidate_labels\": [\"entertainment\", \"politics\", \"business\"]\n    }'\n

    "},{"location":"text/api/nli/#geniusrise_text.nli.api.NLIAPI.detect_intent","title":"detect_intent(**kwargs)","text":"

    Detects the intent of the input text from a list of possible intents.

    Parameters:

    Name Type Description Default text str

    The input text.

    required intents List[str]

    A list of possible intents.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the input text and detected intent with its score.

    Example CURL Request:

    /usr/bin/curl -X POST localhost:3000/api/v1/detect_intent \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"text\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n        \"intents\": [\"teach\",\"sell\",\"note\",\"advertise\",\"promote\"]\n    }' | jq\n

    "},{"location":"text/api/nli/#geniusrise_text.nli.api.NLIAPI.entailment","title":"entailment(**kwargs)","text":"

    Endpoint for evaluating the entailment relationship between a premise and a hypothesis. It returns the relationship scores across possible labels like entailment, contradiction, and neutral.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'premise' and 'hypothesis'.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the premise, hypothesis, and their relationship scores.

    Example CURL Request:

    /usr/bin/curl -X POST localhost:3000/api/v1/entailment \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"premise\": \"This a very good entry level smartphone, battery last 2-3 days after fully charged when connected to the internet. No memory lag issue when playing simple hidden object games. Performance is beyond my expectation, i bought it with a good bargain, couldnt ask for more!\",\n        \"hypothesis\": \"the phone has an awesome battery life\"\n    }' | jq\n

    "},{"location":"text/api/nli/#geniusrise_text.nli.api.NLIAPI.fact_checking","title":"fact_checking(**kwargs)","text":"

    Performs fact checking on a statement given a context.

    Parameters:

    Name Type Description Default context str

    The context or background information.

    required statement str

    The statement to fact check.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing fact checking scores.

    Example CURL Request:

    /usr/bin/curl -X POST localhost:3000/api/v1/fact_checking \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"context\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n        \"statement\": \"The author is looking for a home loan\"\n    }' | jq\n

    "},{"location":"text/api/nli/#geniusrise_text.nli.api.NLIAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the NLI Hugging Face pipeline.

    "},{"location":"text/api/nli/#geniusrise_text.nli.api.NLIAPI.question_answering","title":"question_answering(**kwargs)","text":"

    Performs question answering for multiple choice questions.

    Parameters:

    Name Type Description Default question str

    The question text.

    required choices List[str]

    A list of possible answers.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the scores for each answer choice.

    Example CURL Request:

    /usr/bin/curl -X POST localhost:3000/api/v1/question_answering \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"question\": \"[ML-1T-2] is the dimensional formula of\",\n        \"choices\": [\"force\", \"coefficient of friction\", \"modulus of elasticity\", \"energy\"]\n    }' | jq\n

    "},{"location":"text/api/nli/#geniusrise_text.nli.api.NLIAPI.textual_similarity","title":"textual_similarity(**kwargs)","text":"

    Evaluates the textual similarity between two texts.

    Parameters:

    Name Type Description Default text1 str

    The first text.

    required text2 str

    The second text.

    required

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing similarity score.

    Example CURL Request:

    /usr/bin/curl -X POST localhost:3000/api/v1/textual_similarity \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"text1\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n        \"text2\": \"There is something magical about training neural networks. Their simplicity coupled with their power is astonishing.\"\n    }' | jq\n

    "},{"location":"text/api/nli/#geniusrise_text.nli.api.NLIAPI.zero_shot_classification","title":"zero_shot_classification(**kwargs)","text":"

    Performs zero-shot classification using the Hugging Face pipeline. It allows classification of text without explicitly provided labels.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'premise' and 'hypothesis'.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the premise, hypothesis, and their classification scores.

    Example CURL Request for zero-shot classification:

    curl -X POST localhost:3000/api/v1/zero_shot_classification             -H \"Content-Type: application/json\"             -d '{\n        \"premise\": \"A new study shows that the Mediterranean diet is good for heart health.\",\n        \"hypothesis\": \"The study is related to diet and health.\"\n    }' | jq\n

    "},{"location":"text/api/question_answering/","title":"Question Answering","text":"

    Bases: TextAPI

    "},{"location":"text/api/question_answering/#geniusrise_text.qa.api.QAAPI.tokenizer","title":"tokenizer: AutoTokenizer instance-attribute","text":"

    A class for handling different types of QA models, including traditional QA, TAPAS (Table-based QA), and TAPEX. It utilizes the Hugging Face transformers library to provide state-of-the-art question answering capabilities across various formats of data including plain text and tabular data.

    Attributes:

    Name Type Description model AutoModelForQuestionAnswering | AutoModelForTableQuestionAnswering

    The pre-trained QA model (traditional, TAPAS, or TAPEX).

    tokenizer AutoTokenizer

    The tokenizer used to preprocess input text.

    Methods

    answer(self, **kwargs: Any) -> Dict[str, Any]: Answers questions based on the provided context (text or table).

    CLI Usage Example:

    genius QAAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id distilbert-base-uncased-distilled-squad-lol \\\nlisten \\\n--args \\\nmodel_name=\"distilbert-base-uncased-distilled-squad\" \\\nmodel_class=\"AutoModelForQuestionAnswering\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    genius QAAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id google/tapas-base-finetuned-wtq-lol \\\nlisten \\\n--args \\\nmodel_name=\"google/tapas-base-finetuned-wtq\" \\\nmodel_class=\"AutoModelForTableQuestionAnswering\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    genius QAAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id microsoft/tapex-large-finetuned-wtq-lol \\\nlisten \\\n--args \\\nmodel_name=\"microsoft/tapex-large-finetuned-wtq\" \\\nmodel_class=\"AutoModelForSeq2SeqLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    "},{"location":"text/api/question_answering/#geniusrise_text.qa.api.QAAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the QAAPI with configurations for input, output, and state management.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the API.

    required **kwargs Any

    Additional keyword arguments for extended functionality.

    {}"},{"location":"text/api/question_answering/#geniusrise_text.qa.api.QAAPI.answer","title":"answer(**kwargs)","text":"

    Answers questions based on the provided context (text or table). It adapts to the model type (traditional, TAPAS, TAPEX) and provides answers accordingly.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing the 'question' and 'data' (context or table).

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the question, context/table, and answer(s).

    Example CURL Request for Text-based QA:

    curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-d '{\"question\": \"What is the capital of France?\", \"data\": \"France is a country in Europe. Its capital is Paris.\"}'\n

    Example CURL Requests:

    /usr/bin/curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"data\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n        \"question\": \"What is the common wisdom about RNNs?\"\n    }' | jq\n

    /usr/bin/curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-d '{\n    \"data\": [\n        {\"Name\": \"Alice\", \"Age\": \"30\"},\n        {\"Name\": \"Bob\", \"Age\": \"25\"}\n    ],\n    \"question\": \"what is their total age?\"\n}\n' | jq\n
    /usr/bin/curl -X POST localhost:3000/api/v1/answer \\\n-H \"Content-Type: application/json\" \\\n-d '{\n    \"data\": {\"Actors\": [\"Brad Pitt\", \"Leonardo Di Caprio\", \"George Clooney\"], \"Number of movies\": [\"87\", \"53\", \"69\"]},\n    \"question\": \"how many movies does Leonardo Di Caprio have?\"\n}\n' | jq\n
    "},{"location":"text/api/question_answering/#geniusrise_text.qa.api.QAAPI.answer_pipeline","title":"answer_pipeline(**kwargs)","text":"

    Answers questions using the Hugging Face pipeline based on the provided context.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, typically containing 'question' and 'data'.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the question, context, and the answer.

    Example CURL Request for QA:

    curl -X POST localhost:3000/api/v1/answer_pipeline             -H \"Content-Type: application/json\"             -d '{\"question\": \"Who is the CEO of Tesla?\", \"data\": \"Elon Musk is the CEO of Tesla.\"}'\n

    "},{"location":"text/api/question_answering/#geniusrise_text.qa.api.QAAPI.answer_table_question","title":"answer_table_question(data, question, model_type)","text":"

    Answers a question based on the provided table.

    Parameters:

    Name Type Description Default data Dict[str, Any]

    The table data and other parameters.

    required question str

    The question to be answered.

    required model_type str

    The type of the model ('tapas' or 'tapex').

    required

    Returns:

    Name Type Description str dict

    The answer derived from the table.

    "},{"location":"text/api/question_answering/#geniusrise_text.qa.api.QAAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the QA Hugging Face pipeline.

    "},{"location":"text/api/summarization/","title":"Summarization","text":"

    Bases: TextAPI

    A class for serving a Hugging Face-based summarization model. This API provides an interface to submit text and receive a summarized version, utilizing state-of-the-art machine learning models for text summarization.

    Attributes:

    Name Type Description model AutoModelForSeq2SeqLM

    The loaded Hugging Face model for summarization.

    tokenizer AutoTokenizer

    The tokenizer for preprocessing text.

    Methods

    summarize(self, **kwargs: Any) -> Dict[str, Any]: Summarizes the input text based on the given parameters.

    CLI Usage:

    genius SummarizationAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id facebook/bart-large-cnn-lol \\\nlisten \\\n--args \\\nmodel_name=\"facebook/bart-large-cnn\" \\\nmodel_class=\"AutoModelForSeq2SeqLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    "},{"location":"text/api/summarization/#geniusrise_text.summarization.api.SummarizationAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the SummarizationAPI class with input, output, and state configurations.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for input data.

    required output BatchOutput

    Configuration for output data.

    required state State

    State management for API.

    required **kwargs Any

    Additional keyword arguments for extended functionality.

    {}"},{"location":"text/api/summarization/#geniusrise_text.summarization.api.SummarizationAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the summarization Hugging Face pipeline.

    "},{"location":"text/api/summarization/#geniusrise_text.summarization.api.SummarizationAPI.summarize","title":"summarize(**kwargs)","text":"

    Summarizes the input text based on the given parameters using a machine learning model. The method accepts parameters via a POST request and returns the summarized text.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments. Expected to receive these from the POST request's JSON body.

    {}

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the input text and its summary.

    Example CURL Requests:

    /usr/bin/curl -X POST localhost:3000/api/v1/summarize \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"text\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n        \"decoding_strategy\": \"generate\",\n        \"bos_token_id\": 0,\n        \"decoder_start_token_id\": 2,\n        \"early_stopping\": true,\n        \"eos_token_id\": 2,\n        \"forced_bos_token_id\": 0,\n        \"forced_eos_token_id\": 2,\n        \"length_penalty\": 2.0,\n        \"max_length\": 142,\n        \"min_length\": 56,\n        \"no_repeat_ngram_size\": 3,\n        \"num_beams\": 4,\n        \"pad_token_id\": 1,\n        \"do_sample\": false\n    }' | jq\n

    /usr/bin/curl -X POST localhost:3000/api/v1/summarize \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"text\": \"Theres something magical about Recurrent Neural Networks (RNNs). I still remember when I trained my first recurrent network for Image Captioning. Within a few dozen minutes of training my first baby model (with rather arbitrarily-chosen hyperparameters) started to generate very nice looking descriptions of images that were on the edge of making sense. Sometimes the ratio of how simple your model is to the quality of the results you get out of it blows past your expectations, and this was one of those times. What made this result so shocking at the time was that the common wisdom was that RNNs were supposed to be difficult to train (with more experience Ive in fact reached the opposite conclusion). Fast forward about a year: Im training RNNs all the time and Ive witnessed their power and robustness many times, and yet their magical outputs still find ways of amusing me.\",\n        \"decoding_strategy\": \"generate\",\n        \"early_stopping\": true,\n        \"length_penalty\": 2.0,\n        \"max_length\": 142,\n        \"min_length\": 56,\n        \"no_repeat_ngram_size\": 3,\n        \"num_beams\": 4\n    }' | jq\n
    "},{"location":"text/api/summarization/#geniusrise_text.summarization.api.SummarizationAPI.summarize_pipeline","title":"summarize_pipeline(**kwargs)","text":"

    Summarizes the input text using the Hugging Face pipeline based on given parameters.

    Parameters:

    Name Type Description Default **kwargs Any

    Keyword arguments containing parameters for summarization.

    {}

    Returns:

    Type Description Dict[str, Any]

    A dictionary containing the input text and its summary.

    Example CURL Request for summarization: curl -X POST localhost:3000/api/v1/summarize_pipeline -H \"Content-Type: application/json\" -d '{\"text\": \"Your long text here\"}'

    "},{"location":"text/api/translation/","title":"Translation","text":"

    Bases: TextAPI

    A class for serving a Hugging Face-based translation model as a web API. This API allows users to submit text for translation and receive translated text in the specified target language using advanced machine learning models.

    Parameters:

    Name Type Description Default input BatchInput

    Configurations and data inputs for the batch process.

    required output BatchOutput

    Configurations for output data handling.

    required state State

    State management for the translation task.

    required **kwargs Any

    Additional keyword arguments for extended configurations.

    {}

    Example CLI Usage for interacting with the API:

    To start the API server:

    genius TranslationAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id facebook/mbart-large-50-many-to-many-mmt-lol \\\nlisten \\\n--args \\\nmodel_name=\"facebook/mbart-large-50-many-to-many-mmt\" \\\nmodel_class=\"AutoModelForSeq2SeqLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    To translate text using the API:

    curl -X POST localhost:3000/api/v1/translate \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"text\": \"Hello, world!\",\n        \"source_lang\": \"en\",\n        \"target_lang\": \"fr\",\n        \"decoding_strategy\": \"beam_search\",\n        \"num_beams\": 5\n    }'\n

    "},{"location":"text/api/translation/#geniusrise_text.translation.api.TranslationAPI.initialize_pipeline","title":"initialize_pipeline()","text":"

    Lazy initialization of the translation Hugging Face pipeline.

    "},{"location":"text/api/translation/#geniusrise_text.translation.api.TranslationAPI.translate","title":"translate(**kwargs)","text":"

    Translates text to a specified target language using the underlying Hugging Face model.

    This endpoint accepts JSON data with the text and language details, processes it through the machine learning model, and returns the translated text.

    Parameters:

    Name Type Description Default **kwargs Any

    Arbitrary keyword arguments, usually empty as parameters are in the POST body.

    {} POST body parameters

    text (str): The text to be translated. decoding_strategy (str): Strategy to use for decoding text; e.g., 'beam_search', 'greedy'. Default is 'generate'. source_lang (str): Source language code. target_lang (str): Target language code. Default is 'en'. additional_params (dict): Other model-specific parameters for translation.

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary with the original text, target language, and translated text.

    Example CURL requests:

    To translate text from English to French:

    curl -X POST localhost:3000/api/v1/translate \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"text\": \"Hello, world!\",\n        \"source_lang\": \"en\",\n        \"target_lang\": \"fr\",\n        \"decoding_strategy\": \"beam_search\",\n        \"num_beams\": 5\n    }'\n

    To translate text from Hindi to English:

    /usr/bin/curl -X POST localhost:3000/api/v1/translate \\\n-H \"Content-Type: application/json\" \\\n-d '{\n        \"text\": \"\u0938\u0902\u092f\u0941\u0915\u094d\u0924 \u0930\u093e\u0937\u094d\u091f\u094d\u0930 \u0915\u0947 \u092a\u094d\u0930\u092e\u0941\u0916 \u0915\u093e \u0915\u0939\u0928\u093e \u0939\u0948 \u0915\u093f \u0938\u0940\u0930\u093f\u092f\u093e \u092e\u0947\u0902 \u0915\u094b\u0908 \u0938\u0948\u0928\u094d\u092f \u0938\u092e\u093e\u0927\u093e\u0928 \u0928\u0939\u0940\u0902 \u0939\u0948\",\n        \"source_lang\": \"hi_IN\",\n        \"target_lang\": \"en_XX\",\n        \"decoding_strategy\": \"generate\",\n        \"decoder_start_token_id\": 2,\n        \"early_stopping\": true,\n        \"eos_token_id\": 2,\n        \"forced_eos_token_id\": 2,\n        \"max_length\": 200,\n        \"num_beams\": 5,\n        \"pad_token_id\": 1\n    }' | jq\n

    "},{"location":"text/api/translation/#geniusrise_text.translation.api.TranslationAPI.translate_pipeline","title":"translate_pipeline(**kwargs)","text":"

    Endpoint for translating text using a pre-initialized Hugging Face translation pipeline. This method is designed to handle translation requests more efficiently by utilizing a preloaded model and tokenizer, reducing the overhead of loading these components for each request.

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the original text, source language, target language, and the translated text.

    Example CURL Request for translation:

    curl -X POST localhost:8080/translate_pipeline             -H \"Content-Type: application/json\"             -d '{\n        \"text\": \"Hello, world!\",\n        \"source_lang\": \"en\",\n        \"target_lang\": \"fr\"\n    }'\n

    "},{"location":"text/bulk/base/","title":"Base Fine Tuner","text":"

    Bases: Bolt

    TextBulk is a foundational class for enabling bulk processing of text with various generation models. It primarily focuses on using Hugging Face models to provide a robust and efficient framework for large-scale text generation tasks. The class supports various decoding strategies to generate text that can be tailored to specific needs or preferences.

    Attributes:

    Name Type Description model AutoModelForCausalLM

    The language model for text generation.

    tokenizer AutoTokenizer

    The tokenizer for preparing input data for the model.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration and data inputs for the batch process.

    required output BatchOutput

    Configurations for output data handling.

    required state State

    State management for the Bolt.

    required **kwargs

    Arbitrary keyword arguments for extended configurations.

    {} Methods

    text(**kwargs: Any) -> Dict[str, Any]: Provides an API endpoint for text generation functionality. Accepts various parameters for customizing the text generation process.

    generate(prompt: str, decoding_strategy: str = \"generate\", **generation_params: Any) -> dict: Generates text based on the provided prompt and parameters. Supports multiple decoding strategies for diverse applications.

    The class serves as a versatile tool for text generation, supporting various models and configurations. It can be extended or used as is for efficient text generation tasks.

    "},{"location":"text/bulk/base/#geniusrise_text.base.bulk.TextBulk.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the TextBulk with configurations and sets up logging. It prepares the environment for text generation tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The input data configuration for the text generation task.

    required output BatchOutput

    The output data configuration for the results of the text generation.

    required state State

    The state configuration for the Bolt, managing its operational status.

    required **kwargs

    Additional keyword arguments for extended functionality and model configurations.

    {}"},{"location":"text/bulk/base/#geniusrise_text.base.bulk.TextBulk.generate","title":"generate(prompt, decoding_strategy='generate', **generation_params)","text":"

    Generate text completion for the given prompt using the specified decoding strategy.

    Parameters:

    Name Type Description Default prompt str

    The prompt to generate text completion for.

    required decoding_strategy str

    The decoding strategy to use. Defaults to \"generate\".

    'generate' **generation_params Any

    Additional parameters to pass to the decoding strategy.

    {}

    Returns:

    Name Type Description str str

    The generated text completion.

    Raises:

    Type Description Exception

    If an error occurs during generation.

    Supported decoding strategies and their additional parameters
    • \"generate\": Uses the model's default generation method. (Parameters: max_length, num_beams, etc.)
    • \"greedy_search\": Generates text using a greedy search decoding strategy. Parameters: max_length, eos_token_id, pad_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus.
    • \"contrastive_search\": Generates text using contrastive search decoding strategy. Parameters: top_k, penalty_alpha, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus, sequential.
    • \"sample\": Generates text using a sampling decoding strategy. Parameters: do_sample, temperature, top_k, top_p, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus.
    • \"beam_search\": Generates text using beam search decoding strategy. Parameters: num_beams, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus.
    • \"beam_sample\": Generates text using beam search with sampling decoding strategy. Parameters: num_beams, temperature, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus.
    • \"group_beam_search\": Generates text using group beam search decoding strategy. Parameters: num_beams, diversity_penalty, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus.
    • \"constrained_beam_search\": Generates text using constrained beam search decoding strategy. Parameters: num_beams, max_length, constraints, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, synced_gpus.
    All generation parameters
    • max_length: Maximum length the generated tokens can have
    • max_new_tokens: Maximum number of tokens to generate, ignoring prompt tokens
    • min_length: Minimum length of the sequence to be generated
    • min_new_tokens: Minimum number of tokens to generate, ignoring prompt tokens
    • early_stopping: Stopping condition for beam-based methods
    • max_time: Maximum time allowed for computation in seconds
    • do_sample: Whether to use sampling for generation
    • num_beams: Number of beams for beam search
    • num_beam_groups: Number of groups for beam search to ensure diversity
    • penalty_alpha: Balances model confidence and degeneration penalty in contrastive search
    • use_cache: Whether the model should use past key/values attentions to speed up decoding
    • temperature: Modulates next token probabilities
    • top_k: Number of highest probability tokens to keep for top-k-filtering
    • top_p: Smallest set of most probable tokens with cumulative probability >= top_p
    • typical_p: Conditional probability of predicting a target token next
    • epsilon_cutoff: Tokens with a conditional probability > epsilon_cutoff will be sampled
    • eta_cutoff: Eta sampling, a hybrid of locally typical sampling and epsilon sampling
    • diversity_penalty: Penalty subtracted from a beam's score if it generates a token same as any other group
    • repetition_penalty: Penalty for repetition of ngrams
    • encoder_repetition_penalty: Penalty on sequences not in the original input
    • length_penalty: Exponential penalty to the length for beam-based generation
    • no_repeat_ngram_size: All ngrams of this size can only occur once
    • bad_words_ids: List of token ids that are not allowed to be generated
    • force_words_ids: List of token ids that must be generated
    • renormalize_logits: Renormalize the logits after applying all logits processors
    • constraints: Custom constraints for generation
    • forced_bos_token_id: Token ID to force as the first generated token
    • forced_eos_token_id: Token ID to force as the last generated token
    • remove_invalid_values: Remove possible NaN and inf outputs
    • exponential_decay_length_penalty: Exponentially increasing length penalty after a certain number of tokens
    • suppress_tokens: Tokens that will be suppressed during generation
    • begin_suppress_tokens: Tokens that will be suppressed at the beginning of generation
    • forced_decoder_ids: Mapping from generation indices to token indices that will be forced
    • sequence_bias: Maps a sequence of tokens to its bias term
    • guidance_scale: Guidance scale for classifier free guidance (CFG)
    • low_memory: Switch to sequential topk for contrastive search to reduce peak memory
    • num_return_sequences: Number of independently computed returned sequences for each batch element
    • output_attentions: Whether to return the attentions tensors of all layers
    • output_hidden_states: Whether to return the hidden states of all layers
    • output_scores: Whether to return the prediction scores
    • return_dict_in_generate: Whether to return a ModelOutput instead of a plain tuple
    • pad_token_id: The id of the padding token
    • bos_token_id: The id of the beginning-of-sequence token
    • eos_token_id: The id of the end-of-sequence token
    • max_length: The maximum length of the sequence to be generated
    • eos_token_id: End-of-sequence token ID
    • pad_token_id: Padding token ID
    • output_attentions: Return attention tensors of all attention layers if True
    • output_hidden_states: Return hidden states of all layers if True
    • output_scores: Return prediction scores if True
    • return_dict_in_generate: Return a ModelOutput instead of a plain tuple if True
    • synced_gpus: Continue running the while loop until max_length for ZeRO stage 3 if True
    • top_k: Size of the candidate set for re-ranking in contrastive search
    • penalty_alpha: Degeneration penalty; active when larger than 0
    • eos_token_id: End-of-sequence token ID(s)
    • sequential: Switch to sequential topk hidden state computation to reduce memory if True
    • do_sample: Use sampling for generation if True
    • temperature: Temperature for sampling
    • top_p: Cumulative probability for top-p-filtering
    • diversity_penalty: Penalty for reducing similarity across different beam groups
    • constraints: List of constraints to apply during beam search
    • synced_gpus: Whether to continue running the while loop until max_length (needed for ZeRO stage 3)
    "},{"location":"text/bulk/base/#geniusrise_text.base.bulk.TextBulk.load_models","title":"load_models(model_name, tokenizer_name, model_revision=None, tokenizer_revision=None, model_class='AutoModelForCausalLM', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, better_transformers=False, **model_args)","text":"

    Loads and configures the specified model and tokenizer for text generation. It ensures the models are optimized for inference.

    Parameters:

    Name Type Description Default model_name str

    The name or path of the model to load.

    required tokenizer_name str

    The name or path of the tokenizer to load.

    required model_revision Optional[str]

    The specific model revision to load (e.g., a commit hash).

    None tokenizer_revision Optional[str]

    The specific tokenizer revision to load (e.g., a commit hash).

    None model_class str

    The class of the model to be loaded.

    'AutoModelForCausalLM' tokenizer_class str

    The class of the tokenizer to be loaded.

    'AutoTokenizer' use_cuda bool

    Flag to utilize CUDA for GPU acceleration.

    False precision str

    The desired precision for computations (\"float32\", \"float16\", etc.).

    'float16' quantization int

    The bit level for model quantization (0 for none, 8 for 8-bit quantization).

    0 device_map str | Dict | None

    The specific device(s) to use for model operations.

    'auto' max_memory Dict

    A dictionary defining the maximum memory to allocate for the model.

    {0: '24GB'} torchscript bool

    Flag to enable TorchScript for model optimization.

    False compile bool

    Flag to enable JIT compilation of the model.

    False awq_enabled bool

    Flag to enable AWQ (Adaptive Weight Quantization).

    False flash_attention bool

    Flag to enable Flash Attention optimization for faster processing.

    False better_transformers bool

    Flag to enable Better Transformers optimization for faster processing.

    False **model_args Any

    Additional arguments to pass to the model during its loading.

    {}

    Returns:

    Type Description Tuple[AutoModelForCausalLM, AutoTokenizer]

    Tuple[AutoModelForCausalLM, AutoTokenizer]: The loaded model and tokenizer ready for text generation.

    "},{"location":"text/bulk/base/#geniusrise_text.base.bulk.TextBulk.load_models_llama_cpp","title":"load_models_llama_cpp(model, filename, local_dir=None, n_gpu_layers=0, split_mode=llama_cpp.LLAMA_SPLIT_LAYER, main_gpu=0, tensor_split=None, vocab_only=False, use_mmap=True, use_mlock=False, kv_overrides=None, seed=llama_cpp.LLAMA_DEFAULT_SEED, n_ctx=512, n_batch=512, n_threads=None, n_threads_batch=None, rope_scaling_type=llama_cpp.LLAMA_ROPE_SCALING_UNSPECIFIED, rope_freq_base=0.0, rope_freq_scale=0.0, yarn_ext_factor=-1.0, yarn_attn_factor=1.0, yarn_beta_fast=32.0, yarn_beta_slow=1.0, yarn_orig_ctx=0, mul_mat_q=True, logits_all=False, embedding=False, offload_kqv=True, last_n_tokens_size=64, lora_base=None, lora_scale=1.0, lora_path=None, numa=False, chat_format=None, chat_handler=None, draft_model=None, tokenizer=None, verbose=True, **kwargs)","text":"

    Initializes and loads LLaMA model with llama.cpp backend, along with an optional tokenizer.

    Parameters:

    Name Type Description Default model str

    Huggingface ID to the LLaMA model.

    required filename Optional[str]

    A filename or glob pattern to match the model file in the repo.

    required local_dir Optional[Union[str, os.PathLike[str]]]

    The local directory to save the model to.

    None n_gpu_layers int

    Number of layers to offload to GPU. Default is 0.

    0 split_mode int

    Split mode for distributing model across GPUs.

    llama_cpp.LLAMA_SPLIT_LAYER main_gpu int

    Main GPU index.

    0 tensor_split Optional[List[float]]

    Tensor split configuration.

    None vocab_only bool

    Whether to load vocabulary only.

    False use_mmap bool

    Use memory-mapped files for model loading.

    True use_mlock bool

    Lock model data in RAM.

    False kv_overrides Optional[Dict[str, Union[bool, int, float]]]

    Key-value pairs for model overrides.

    None seed int

    Random seed for initialization.

    llama_cpp.LLAMA_DEFAULT_SEED n_ctx int

    Number of context tokens.

    512 n_batch int

    Batch size for processing prompts.

    512 n_threads Optional[int]

    Number of threads for generation.

    None n_threads_batch Optional[int]

    Number of threads for batch processing.

    None rope_scaling_type Optional[int]

    RoPE scaling type.

    llama_cpp.LLAMA_ROPE_SCALING_UNSPECIFIED rope_freq_base float

    Base frequency for RoPE.

    0.0 rope_freq_scale float

    Frequency scaling for RoPE.

    0.0 yarn_ext_factor float

    YaRN extrapolation mix factor.

    -1.0 yarn_attn_factor float

    YaRN attention factor.

    1.0 yarn_beta_fast float

    YaRN beta fast parameter.

    32.0 yarn_beta_slow float

    YaRN beta slow parameter.

    1.0 yarn_orig_ctx int

    Original context size for YaRN.

    0 mul_mat_q bool

    Whether to multiply matrices for queries.

    True logits_all bool

    Return logits for all tokens.

    False embedding bool

    Enable embedding mode only.

    False offload_kqv bool

    Offload K, Q, V matrices to GPU.

    True last_n_tokens_size int

    Size for the last_n_tokens buffer.

    64 lora_base Optional[str]

    Base model path for LoRA.

    None lora_scale float

    Scale factor for LoRA adjustments.

    1.0 lora_path Optional[str]

    Path to LoRA adjustments.

    None numa Union[bool, int]

    NUMA configuration.

    False chat_format Optional[str]

    Chat format configuration.

    None chat_handler Optional[llama_cpp.LlamaChatCompletionHandler]

    Handler for chat completions.

    None draft_model Optional[llama_cpp.LlamaDraftModel]

    Draft model for speculative decoding.

    None tokenizer Optional[PreTrainedTokenizerBase]

    Custom tokenizer instance.

    None verbose bool

    Enable verbose logging.

    True **kwargs

    Additional keyword arguments.

    {}

    Returns:

    Type Description Tuple[LlamaCPP, Optional[PreTrainedTokenizerBase]]

    Tuple[LlamaCPP, Optional[PreTrainedTokenizerBase]]: The loaded LLaMA model and tokenizer.

    "},{"location":"text/bulk/base/#geniusrise_text.base.bulk.TextBulk.load_models_vllm","title":"load_models_vllm(model, tokenizer, tokenizer_mode='auto', trust_remote_code=True, download_dir=None, load_format='auto', dtype='auto', seed=42, revision=None, tokenizer_revision=None, max_model_len=1024, quantization=None, enforce_eager=False, max_context_len_to_capture=8192, block_size=16, gpu_memory_utilization=0.9, swap_space=4, cache_dtype='auto', sliding_window=None, pipeline_parallel_size=1, tensor_parallel_size=1, worker_use_ray=False, max_parallel_loading_workers=None, disable_custom_all_reduce=False, max_num_batched_tokens=None, max_num_seqs=64, max_paddings=512, device='cuda', max_lora_rank=None, max_loras=None, max_cpu_loras=None, lora_dtype=None, lora_extra_vocab_size=0, placement_group=None, log_stats=False, batched_inference=False)","text":"

    Initializes and loads models using VLLM configurations with specific parameters.

    Parameters:

    Name Type Description Default model str

    Name or path of the Hugging Face model to use.

    required tokenizer str

    Name or path of the Hugging Face tokenizer to use.

    required tokenizer_mode str

    Tokenizer mode. \"auto\" will use the fast tokenizer if available, \"slow\" will always use the slow tokenizer.

    'auto' trust_remote_code bool

    Trust remote code (e.g., from Hugging Face) when downloading the model and tokenizer.

    True download_dir Optional[str]

    Directory to download and load the weights, default to the default cache directory of Hugging Face.

    None load_format str

    The format of the model weights to load. Options include \"auto\", \"pt\", \"safetensors\", \"npcache\", \"dummy\".

    'auto' dtype Union[str, torch.dtype]

    Data type for model weights and activations. Options include \"auto\", torch.float32, torch.float16, etc.

    'auto' seed int

    Random seed for reproducibility.

    42 revision Optional[str]

    The specific model version to use. Can be a branch name, a tag name, or a commit id.

    None code_revision Optional[str]

    The specific revision to use for the model code on Hugging Face Hub.

    required tokenizer_revision Optional[str]

    The specific tokenizer version to use.

    None max_model_len Optional[int]

    Maximum length of a sequence (including prompt and output). If None, will be derived from the model.

    1024 quantization Optional[str]

    Quantization method that was used to quantize the model weights. If None, we assume the model weights are not quantized.

    None enforce_eager bool

    Whether to enforce eager execution. If True, disables CUDA graph and always execute the model in eager mode.

    False max_context_len_to_capture Optional[int]

    Maximum context length covered by CUDA graphs. When larger, falls back to eager mode.

    8192 block_size int

    Size of a cache block in number of tokens.

    16 gpu_memory_utilization float

    Fraction of GPU memory to use for the VLLM execution.

    0.9 swap_space int

    Size of the CPU swap space per GPU (in GiB).

    4 cache_dtype str

    Data type for KV cache storage.

    'auto' sliding_window Optional[int]

    Configuration for sliding window if applicable.

    None pipeline_parallel_size int

    Number of pipeline parallel groups.

    1 tensor_parallel_size int

    Number of tensor parallel groups.

    1 worker_use_ray bool

    Whether to use Ray for model workers. Required if either pipeline_parallel_size or tensor_parallel_size is greater than 1.

    False max_parallel_loading_workers Optional[int]

    Maximum number of workers for loading the model in parallel to avoid RAM OOM.

    None disable_custom_all_reduce bool

    Disable custom all-reduce kernel and fall back to NCCL.

    False max_num_batched_tokens Optional[int]

    Maximum number of tokens to be processed in a single iteration.

    None max_num_seqs int

    Maximum number of sequences to be processed in a single iteration.

    64 max_paddings int

    Maximum number of paddings to be added to a batch.

    512 device str

    Device configuration, typically \"cuda\" or \"cpu\".

    'cuda' max_lora_rank Optional[int]

    Maximum rank for LoRA adjustments.

    None max_loras Optional[int]

    Maximum number of LoRA adjustments.

    None max_cpu_loras Optional[int]

    Maximum number of LoRA adjustments stored on CPU.

    None lora_dtype Optional[torch.dtype]

    Data type for LoRA parameters.

    None lora_extra_vocab_size Optional[int]

    Additional vocabulary size for LoRA.

    0 placement_group Optional[PlacementGroup]

    Ray placement group for distributed execution. Required for distributed execution.

    None log_stats bool

    Whether to log statistics during model operation.

    False

    Returns:

    Name Type Description LLMEngine AsyncLLMEngine | LLM

    An instance of the LLMEngine class initialized with the given configurations.

    "},{"location":"text/bulk/classification/","title":"Classification","text":"

    Bases: TextBulk

    TextClassificationBulk is designed to handle bulk text classification tasks using Hugging Face models efficiently and effectively. It allows for processing large datasets, utilizing state-of-the-art machine learning models to provide accurate classification of text data into predefined labels.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration and data inputs for the batch process.

    required output BatchOutput

    Configurations for output data handling.

    required state State

    State management for the classification task.

    required **kwargs

    Arbitrary keyword arguments for extended configurations.

    {}

    Example CLI Usage:

    genius TextClassificationBulk rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id cardiffnlp/twitter-roberta-base-hate-multiclass-latest-lol \\\nclassify \\\n--args \\\nmodel_name=\"cardiffnlp/twitter-roberta-base-hate-multiclass-latest\" \\\nmodel_class=\"AutoModelForSequenceClassification\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\ndevice_map=\"auto\" \\\nmax_memory=None \\\ntorchscript=False\n

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the TextClassificationBulk class with input, output, and state configurations.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the classification task.

    required **kwargs

    Additional keyword arguments for extended functionality.

    {}"},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.classify","title":"classify(model_name, model_class='AutoModelForSequenceClassification', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, batch_size=32, notification_email=None, **kwargs)","text":"

    Perform bulk classification using the specified model and tokenizer. This method handles the entire classification process including loading the model, processing input data, predicting classifications, and saving the results.

    Parameters:

    Name Type Description Default model_name str

    Name or path of the model.

    required model_class str

    Class name of the model (default \"AutoModelForSequenceClassification\").

    'AutoModelForSequenceClassification' tokenizer_class str

    Class name of the tokenizer (default \"AutoTokenizer\").

    'AutoTokenizer' use_cuda bool

    Whether to use CUDA for model inference (default False).

    False precision str

    Precision for model computation (default \"float\").

    'float' quantization int

    Level of quantization for optimizing model size and speed (default 0).

    0 device_map str | Dict | None

    Specific device to use for computation (default \"auto\").

    'auto' max_memory Dict

    Maximum memory configuration for devices.

    {0: '24GB'} torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model. Defaults to False.

    False compile bool

    Whether to compile the model before inference. Defaults to False.

    False awq_enabled bool

    Whether to enable AWQ optimization (default False).

    False flash_attention bool

    Whether to use flash attention optimization (default False).

    False batch_size int

    Number of classifications to process simultaneously (default 32).

    32 **kwargs Any

    Arbitrary keyword arguments for model and generation configurations.

    {}"},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset","title":"load_dataset(dataset_path, max_length=512, **kwargs)","text":"

    Load a classification dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required max_length int

    The maximum length for tokenization. Defaults to 512.

    512

    Returns:

    Name Type Description Dataset Optional[Dataset]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\"}\n

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--csv","title":"CSV","text":"

    Should contain 'text' columns.

    text\n\"The text content\"\n

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'text' columns.

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'text' keys.

    [{\"text\": \"The text content\"}]\n

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' child elements.

    <record>\n<text>The text content</text>\n</record>\n

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' keys.

    - text: \"The text content\"\n

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--tsv","title":"TSV","text":"

    Should contain 'text' columns separated by tabs.

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' columns.

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' columns.

    "},{"location":"text/bulk/classification/#geniusrise_text.classification.bulk.TextClassificationBulk.load_dataset--feather","title":"Feather","text":"

    Should contain 'text' columns.

    "},{"location":"text/bulk/instruction_tuning/","title":"Instruction Tuning","text":"

    Bases: TextBulk

    InstructionBulk is a class designed to perform bulk text generation tasks using Hugging Face's instruction-tuned language models. It is optimized for large-scale text generation, providing an efficient interface to use state-of-the-art machine learning models for generating text based on a set of instructions or prompts.

    Attributes:

    Name Type Description model Any

    The loaded, pre-trained instruction-tuned language model.

    tokenizer Any

    The tokenizer for processing text compatible with the model.

    Methods

    load_dataset(dataset_path: str, max_length: int = 1024, **kwargs) -> Optional[Dataset]: Loads a dataset for text generation tasks from the specified directory.

    perform(model_name: str, **kwargs: Any) -> None: Performs bulk text generation using the specified model and tokenizer.

    Example CLI Usage:

    genius InstructionBulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/chat \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/chat \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise\\\n--postgres_table state \\\n--id mistralai/Mistral-7B-Instruct-v0.1-lol \\\nperform \\\n--args \\\nmodel_name=\"mistralai/Mistral-7B-Instruct-v0.1\" \\\nmodel_class=\"AutoModelForCausalLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\ndevice_map=\"auto\" \\\nmax_memory=None \\\ntorchscript=False \\\ndecoding_strategy=\"generate\" \\\ngeneration_max_new_tokens=100 \\\ngeneration_do_sample=true\n

    or using VLLM:

    genius InstructionBulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/chat \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/chat \\\nnone \\\n--id mistralai/Mistral-7B-Instruct-v0.1 \\\nperform_vllm \\\n--args \\\nmodel_name=\"mistralai/Mistral-7B-Instruct-v0.1\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\ndevice_map=\"auto\" \\\ngeneration_temperature=0.7 \\\ngeneration_top_p=1.0 \\\ngeneration_n=1 \\\ngeneration_max_tokens=50 \\\ngeneration_stream=false \\\ngeneration_presence_penalty=0.0 \\\ngeneration_frequency_penalty=0.0\n

    or using llama.cpp:

    genius InstructionBulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/chat \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/chat \\\nnone \\\n--id mistralai/Mistral-7B-Instruct-v0.1 \\\nperform_llama_cpp \\\n--args \\\nmodel=\"TheBloke/Mistral-7B-Instruct-v0.2-GGUF\" \\\nfilename=\"mistral-7b-instruct-v0.2.Q4_K_M.gguf\" \\\nn_gpu_layers=35  \\\ngeneration_temperature=0.7 \\\ngeneration_top_p=0.95 \\\ngeneration_top_k=40 \\\ngeneration_max_tokens=50 \\\ngeneration_repeat_penalty=0.1\n

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the InstructionBulk class with input, output, and state configurations for bulk text generation.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for input data handling.

    required output BatchOutput

    Configuration for output data handling.

    required state State

    State management for the text generation task.

    required **kwargs

    Additional keyword arguments for extended functionalities.

    {}"},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset","title":"load_dataset(dataset_path, max_length=1024, **kwargs)","text":"

    Loads a dataset from the specified path. This method supports various data formats including JSON, CSV, Parquet, and others. It's designed to facilitate the bulk processing of text data for generation tasks.

    Parameters:

    Name Type Description Default dataset_path str

    Path to the directory containing the dataset files.

    required max_length int

    Maximum token length for text processing (default is 1024).

    1024 **kwargs

    Additional keyword arguments for dataset loading.

    {}

    Returns:

    Type Description Optional[Dataset]

    Optional[Dataset]: A Dataset object if loading is successful; otherwise, None.

    Raises:

    Type Description Exception

    If an error occurs during dataset loading.

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"instruction\": \"The instruction\"}\n

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--csv","title":"CSV","text":"

    Should contain 'instruction' columns.

    instruction\n\"The instruction\"\n

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'instruction' columns.

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'instruction' keys.

    [{\"instruction\": \"The instruction\"}]\n

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'instruction' child elements.

    <record>\n<instruction>The instruction</instruction>\n</record>\n

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'instruction' keys.

    - instruction: \"The instruction\"\n

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--tsv","title":"TSV","text":"

    Should contain 'instruction' columns separated by tabs.

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'instruction' columns.

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'instruction' columns.

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.load_dataset--feather","title":"Feather","text":"

    Should contain 'instruction' columns.

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.perform","title":"perform(model_name, model_class='AutoModelForCausalLM', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, decoding_strategy='generate', notification_email=None, **kwargs)","text":"

    Performs text generation in bulk using a specified instruction-tuned model. This method handles the entire process, including model loading, prompt processing, text generation, and saving the results.

    Parameters:

    Name Type Description Default model_name str

    The name or path of the instruction-tuned model.

    required model_class str

    The class of the language model. Defaults to \"AutoModelForCausalLM\".

    'AutoModelForCausalLM' tokenizer_class str

    The class of the tokenizer. Defaults to \"AutoTokenizer\".

    'AutoTokenizer' use_cuda bool

    Whether to use CUDA for model inference. Defaults to False.

    False precision str

    Precision for model computation. Defaults to \"float16\".

    'float16' quantization int

    Level of quantization for optimizing model size and speed. Defaults to 0.

    0 device_map str | Dict | None

    Specific device to use for computation. Defaults to \"auto\".

    'auto' max_memory Dict

    Maximum memory configuration for devices. Defaults to {0: \"24GB\"}.

    {0: '24GB'} torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model. Defaults to False.

    False compile bool

    Whether to compile the model before fine-tuning. Defaults to False.

    False awq_enabled bool

    Whether to enable AWQ optimization. Defaults to False.

    False flash_attention bool

    Whether to use flash attention optimization. Defaults to False.

    False decoding_strategy str

    Strategy for decoding the completion. Defaults to \"generate\".

    'generate' **kwargs Any

    Configuration and additional arguments for text generation such as model class, tokenizer class, precision, device map, and other generation-related parameters.

    {} Note

    Additional arguments are passed directly to the model and tokenizer initialization and the generation method.

    "},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.perform_llama_cpp","title":"perform_llama_cpp(model, filename=None, local_dir=None, n_gpu_layers=0, split_mode=llama_cpp.LLAMA_SPLIT_LAYER, main_gpu=0, tensor_split=None, vocab_only=False, use_mmap=True, use_mlock=False, kv_overrides=None, seed=llama_cpp.LLAMA_DEFAULT_SEED, n_ctx=512, n_batch=512, n_threads=None, n_threads_batch=None, rope_scaling_type=llama_cpp.LLAMA_ROPE_SCALING_UNSPECIFIED, rope_freq_base=0.0, rope_freq_scale=0.0, yarn_ext_factor=-1.0, yarn_attn_factor=1.0, yarn_beta_fast=32.0, yarn_beta_slow=1.0, yarn_orig_ctx=0, mul_mat_q=True, logits_all=False, embedding=False, offload_kqv=True, last_n_tokens_size=64, lora_base=None, lora_scale=1.0, lora_path=None, numa=False, chat_format=None, chat_handler=None, draft_model=None, tokenizer=None, verbose=True, notification_email=None, **kwargs)","text":"

    Performs bulk text generation using the LLaMA model with llama.cpp backend. This method handles the entire process, including model loading, prompt processing, text generation, and saving the results.

    Parameters:

    Name Type Description Default model str

    Path or identifier for the LLaMA model.

    required filename Optional[str]

    Optional filename or glob pattern to match the model file.

    None local_dir Optional[Union[str, os.PathLike[str]]]

    Local directory to save the model files.

    None n_gpu_layers int

    Number of layers to offload to GPU.

    0 split_mode int

    Split mode for distributing model across GPUs.

    llama_cpp.LLAMA_SPLIT_LAYER main_gpu int

    Main GPU index.

    0 tensor_split Optional[List[float]]

    Configuration for tensor splitting across GPUs.

    None vocab_only bool

    Whether to load only the vocabulary.

    False use_mmap bool

    Use memory-mapped files for model loading.

    True use_mlock bool

    Lock model data in RAM to prevent swapping.

    False kv_overrides Optional[Dict[str, Union[bool, int, float]]]

    Key-value pairs for overriding model config.

    None seed int

    Seed for random number generation.

    llama_cpp.LLAMA_DEFAULT_SEED n_ctx int

    Number of context tokens for generation.

    512 n_batch int

    Batch size for processing.

    512 n_threads Optional[int]

    Number of threads for generation.

    None n_threads_batch Optional[int]

    Number of threads for batch processing.

    None rope_scaling_type Optional[int]

    Scaling type for RoPE.

    llama_cpp.LLAMA_ROPE_SCALING_UNSPECIFIED rope_freq_base float

    Base frequency for RoPE.

    0.0 rope_freq_scale float

    Frequency scaling for RoPE.

    0.0 yarn_ext_factor float

    YaRN extrapolation factor.

    -1.0 yarn_attn_factor float

    YaRN attention factor.

    1.0 yarn_beta_fast float

    YaRN beta fast parameter.

    32.0 yarn_beta_slow float

    YaRN beta slow parameter.

    1.0 yarn_orig_ctx int

    Original context size for YaRN.

    0 mul_mat_q bool

    Multiply matrices for queries.

    True logits_all bool

    Return logits for all tokens.

    False embedding bool

    Enable embedding mode.

    False offload_kqv bool

    Offload K, Q, V matrices to GPU.

    True last_n_tokens_size int

    Size for the last_n_tokens buffer.

    64 lora_base Optional[str]

    Base model path for LoRA.

    None lora_scale float

    Scale factor for LoRA adjustments.

    1.0 lora_path Optional[str]

    Path for LoRA adjustments.

    None numa Union[bool, int]

    NUMA configuration.

    False chat_format Optional[str]

    Chat format configuration.

    None chat_handler Optional[llama_cpp.llama_chat_format.LlamaChatCompletionHandler]

    Handler for chat completions.

    None draft_model Optional[llama_cpp.LlamaDraftModel]

    Draft model for speculative decoding.

    None tokenizer Optional[PreTrainedTokenizerBase]

    Custom tokenizer instance.

    None verbose bool

    Enable verbose logging.

    True notification_email Optional[str]

    Email to send notifications upon completion.

    None **kwargs

    Additional arguments for model loading and text generation.

    {}"},{"location":"text/bulk/instruction_tuning/#geniusrise_text.instruction.bulk.InstructionBulk.perform_vllm","title":"perform_vllm(model_name, use_cuda=False, precision='float16', quantization=0, device_map='auto', vllm_tokenizer_mode='auto', vllm_download_dir=None, vllm_load_format='auto', vllm_seed=42, vllm_max_model_len=1024, vllm_enforce_eager=False, vllm_max_context_len_to_capture=8192, vllm_block_size=16, vllm_gpu_memory_utilization=0.9, vllm_swap_space=4, vllm_sliding_window=None, vllm_pipeline_parallel_size=1, vllm_tensor_parallel_size=1, vllm_worker_use_ray=False, vllm_max_parallel_loading_workers=None, vllm_disable_custom_all_reduce=False, vllm_max_num_batched_tokens=None, vllm_max_num_seqs=64, vllm_max_paddings=512, vllm_max_lora_rank=None, vllm_max_loras=None, vllm_max_cpu_loras=None, vllm_lora_extra_vocab_size=0, vllm_placement_group=None, vllm_log_stats=False, notification_email=None, batch_size=32, **kwargs)","text":"

    Performs bulk text generation using the Versatile Language Learning Model (VLLM) with specified parameters for fine-tuning model behavior, including quantization and parallel processing settings. This method is designed to process large datasets efficiently by leveraging VLLM capabilities for generating high-quality text completions based on provided prompts.

    Parameters:

    Name Type Description Default model_name str

    The name or path of the VLLM model to use for text generation.

    required use_cuda bool

    Flag indicating whether to use CUDA for GPU acceleration.

    False precision str

    Precision of computations, can be \"float16\", \"bfloat16\", etc.

    'float16' quantization int

    Level of quantization for model weights, 0 for none.

    0 device_map str | Dict | None

    Specific device(s) to use for model inference.

    'auto' vllm_tokenizer_mode str

    Mode of the tokenizer (\"auto\", \"fast\", or \"slow\").

    'auto' vllm_download_dir Optional[str]

    Directory to download and load the model and tokenizer.

    None vllm_load_format str

    Format to load the model, e.g., \"auto\", \"pt\".

    'auto' vllm_seed int

    Seed for random number generation.

    42 vllm_max_model_len int

    Maximum sequence length the model can handle.

    1024 vllm_enforce_eager bool

    Enforce eager execution instead of using optimization techniques.

    False vllm_max_context_len_to_capture int

    Maximum context length for CUDA graph capture.

    8192 vllm_block_size int

    Block size for caching mechanism.

    16 vllm_gpu_memory_utilization float

    Fraction of GPU memory to use.

    0.9 vllm_swap_space int

    Amount of swap space to use in GiB.

    4 vllm_sliding_window Optional[int]

    Size of the sliding window for processing.

    None vllm_pipeline_parallel_size int

    Number of pipeline parallel groups.

    1 vllm_tensor_parallel_size int

    Number of tensor parallel groups.

    1 vllm_worker_use_ray bool

    Whether to use Ray for model workers.

    False vllm_max_parallel_loading_workers Optional[int]

    Maximum number of workers for parallel loading.

    None vllm_disable_custom_all_reduce bool

    Disable custom all-reduce kernel and fall back to NCCL.

    False vllm_max_num_batched_tokens Optional[int]

    Maximum number of tokens to be processed in a single iteration.

    None vllm_max_num_seqs int

    Maximum number of sequences to be processed in a single iteration.

    64 vllm_max_paddings int

    Maximum number of paddings to be added to a batch.

    512 vllm_max_lora_rank Optional[int]

    Maximum rank for LoRA adjustments.

    None vllm_max_loras Optional[int]

    Maximum number of LoRA adjustments.

    None vllm_max_cpu_loras Optional[int]

    Maximum number of LoRA adjustments stored on CPU.

    None vllm_lora_extra_vocab_size int

    Additional vocabulary size for LoRA.

    0 vllm_placement_group Optional[dict]

    Ray placement group for distributed execution.

    None vllm_log_stats bool

    Whether to log statistics during model operation.

    False notification_email Optional[str]

    Email to send notifications upon completion.

    None batch_size int

    Number of prompts to process in each batch for efficient memory usage.

    32 **kwargs Any

    Additional keyword arguments for generation settings like temperature, top_p, etc.

    {}

    This method automates the loading of large datasets, generation of text completions, and saving results, facilitating efficient and scalable text generation tasks.

    "},{"location":"text/bulk/language_model/","title":"Language Model","text":"

    Bases: TextBulk

    LanguageModelBulk is designed for large-scale text generation using Hugging Face language models in a bulk processing manner. It's particularly useful for tasks such as bulk content creation, summarization, or any other scenario where large datasets need to be processed with a language model.

    Attributes:

    Name Type Description model Any

    The loaded language model used for text generation.

    tokenizer Any

    The tokenizer corresponding to the language model, used for processing input text.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the API.

    required **kwargs Any

    Arbitrary keyword arguments for extended functionality.

    {}

    CLI Usage Example:

    genius LanguageModelBulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/lm \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/lm \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise\\\n--postgres_table state \\\n--id mistralai/Mistral-7B-Instruct-v0.1-lol \\\ncomplete \\\n--args \\\nmodel_name=\"mistralai/Mistral-7B-Instruct-v0.1\" \\\nmodel_class=\"AutoModelForCausalLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\ndevice_map=\"auto\" \\\nmax_memory=None \\\ntorchscript=False \\\ndecoding_strategy=\"generate\" \\\ngeneration_max_new_tokens=100 \\\ngeneration_do_sample=true\n

    or using VLLM:

    genius LanguageModelBulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/lm \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/lm \\\nnone \\\n--id mistralai/Mistral-7B-v0.1 \\\ncomplete_vllm \\\n--args \\\nmodel_name=\"mistralai/Mistral-7B-v0.1\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\ndevice_map=\"auto\" \\\nvllm_enforce_eager=True \\\ngeneration_temperature=0.7 \\\ngeneration_top_p=1.0 \\\ngeneration_n=1 \\\ngeneration_max_tokens=50 \\\ngeneration_stream=false \\\ngeneration_presence_penalty=0.0 \\\ngeneration_frequency_penalty=0.0\n

    or using llama.cpp:

    genius LanguageModelBulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/chat \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/chat \\\nnone \\\ncomplete_llama_cpp \\\n--args \\\nmodel=\"TheBloke/Mistral-7B-v0.1-GGUF\" \\\nfilename=\"mistral-7b-v0.1.Q4_K_M.gguf\" \\\nn_gpu_layers=35  \\\nn_ctx=32768 \\\ngeneration_temperature=0.7 \\\ngeneration_top_p=0.95 \\\ngeneration_top_k=40 \\\ngeneration_max_tokens=50 \\\ngeneration_repeat_penalty=0.1\n

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the LanguageModelBulk object with the specified configurations for input, output, and state.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration and data inputs for the bulk process.

    required output BatchOutput

    Configurations for output data handling.

    required state State

    State management for the bulk process.

    required **kwargs Any

    Additional keyword arguments for extended configurations.

    {}"},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.complete","title":"complete(model_name, model_class='AutoModelForCausalLM', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, decoding_strategy='generate', notification_email=None, **kwargs)","text":"

    Performs text completion on the loaded dataset using the specified model and tokenizer. The method handles the entire process, including model loading, text generation, and saving the results.

    Parameters:

    Name Type Description Default model_name str

    The name of the language model to use for text completion.

    required model_class str

    The class of the language model. Defaults to \"AutoModelForCausalLM\".

    'AutoModelForCausalLM' tokenizer_class str

    The class of the tokenizer. Defaults to \"AutoTokenizer\".

    'AutoTokenizer' use_cuda bool

    Whether to use CUDA for model inference. Defaults to False.

    False precision str

    Precision for model computation. Defaults to \"float16\".

    'float16' quantization int

    Level of quantization for optimizing model size and speed. Defaults to 0.

    0 device_map str | Dict | None

    Specific device to use for computation. Defaults to \"auto\".

    'auto' max_memory Dict

    Maximum memory configuration for devices. Defaults to {0: \"24GB\"}.

    {0: '24GB'} torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model. Defaults to False.

    False compile bool

    Whether to compile the model before fine-tuning. Defaults to False.

    False awq_enabled bool

    Whether to enable AWQ optimization. Defaults to False.

    False flash_attention bool

    Whether to use flash attention optimization. Defaults to False.

    False decoding_strategy str

    Strategy for decoding the completion. Defaults to \"generate\".

    'generate' **kwargs Any

    Additional keyword arguments for text generation.

    {}"},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.complete_llama_cpp","title":"complete_llama_cpp(model, filename=None, local_dir=None, n_gpu_layers=0, split_mode=llama_cpp.LLAMA_SPLIT_LAYER, main_gpu=0, tensor_split=None, vocab_only=False, use_mmap=True, use_mlock=False, kv_overrides=None, seed=llama_cpp.LLAMA_DEFAULT_SEED, n_ctx=512, n_batch=512, n_threads=None, n_threads_batch=None, rope_scaling_type=llama_cpp.LLAMA_ROPE_SCALING_UNSPECIFIED, rope_freq_base=0.0, rope_freq_scale=0.0, yarn_ext_factor=-1.0, yarn_attn_factor=1.0, yarn_beta_fast=32.0, yarn_beta_slow=1.0, yarn_orig_ctx=0, mul_mat_q=True, logits_all=False, embedding=False, offload_kqv=True, last_n_tokens_size=64, lora_base=None, lora_scale=1.0, lora_path=None, numa=False, chat_format=None, chat_handler=None, draft_model=None, tokenizer=None, verbose=True, notification_email=None, **kwargs)","text":"

    Performs bulk text generation using the LLaMA model with llama.cpp backend. This method handles the entire process, including model loading, prompt processing, text generation, and saving the results.

    Parameters:

    Name Type Description Default model str

    Path or identifier for the LLaMA model.

    required filename Optional[str]

    Optional filename or glob pattern to match the model file.

    None local_dir Optional[Union[str, os.PathLike[str]]]

    Local directory to save the model files.

    None n_gpu_layers int

    Number of layers to offload to GPU.

    0 split_mode int

    Split mode for distributing model across GPUs.

    llama_cpp.LLAMA_SPLIT_LAYER main_gpu int

    Main GPU index.

    0 tensor_split Optional[List[float]]

    Configuration for tensor splitting across GPUs.

    None vocab_only bool

    Whether to load only the vocabulary.

    False use_mmap bool

    Use memory-mapped files for model loading.

    True use_mlock bool

    Lock model data in RAM to prevent swapping.

    False kv_overrides Optional[Dict[str, Union[bool, int, float]]]

    Key-value pairs for overriding model config.

    None seed int

    Seed for random number generation.

    llama_cpp.LLAMA_DEFAULT_SEED n_ctx int

    Number of context tokens for generation.

    512 n_batch int

    Batch size for processing.

    512 n_threads Optional[int]

    Number of threads for generation.

    None n_threads_batch Optional[int]

    Number of threads for batch processing.

    None rope_scaling_type Optional[int]

    Scaling type for RoPE.

    llama_cpp.LLAMA_ROPE_SCALING_UNSPECIFIED rope_freq_base float

    Base frequency for RoPE.

    0.0 rope_freq_scale float

    Frequency scaling for RoPE.

    0.0 yarn_ext_factor float

    YaRN extrapolation factor.

    -1.0 yarn_attn_factor float

    YaRN attention factor.

    1.0 yarn_beta_fast float

    YaRN beta fast parameter.

    32.0 yarn_beta_slow float

    YaRN beta slow parameter.

    1.0 yarn_orig_ctx int

    Original context size for YaRN.

    0 mul_mat_q bool

    Multiply matrices for queries.

    True logits_all bool

    Return logits for all tokens.

    False embedding bool

    Enable embedding mode.

    False offload_kqv bool

    Offload K, Q, V matrices to GPU.

    True last_n_tokens_size int

    Size for the last_n_tokens buffer.

    64 lora_base Optional[str]

    Base model path for LoRA.

    None lora_scale float

    Scale factor for LoRA adjustments.

    1.0 lora_path Optional[str]

    Path for LoRA adjustments.

    None numa Union[bool, int]

    NUMA configuration.

    False chat_format Optional[str]

    Chat format configuration.

    None chat_handler Optional[llama_cpp.llama_chat_format.LlamaChatCompletionHandler]

    Handler for chat completions.

    None draft_model Optional[llama_cpp.LlamaDraftModel]

    Draft model for speculative decoding.

    None tokenizer Optional[PreTrainedTokenizerBase]

    Custom tokenizer instance.

    None verbose bool

    Enable verbose logging.

    True notification_email Optional[str]

    Email to send notifications upon completion.

    None **kwargs

    Additional arguments for model loading and text generation.

    {}"},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.complete_vllm","title":"complete_vllm(model_name, use_cuda=False, precision='float16', quantization=0, device_map='auto', vllm_tokenizer_mode='auto', vllm_download_dir=None, vllm_load_format='auto', vllm_seed=42, vllm_max_model_len=1024, vllm_enforce_eager=False, vllm_max_context_len_to_capture=8192, vllm_block_size=16, vllm_gpu_memory_utilization=0.9, vllm_swap_space=4, vllm_sliding_window=None, vllm_pipeline_parallel_size=1, vllm_tensor_parallel_size=1, vllm_worker_use_ray=False, vllm_max_parallel_loading_workers=None, vllm_disable_custom_all_reduce=False, vllm_max_num_batched_tokens=None, vllm_max_num_seqs=64, vllm_max_paddings=512, vllm_max_lora_rank=None, vllm_max_loras=None, vllm_max_cpu_loras=None, vllm_lora_extra_vocab_size=0, vllm_placement_group=None, vllm_log_stats=False, notification_email=None, batch_size=32, **kwargs)","text":"

    Performs bulk text generation using the Versatile Language Learning Model (VLLM) with specified parameters for fine-tuning model behavior, including quantization and parallel processing settings. This method is designed to process large datasets efficiently by leveraging VLLM capabilities for generating high-quality text completions based on provided prompts.

    Parameters:

    Name Type Description Default model_name str

    The name or path of the VLLM model to use for text generation.

    required use_cuda bool

    Flag indicating whether to use CUDA for GPU acceleration.

    False precision str

    Precision of computations, can be \"float16\", \"bfloat16\", etc.

    'float16' quantization int

    Level of quantization for model weights, 0 for none.

    0 device_map str | Dict | None

    Specific device(s) to use for model inference.

    'auto' vllm_tokenizer_mode str

    Mode of the tokenizer (\"auto\", \"fast\", or \"slow\").

    'auto' vllm_download_dir Optional[str]

    Directory to download and load the model and tokenizer.

    None vllm_load_format str

    Format to load the model, e.g., \"auto\", \"pt\".

    'auto' vllm_seed int

    Seed for random number generation.

    42 vllm_max_model_len int

    Maximum sequence length the model can handle.

    1024 vllm_enforce_eager bool

    Enforce eager execution instead of using optimization techniques.

    False vllm_max_context_len_to_capture int

    Maximum context length for CUDA graph capture.

    8192 vllm_block_size int

    Block size for caching mechanism.

    16 vllm_gpu_memory_utilization float

    Fraction of GPU memory to use.

    0.9 vllm_swap_space int

    Amount of swap space to use in GiB.

    4 vllm_sliding_window Optional[int]

    Size of the sliding window for processing.

    None vllm_pipeline_parallel_size int

    Number of pipeline parallel groups.

    1 vllm_tensor_parallel_size int

    Number of tensor parallel groups.

    1 vllm_worker_use_ray bool

    Whether to use Ray for model workers.

    False vllm_max_parallel_loading_workers Optional[int]

    Maximum number of workers for parallel loading.

    None vllm_disable_custom_all_reduce bool

    Disable custom all-reduce kernel and fall back to NCCL.

    False vllm_max_num_batched_tokens Optional[int]

    Maximum number of tokens to be processed in a single iteration.

    None vllm_max_num_seqs int

    Maximum number of sequences to be processed in a single iteration.

    64 vllm_max_paddings int

    Maximum number of paddings to be added to a batch.

    512 vllm_max_lora_rank Optional[int]

    Maximum rank for LoRA adjustments.

    None vllm_max_loras Optional[int]

    Maximum number of LoRA adjustments.

    None vllm_max_cpu_loras Optional[int]

    Maximum number of LoRA adjustments stored on CPU.

    None vllm_lora_extra_vocab_size int

    Additional vocabulary size for LoRA.

    0 vllm_placement_group Optional[dict]

    Ray placement group for distributed execution.

    None vllm_log_stats bool

    Whether to log statistics during model operation.

    False notification_email Optional[str]

    Email to send notifications upon completion.

    None batch_size int

    Number of prompts to process in each batch for efficient memory usage.

    32 **kwargs Any

    Additional keyword arguments for generation settings like temperature, top_p, etc.

    {}

    This method automates the loading of large datasets, generation of text completions, and saving results, facilitating efficient and scalable text generation tasks.

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset","title":"load_dataset(dataset_path, max_length=512, **kwargs)","text":"

    Load a completion dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required max_length int

    The maximum length for tokenization. Defaults to 512.

    512 **kwargs

    Additional keyword arguments to pass to the underlying dataset loading functions.

    {}

    Returns:

    Name Type Description Dataset Optional[Dataset]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--dataset-files-saved-by-hugging-face-datasets-library","title":"Dataset files saved by Hugging Face datasets library","text":"

    The directory should contain 'dataset_info.json' and other related files.

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\"}\n

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--csv","title":"CSV","text":"

    Should contain 'text' column.

    text\n\"The text content\"\n

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'text' column.

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'text' key.

    [{\"text\": \"The text content\"}]\n

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' child element.

    <record>\n<text>The text content</text>\n</record>\n

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' key.

    - text: \"The text content\"\n

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--tsv","title":"TSV","text":"

    Should contain 'text' column separated by tabs.

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' column.

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' column.

    "},{"location":"text/bulk/language_model/#geniusrise_text.language_model.bulk.LanguageModelBulk.load_dataset--feather","title":"Feather","text":"

    Should contain 'text' column.

    "},{"location":"text/bulk/ner/","title":"Named Entity Recognition","text":"

    Bases: TextBulk

    NamedEntityRecognitionBulk is a class designed for bulk processing of Named Entity Recognition (NER) tasks. It leverages state-of-the-art NER models from Hugging Face's transformers library to identify and classify entities such as person names, locations, organizations, and other types of entities from a large corpus of text.

    This class provides functionalities to load large datasets, configure NER models, and perform entity recognition in bulk, making it suitable for processing large volumes of text data efficiently.

    Attributes:

    Name Type Description model Any

    The NER model loaded for entity recognition tasks.

    tokenizer Any

    The tokenizer used for text pre-processing in alignment with the model.

    Example CLI Usage:

    genius NamedEntityRecognitionBulk rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id dslim/bert-large-NER-lol \\\nrecognize_entities \\\n--args \\\nmodel_name=\"dslim/bert-large-NER\" \\\nmodel_class=\"AutoModelForTokenClassification\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False\n

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the NamedEntityRecognitionBulk class with specified input, output, and state configurations. Sets up the NER model and tokenizer for bulk entity recognition tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The input data configuration.

    required output BatchOutput

    The output data configuration.

    required state State

    The state management for the API.

    required **kwargs Any

    Additional keyword arguments for extended functionality.

    {}"},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Loads a dataset from the specified directory path. The method supports various data formats and structures, ensuring that the dataset is properly formatted for NER tasks.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required **kwargs Any

    Additional keyword arguments to handle specific dataset loading scenarios.

    {}

    Returns:

    Type Description Optional[Dataset]

    Optional[Dataset]: The loaded dataset or None if an error occurs during loading.

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--hugging-face-dataset","title":"Hugging Face Dataset","text":"

    Dataset files saved by the Hugging Face datasets library.

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"tokens\": [\"token1\", \"token2\", ...]}\n

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--csv","title":"CSV","text":"

    Should contain 'tokens' columns.

    tokens\n\"['token1', 'token2', ...]\"\n

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'tokens' columns.

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'tokens' keys.

    [{\"tokens\": [\"token1\", \"token2\", ...]}]\n

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'tokens' child elements.

    <record>\n<tokens>token1 token2 ...</tokens>\n</record>\n

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'tokens' keys.

    - tokens: [\"token1\", \"token2\", ...]\n

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--tsv","title":"TSV","text":"

    Should contain 'tokens' columns separated by tabs.

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'tokens' columns.

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'tokens' columns.

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.load_dataset--feather","title":"Feather","text":"

    Should contain 'tokens' columns.

    "},{"location":"text/bulk/ner/#geniusrise_text.ner.bulk.NamedEntityRecognitionBulk.recognize_entities","title":"recognize_entities(model_name, max_length=512, model_class='AutoModelForSeq2SeqLM', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, batch_size=32, notification_email=None, **kwargs)","text":"

    Performs bulk named entity recognition on the loaded dataset. The method processes the text in batches, applying the NER model to recognize entities.

    Parameters:

    Name Type Description Default model_name str

    The name or path of the NER model.

    required max_length int

    The maximum sequence length for the tokenizer.

    512 model_class str

    The class of the model, defaults to \"AutoModelForTokenClassification\".

    'AutoModelForSeq2SeqLM' tokenizer_class str

    The class of the tokenizer, defaults to \"AutoTokenizer\".

    'AutoTokenizer' use_cuda bool

    Whether to use CUDA for model inference, defaults to False.

    False precision str

    Model computation precision, defaults to \"float16\".

    'float16' quantization int

    Level of quantization for model size and speed optimization, defaults to 0.

    0 device_map str | Dict | None

    Specific device configuration for computation, defaults to \"auto\".

    'auto' max_memory Dict

    Maximum memory configuration for the devices.

    {0: '24GB'} torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model. Defaults to False.

    False compile bool

    Whether to compile the model before fine-tuning. Defaults to False.

    False awq_enabled bool

    Whether to enable AWQ optimization, defaults to False.

    False flash_attention bool

    Whether to use flash attention optimization, defaults to False.

    False batch_size int

    Number of documents to process simultaneously, defaults to 32.

    32 **kwargs Any

    Arbitrary keyword arguments for additional configuration.

    {}

    Returns:

    Name Type Description None None

    The method processes the dataset and saves the predictions without returning any value.

    "},{"location":"text/bulk/nli/","title":"Natural Language Inference","text":"

    Bases: TextBulk

    The NLIBulk class provides functionality for large-scale natural language inference (NLI) processing using Hugging Face transformers. It allows users to load datasets, configure models, and perform inference on batches of premise-hypothesis pairs.

    Attributes:

    Name Type Description input BatchInput

    Configuration and data inputs for the batch process.

    output BatchOutput

    Configurations for output data handling.

    state State

    State management for the inference task.

    Example CLI Usage:

    genius NLIBulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/nli \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/nli \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise\\\n--postgres_table state \\\n--id MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7-lol \\\ninfer \\\n--args \\\nmodel_name=\"MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7\" \\\nmodel_class=\"AutoModelForSequenceClassification\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False\n

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the NLIBulk class with the specified input, output, and state configurations.

    Parameters:

    Name Type Description Default input BatchInput

    The input data.

    required output BatchOutput

    The output data.

    required state State

    The state data.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.infer","title":"infer(model_name, max_length=512, model_class='AutoModelForSeq2SeqLM', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, batch_size=32, notification_email=None, **kwargs)","text":"

    Performs NLI inference on a loaded dataset using the specified model. The method processes the data in batches and saves the results to the configured output path.

    Parameters:

    Name Type Description Default model_name str

    Name or path of the NLI model.

    required max_length int

    Maximum length of the sequences for tokenization purposes. Defaults to 512.

    512 model_class str

    Class name of the model (e.g., \"AutoModelForSequenceClassification\"). Defaults to \"AutoModelForSeq2SeqLM\".

    'AutoModelForSeq2SeqLM' tokenizer_class str

    Class name of the tokenizer (e.g., \"AutoTokenizer\"). Defaults to \"AutoTokenizer\".

    'AutoTokenizer' use_cuda bool

    Whether to use CUDA for model inference. Defaults to False.

    False precision str

    Precision for model computation (e.g., \"float16\"). Defaults to \"float16\".

    'float16' quantization int

    Level of quantization for optimizing model size and speed. Defaults to 0.

    0 device_map str | Dict | None

    Specific device to use for computation. Defaults to \"auto\".

    'auto' max_memory Dict

    Maximum memory configuration for devices. Defaults to {0: \"24GB\"}.

    {0: '24GB'} torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model. Defaults to False.

    False compile bool

    Whether to compile the model before fine-tuning. Defaults to False.

    False awq_enabled bool

    Whether to enable AWQ optimization. Defaults to False.

    False flash_attention bool

    Whether to use flash attention optimization. Defaults to False.

    False batch_size int

    Number of premise-hypothesis pairs to process simultaneously. Defaults to 32.

    32 **kwargs Any

    Arbitrary keyword arguments for model and generation configurations.

    {}

    ```

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset","title":"load_dataset(dataset_path, max_length=512, **kwargs)","text":"

    Load a commonsense reasoning dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory or file.

    required max_length int

    Maximum length of text sequences for tokenization purposes. Defaults to 512.

    512 **kwargs

    Additional keyword arguments.

    {}

    Returns:

    Name Type Description Dataset Optional[Dataset]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--hugging-face-dataset","title":"Hugging Face Dataset","text":"

    Dataset files saved by the Hugging Face datasets library.

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"premise\": \"The premise text\", \"hypothesis\": \"The hypothesis text\"}\n

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--csv","title":"CSV","text":"

    Should contain 'premise' and 'hypothesis' columns.

    premise,hypothesis\n\"The premise text\",\"The hypothesis text\"\n

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'premise' and 'hypothesis' columns.

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'premise' and 'hypothesis' keys.

    [{\"premise\": \"The premise text\", \"hypothesis\": \"The hypothesis text\"}]\n

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'premise' and 'hypothesis' child elements.

    <record>\n<premise>The premise text</premise>\n<hypothesis>The hypothesis text</hypothesis>\n</record>\n

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'premise' and 'hypothesis' keys.

    - premise: \"The premise text\"\nhypothesis: \"The hypothesis text\"\n

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--tsv","title":"TSV","text":"

    Should contain 'premise' and 'hypothesis' columns separated by tabs.

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'premise' and 'hypothesis' columns.

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'premise' and 'hypothesis' columns.

    "},{"location":"text/bulk/nli/#geniusrise_text.nli.bulk.NLIBulk.load_dataset--feather","title":"Feather","text":"

    Should contain 'premise' and 'hypothesis' columns.

    "},{"location":"text/bulk/question_answering/","title":"Question Answering","text":"

    Bases: TextBulk

    QABulk is a class designed for managing bulk question-answering tasks using Hugging Face models. It is capable of handling both traditional text-based QA and table-based QA (using TAPAS and TAPEX models), providing a versatile solution for automated question answering at scale.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration and data inputs for batch processing.

    required output BatchOutput

    Configurations for output data handling.

    required state State

    State management for the bulk QA task.

    required **kwargs

    Arbitrary keyword arguments for extended functionality.

    {}

    Example CLI Usage:

    # For traditional text-based QA:\ngenius QABulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/qa-traditional \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/qa-traditional \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise\\\n--postgres_table state \\\n--id distilbert-base-uncased-distilled-squad-lol \\\nanswer_questions \\\n--args \\\nmodel_name=\"distilbert-base-uncased-distilled-squad\" \\\nmodel_class=\"AutoModelForQuestionAnswering\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False\n\n# For table-based QA using TAPAS:\ngenius QABulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/qa-table \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/qa-table \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise\\\n--postgres_table state \\\n--id google/tapas-base-finetuned-wtq-lol \\\nanswer_questions \\\n--args \\\nmodel_name=\"google/tapas-base-finetuned-wtq\" \\\nmodel_class=\"AutoModelForTableQuestionAnswering\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False\n\n# For table-based QA using TAPEX:\ngenius QABulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/qa-table \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/qa-table \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise\\\n--postgres_table state \\\n--id 
microsoft/tapex-large-finetuned-wtq-lol \\\nanswer_questions \\\n--args \\\nmodel_name=\"microsoft/tapex-large-finetuned-wtq\" \\\nmodel_class=\"AutoModelForSeq2SeqLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False\n

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the QABulk class with configurations for input, output, and state.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the QA task.

    required **kwargs Any

    Additional keyword arguments for extended functionality.

    {}"},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.answer_questions","title":"answer_questions(model_name, model_class='AutoModelForQuestionAnswering', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, batch_size=32, notification_email=None, **kwargs)","text":"

    Perform bulk question-answering using the specified model and tokenizer. This method can handle various types of QA models including traditional, TAPAS, and TAPEX.

    Parameters:

    Name Type Description Default model_name str

    Name or path of the question-answering model.

    required model_class str

    Class name of the model (e.g., \"AutoModelForQuestionAnswering\").

    'AutoModelForQuestionAnswering' tokenizer_class str

    Class name of the tokenizer (e.g., \"AutoTokenizer\").

    'AutoTokenizer' use_cuda bool

    Whether to use CUDA for model inference. Defaults to False.

    False precision str

    Precision for model computation. Defaults to \"float16\".

    'float16' quantization int

    Level of quantization for optimizing model size and speed. Defaults to 0.

    0 device_map str | Dict | None

    Specific device to use for computation. Defaults to \"auto\".

    'auto' max_memory Dict

    Maximum memory configuration for devices. Defaults to {0: \"24GB\"}.

    {0: '24GB'} torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model. Defaults to False.

    False compile bool

    Whether to compile the model before fine-tuning. Defaults to False.

    False awq_enabled bool

    Whether to enable AWQ optimization. Defaults to False.

    False flash_attention bool

    Whether to use flash attention optimization. Defaults to False.

    False batch_size int

    Number of questions to process simultaneously. Defaults to 32.

    32 **kwargs Any

    Arbitrary keyword arguments for model and generation configurations.

    {} Processing

    The method processes the data in batches, utilizing the appropriate model based on the model name and generating answers for the questions provided in the dataset.

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset","title":"load_dataset(dataset_path, max_length=512, **kwargs)","text":"

    Load a dataset from a directory.

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"context\": \"The context content\", \"question\": \"The question\"}\n

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--csv","title":"CSV","text":"

    Should contain 'context' and 'question' columns.

    context,question\n\"The context content\",\"The question\"\n

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'context' and 'question' columns.

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'context' and 'question' keys.

    [{\"context\": \"The context content\", \"question\": \"The question\"}]\n

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'context' and 'question' elements.

    <record>\n<context>The context content</context>\n<question>The question</question>\n</record>\n

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'context' and 'question' keys.

    - context: \"The context content\"\nquestion: \"The question\"\n

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--tsv","title":"TSV","text":"

    Should contain 'context' and 'question' columns separated by tabs.

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'context' and 'question' columns.

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'context' and 'question' columns.

    "},{"location":"text/bulk/question_answering/#geniusrise_text.qa.bulk.QABulk.load_dataset--feather","title":"Feather","text":"

    Should contain 'context' and 'question' columns.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required pad_on_right bool

    Whether to pad on the right.

    required max_length int

    The maximum length of the sequences.

    512 doc_stride int

    The document stride.

    required evaluate_squadv2 bool

    Whether to evaluate using SQuAD v2 metrics.

    required

    Returns:

    Name Type Description Dataset Optional[Dataset]

    The loaded dataset.

    "},{"location":"text/bulk/summarization/","title":"Summarization","text":"

    Bases: TextBulk

    SummarizationBulk is a class for managing bulk text summarization tasks using Hugging Face models. It is designed to handle large-scale summarization tasks efficiently and effectively, utilizing state-of-the-art machine learning models to provide high-quality summaries.

    The class provides methods to load datasets, configure summarization models, and execute bulk summarization tasks.

    Example CLI Usage:

    genius SummarizationBulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/summz \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/summz \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise\\\n--postgres_table state \\\n--id facebook/bart-large-cnn-lol \\\nsummarize \\\n--args \\\nmodel_name=\"facebook/bart-large-cnn\" \\\nmodel_class=\"AutoModelForSeq2SeqLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\ngeneration_bos_token_id=0 \\\ngeneration_decoder_start_token_id=2 \\\ngeneration_early_stopping=true \\\ngeneration_eos_token_id=2 \\\ngeneration_forced_bos_token_id=0 \\\ngeneration_forced_eos_token_id=2 \\\ngeneration_length_penalty=2.0 \\\ngeneration_max_length=142 \\\ngeneration_min_length=56 \\\ngeneration_no_repeat_ngram_size=3 \\\ngeneration_num_beams=4 \\\ngeneration_pad_token_id=1 \\\ngeneration_do_sample=false\n

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the SummarizationBulk class.

    Parameters:

    Name Type Description Default input BatchInput

    The input data configuration.

    required output BatchOutput

    The output data configuration.

    required state State

    The state configuration.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset","title":"load_dataset(dataset_path, max_length=512, **kwargs)","text":"

    Load a dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required **kwargs

    Additional keyword arguments.

    {}

    Returns:

    Type Description Optional[Dataset]

    Dataset | DatasetDict: The loaded dataset.

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\"}\n

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--csv","title":"CSV","text":"

    Should contain a 'text' column.

    text\n\"The text content\"\n

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--parquet","title":"Parquet","text":"

    Should contain a 'text' column.

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with a 'text' key.

    [{\"text\": \"The text content\"}]\n

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' child element.

    <record>\n<text>The text content</text>\n</record>\n

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with a 'text' key.

    - text: \"The text content\"\n

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--tsv","title":"TSV","text":"

    Should contain a 'text' column.

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain a 'text' column.

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with a 'text' column.

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.load_dataset--feather","title":"Feather","text":"

    Should contain a 'text' column.

    "},{"location":"text/bulk/summarization/#geniusrise_text.summarization.bulk.SummarizationBulk.summarize","title":"summarize(model_name, model_class='AutoModelForSeq2SeqLM', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, batch_size=32, max_length=512, notification_email=None, **kwargs)","text":"

    Perform bulk summarization using the specified model and tokenizer. This method handles the entire summarization process including loading the model, processing input data, generating summarization, and saving the results.

    Parameters:

    Name Type Description Default model_name str

    Name or path of the translation model.

    required origin str

    Source language ISO code.

    required target str

    Target language ISO code.

    required max_length int

    Maximum length of the tokens (default 512).

    512 model_class str

    Class name of the model (default \"AutoModelForSeq2SeqLM\").

    'AutoModelForSeq2SeqLM' tokenizer_class str

    Class name of the tokenizer (default \"AutoTokenizer\").

    'AutoTokenizer' use_cuda bool

    Whether to use CUDA for model inference (default False).

    False precision str

    Precision for model computation (default \"float16\").

    'float16' quantization int

    Level of quantization for optimizing model size and speed (default 0).

    0 device_map str | Dict | None

    Specific device to use for computation (default \"auto\").

    'auto' max_memory Dict

    Maximum memory configuration for devices.

    {0: '24GB'} torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model. Defaults to False.

    False compile bool

    Whether to compile the model before fine-tuning. Defaults to False.

    False awq_enabled bool

    Whether to enable AWQ optimization (default False).

    False flash_attention bool

    Whether to use flash attention optimization (default False).

    False batch_size int

    Number of translations to process simultaneously (default 32).

    32 max_length int

    Maximum length of the summary to be generated (default 512).

    512 **kwargs Any

    Arbitrary keyword arguments for model and generation configurations.

    {}"},{"location":"text/bulk/translation/","title":"Translation","text":"

    Bases: TextBulk

    TranslationBulk is a class for managing bulk translations using Hugging Face models. It is designed to handle large-scale translation tasks efficiently and effectively, using state-of-the-art machine learning models to provide high-quality translations for various language pairs.

    This class provides methods for loading datasets, configuring translation models, and executing bulk translation tasks.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration and data inputs for batch processing.

    required output BatchOutput

    Configuration for output data handling.

    required state State

    State management for translation tasks.

    required **kwargs

    Arbitrary keyword arguments for extended functionality.

    {}

    Example CLI Usage for Bulk Translation Task:

    genius TranslationBulk rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/trans \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/trans \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise \\\n--postgres_table state \\\n--id facebook/mbart-large-50-many-to-many-mmt-lol \\\ntranslate \\\n--args \\\nmodel_name=\"facebook/mbart-large-50-many-to-many-mmt\" \\\nmodel_class=\"AutoModelForSeq2SeqLM\" \\\ntokenizer_class=\"AutoTokenizer\" \\\norigin=\"hi_IN\" \\\ntarget=\"en_XX\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\ndevice_map=\"cuda:0\" \\\nmax_memory=None \\\ntorchscript=False \\\ngenerate_decoder_start_token_id=2 \\\ngenerate_early_stopping=true \\\ngenerate_eos_token_id=2 \\\ngenerate_forced_eos_token_id=2 \\\ngenerate_max_length=200 \\\ngenerate_num_beams=5 \\\ngenerate_pad_token_id=1\n
    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset","title":"load_dataset(dataset_path, max_length=512, origin='en', target='hi', **kwargs)","text":"

    Load a dataset from a directory.

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--supported-data-formats-and-structures-for-translation-tasks","title":"Supported Data Formats and Structures for Translation Tasks:","text":"

    Note: All examples are assuming the source as \"en\", refer to the specific model for this parameter.

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\n\"translation\": {\n\"en\": \"English text\"\n}\n}\n

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--csv","title":"CSV","text":"

    Should contain 'en' column.

    en\n\"English text\"\n

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'en' column.

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'en' key.

    [\n{\n\"en\": \"English text\"\n}\n]\n

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'en' child elements.

    <record>\n<en>English text</en>\n</record>\n

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'en' key.

    - en: \"English text\"\n

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--tsv","title":"TSV","text":"

    Should contain 'en' column separated by tabs.

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'en' column.

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'en' column.

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.load_dataset--feather","title":"Feather","text":"

    Should contain 'en' column.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the directory containing the dataset files.

    required max_length int

    The maximum length for tokenization. Defaults to 512.

    512 origin str

    The origin language. Defaults to 'en'.

    'en' target str

    The target language. Defaults to 'hi'.

    'hi' **kwargs

    Additional keyword arguments.

    {}

    Returns:

    Name Type Description DatasetDict Optional[Dataset]

    The loaded dataset.

    "},{"location":"text/bulk/translation/#geniusrise_text.translation.bulk.TranslationBulk.translate","title":"translate(model_name, origin, target, max_length=512, model_class='AutoModelForSeq2SeqLM', tokenizer_class='AutoTokenizer', use_cuda=False, precision='float16', quantization=0, device_map='auto', max_memory={0: '24GB'}, torchscript=False, compile=False, awq_enabled=False, flash_attention=False, batch_size=32, notification_email=None, **kwargs)","text":"

    Perform bulk translation using the specified model and tokenizer. This method handles the entire translation process including loading the model, processing input data, generating translations, and saving the results.

    Parameters:

    Name Type Description Default model_name str

    Name or path of the translation model.

    required origin str

    Source language ISO code.

    required target str

    Target language ISO code.

    required max_length int

    Maximum length of the tokens (default 512).

    512 model_class str

    Class name of the model (default \"AutoModelForSeq2SeqLM\").

    'AutoModelForSeq2SeqLM' tokenizer_class str

    Class name of the tokenizer (default \"AutoTokenizer\").

    'AutoTokenizer' use_cuda bool

    Whether to use CUDA for model inference (default False).

    False precision str

    Precision for model computation (default \"float16\").

    'float16' quantization int

    Level of quantization for optimizing model size and speed (default 0).

    0 device_map str | Dict | None

    Specific device to use for computation (default \"auto\").

    'auto' max_memory Dict

    Maximum memory configuration for devices.

    {0: '24GB'} torchscript bool

    Whether to use a TorchScript-optimized version of the pre-trained language model. Defaults to False.

    False compile bool

    Whether to compile the model before fine-tuning. Defaults to False.

    False awq_enabled bool

    Whether to enable AWQ optimization (default False).

    False flash_attention bool

    Whether to use flash attention optimization (default False).

    False batch_size int

    Number of translations to process simultaneously (default 32).

    32 **kwargs Any

    Arbitrary keyword arguments for model and generation configurations.

    {}"},{"location":"text/fine_tune/base/","title":"Base Fine Tuner","text":"

    Bases: Bolt

    A bolt for fine-tuning Hugging Face models.

    This bolt uses the Hugging Face Transformers library to fine-tune a pre-trained model. It uses the Trainer class from the Transformers library to handle the training.

    "},{"location":"text/fine_tune/base/#geniusrise_text.base.fine_tune.TextFineTuner.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initialize the bolt.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output BatchOutput

    The output data.

    required state State

    The state manager.

    required evaluate bool

    Whether to evaluate the model. Defaults to False.

    required **kwargs

    Additional keyword arguments.

    {}"},{"location":"text/fine_tune/base/#geniusrise_text.base.fine_tune.TextFineTuner.compute_metrics","title":"compute_metrics(eval_pred)","text":"

    Compute metrics for evaluation. This class implements a simple classification evaluation, tasks should ideally override this.

    Parameters:

    Name Type Description Default eval_pred EvalPrediction

    The evaluation predictions.

    required

    Returns:

    Name Type Description dict Optional[Dict[str, float]] | Dict[str, float]

    The computed metrics.

    "},{"location":"text/fine_tune/base/#geniusrise_text.base.fine_tune.TextFineTuner.fine_tune","title":"fine_tune(model_name, tokenizer_name, num_train_epochs, per_device_batch_size, model_class='AutoModel', tokenizer_class='AutoTokenizer', device_map='auto', precision='bfloat16', quantization=None, lora_config=None, use_accelerate=False, use_trl=False, accelerate_no_split_module_classes=[], compile=False, evaluate=False, save_steps=500, save_total_limit=None, load_best_model_at_end=False, metric_for_best_model=None, greater_is_better=None, map_data=None, use_huggingface_dataset=False, huggingface_dataset='', hf_repo_id=None, hf_commit_message=None, hf_token=None, hf_private=True, hf_create_pr=False, notification_email='', learning_rate=1e-05, **kwargs)","text":"

    Fine-tunes a pre-trained Hugging Face model.

    Parameters:

    Name Type Description Default model_name str

    The name of the pre-trained model.

    required tokenizer_name str

    The name of the pre-trained tokenizer.

    required num_train_epochs int

    The total number of training epochs to perform.

    required per_device_batch_size int

    The batch size per device during training.

    required model_class str

    The model class to use. Defaults to \"AutoModel\".

    'AutoModel' tokenizer_class str

    The tokenizer class to use. Defaults to \"AutoTokenizer\".

    'AutoTokenizer' device_map str | dict

    The device map for distributed training. Defaults to \"auto\".

    'auto' precision str

    The precision to use for training. Defaults to \"bfloat16\".

    'bfloat16' quantization int

    The quantization level to use for training. Defaults to None.

    None lora_config dict

    Configuration for PEFT LoRA optimization. Defaults to None.

    None use_accelerate bool

    Whether to use accelerate for distributed training. Defaults to False.

    False use_trl bool

    Whether to use TRL for training. Defaults to False.

    False accelerate_no_split_module_classes List[str]

    The module classes to not split during distributed training. Defaults to [].

    [] evaluate bool

    Whether to evaluate the model after training. Defaults to False.

    False compile bool

    Whether to compile the model before fine-tuning. Defaults to False.

    False save_steps int

    Number of steps between checkpoints. Defaults to 500.

    500 save_total_limit Optional[int]

    Maximum number of checkpoints to keep. Older checkpoints are deleted. Defaults to None.

    None load_best_model_at_end bool

    Whether to load the best model (according to evaluation) at the end of training. Defaults to False.

    False metric_for_best_model Optional[str]

    The metric to use to compare models. Defaults to None.

    None greater_is_better Optional[bool]

    Whether a larger value of the metric indicates a better model. Defaults to None.

    None use_huggingface_dataset bool

    Whether to load a dataset from huggingface hub.

    False huggingface_dataset str

    The huggingface dataset to use.

    '' map_data Callable

    A function to map data before training. Defaults to None.

    None hf_repo_id str

    The Hugging Face repo ID. Defaults to None.

    None hf_commit_message str

    The Hugging Face commit message. Defaults to None.

    None hf_token str

    The Hugging Face token. Defaults to None.

    None hf_private bool

    Whether to make the repo private. Defaults to True.

    True hf_create_pr bool

    Whether to create a pull request. Defaults to False.

    False notification_email str

    Whether to notify after job is complete. Defaults to None.

    '' learning_rate float

    Learning rate for backpropagation.

    1e-05 **kwargs

    Additional keyword arguments to pass to the model.

    {}

    Returns:

    Type Description

    None

    "},{"location":"text/fine_tune/base/#geniusrise_text.base.fine_tune.TextFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs) abstractmethod","text":"

    Load a dataset from a file.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset file.

    required split str

    The split to load. Defaults to None.

    required **kwargs

    Additional keyword arguments to pass to the load_dataset method.

    {}

    Returns:

    Type Description Dataset | DatasetDict | Optional[Dataset]

    Union[Dataset, DatasetDict, None]: The loaded dataset.

    Raises:

    Type Description NotImplementedError

    This method should be overridden by subclasses.

    "},{"location":"text/fine_tune/base/#geniusrise_text.base.fine_tune.TextFineTuner.load_models","title":"load_models(model_name, tokenizer_name, model_class='AutoModel', tokenizer_class='AutoTokenizer', device_map='auto', precision='bfloat16', quantization=None, lora_config=None, use_accelerate=False, accelerate_no_split_module_classes=[], **kwargs)","text":"

    Load the model and tokenizer.

    Parameters:

    Name Type Description Default model_name str

    The name of the model to be loaded.

    required tokenizer_name str

    The name of the tokenizer to be loaded. Defaults to None.

    required model_class str

    The class of the model. Defaults to \"AutoModel\".

    'AutoModel' tokenizer_class str

    The class of the tokenizer. Defaults to \"AutoTokenizer\".

    'AutoTokenizer' device Union[str, torch.device]

    The device to be used. Defaults to \"cuda\".

    required precision str

    The precision to be used. Choose from 'float32', 'float16', 'bfloat16'. Defaults to \"bfloat16\".

    'bfloat16' quantization Optional[int]

    The quantization to be used. Defaults to None.

    None lora_config Optional[dict]

    The LoRA configuration to be used. Defaults to None.

    None use_accelerate bool

    Whether to use accelerate. Defaults to False.

    False accelerate_no_split_module_classes List[str]

    The list of no split module classes to be used. Defaults to [].

    [] **kwargs

    Additional keyword arguments.

    {}

    Raises:

    Type Description ValueError

    If an unsupported precision is chosen.

    Returns:

    Type Description

    None

    "},{"location":"text/fine_tune/base/#geniusrise_text.base.fine_tune.TextFineTuner.preprocess_data","title":"preprocess_data(**kwargs)","text":"

    Load and preprocess the dataset

    "},{"location":"text/fine_tune/base/#geniusrise_text.base.fine_tune.TextFineTuner.upload_to_hf_hub","title":"upload_to_hf_hub(hf_repo_id=None, hf_commit_message=None, hf_token=None, hf_private=None, hf_create_pr=None)","text":"

    Upload the model and tokenizer to Hugging Face Hub.

    "},{"location":"text/fine_tune/classification/","title":"Classification","text":"

    Bases: TextFineTuner

    A bolt for fine-tuning Hugging Face models for text classification tasks.

    This class extends the TextFineTuner and specializes in fine-tuning models for text classification. It provides additional functionalities for loading and preprocessing text classification datasets in various formats.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output OutputConfig

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

    genius TextClassificationFineTuner rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id cardiffnlp/twitter-roberta-base-hate-multiclass-latest-lol \\\nfine_tune \\\n--args \\\nmodel_name=my_model \\\ntokenizer_name=my_tokenizer \\\nnum_train_epochs=3 \\\nper_device_train_batch_size=8 \\\ndata_max_length=512\n
    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.compute_metrics","title":"compute_metrics(eval_pred)","text":"

    Compute metrics for evaluation. This class implements a simple classification evaluation, tasks should ideally override this.

    Parameters:

    Name Type Description Default eval_pred EvalPrediction

    The evaluation predictions.

    required

    Returns:

    Name Type Description dict Union[Optional[Dict[str, float]], Dict[str, float]]

    The computed metrics.

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset","title":"load_dataset(dataset_path, max_length=512, **kwargs)","text":"

    Load a classification dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required max_length int

    The maximum length for tokenization. Defaults to 512.

    512

    Returns:

    Name Type Description Dataset Optional[Dataset]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\", \"label\": \"The label\"}\n

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'text' and 'label' columns.

    text,label\n\"The text content\",\"The label\"\n

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'text' and 'label' columns.

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'text' and 'label' keys.

    [{\"text\": \"The text content\", \"label\": \"The label\"}]\n

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' and 'label' child elements.

    <record>\n<text>The text content</text>\n<label>The label</label>\n</record>\n

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' and 'label' keys.

    - text: \"The text content\"\nlabel: \"The label\"\n

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'text' and 'label' columns separated by tabs.

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' and 'label' columns.

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' and 'label' columns.

    "},{"location":"text/fine_tune/classification/#geniusrise_text.classification.fine_tune.TextClassificationFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'text' and 'label' columns.

    "},{"location":"text/fine_tune/instruction_tuning/","title":"Instruction Tuning","text":"

    Bases: TextFineTuner

    A bolt for fine-tuning Hugging Face models on instruction tuning tasks.

    This class inherits from TextFineTuner and specializes in fine-tuning models for instruction-based tasks. It provides additional methods for loading and preparing datasets in various formats, as well as computing custom metrics.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output OutputConfig

    The output data.

    required state State

    The state manager.

    required

    Attributes:

    Name Type Description max_length int

    The maximum length for tokenization.

    CLI Usage:

        genius InstructionFineTuner rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id mistralai/Mistral-7B-Instruct-v0.1-lol \\\nfine_tune \\\n--args \\\nmodel_name=my_model \\\ntokenizer_name=my_tokenizer \\\nnum_train_epochs=3 \\\nper_device_train_batch_size=8 \\\ndata_max_length=512\n
    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.compute_metrics","title":"compute_metrics(eval_pred)","text":"

    Compute evaluation metrics for the model's predictions.

    This method takes the model's predictions and ground truth labels, converts them to text, and then computes the BLEU score for evaluation.

    Parameters:

    Name Type Description Default eval_pred EvalPrediction

    A named tuple containing predictions and label_ids. - predictions: The logits predicted by the model of shape (batch_size, sequence_length, num_classes). - label_ids: The ground truth labels of shape (batch_size, sequence_length).

    required

    Returns:

    Type Description Optional[Dict[str, float]]

    Optional[Dict[str, float]]: A dictionary containing the BLEU score. Returns None if an exception occurs.

    Raises:

    Type Description Exception

    If the tokenizer is not initialized.

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset","title":"load_dataset(dataset_path, max_length=512, **kwargs)","text":"

    Load an instruction tuning dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required max_length int

    The maximum length for tokenization. Defaults to 512.

    512

    Returns:

    Name Type Description Dataset Union[Dataset, Dict]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"instruction\": \"The instruction\", \"output\": \"The output\"}\n

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'instruction' and 'output' columns.

    instruction,output\n\"The instruction\",\"The output\"\n

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'instruction' and 'output' columns.

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'instruction' and 'output' keys.

    [{\"instruction\": \"The instruction\", \"output\": \"The output\"}]\n

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'instruction' and 'output' child elements.

    <record>\n<instruction>The instruction</instruction>\n<output>The output</output>\n</record>\n

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'instruction' and 'output' keys.

    - instruction: \"The instruction\"\noutput: \"The output\"\n

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'instruction' and 'output' columns separated by tabs.

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'instruction' and 'output' columns.

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'instruction' and 'output' columns.

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'instruction' and 'output' columns.

    "},{"location":"text/fine_tune/instruction_tuning/#geniusrise_text.instruction.fine_tune.InstructionFineTuner.prepare_train_features","title":"prepare_train_features(examples)","text":"

    Tokenize the examples and prepare the features for training.

    Parameters:

    Name Type Description Default examples dict

    A dictionary of examples.

    required

    Returns:

    Name Type Description dict Dict

    The processed features.

    "},{"location":"text/fine_tune/language_model/","title":"Language Model","text":"

    Bases: TextFineTuner

    A bolt for fine-tuning Hugging Face models on language modeling tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output OutputConfig

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

    genius LanguageModelFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/lm \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/lm \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise \\\n--postgres_table state \\\n--id mistralai/Mistral-7B-Instruct-v0.1-lol \\\nfine_tune \\\n--args \\\nmodel_name=my_model \\\ntokenizer_name=my_tokenizer \\\nnum_train_epochs=3 \\\nper_device_train_batch_size=8 \\\ndata_max_length=512\n
    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.compute_metrics","title":"compute_metrics(eval_pred)","text":"

    Compute evaluation metrics for the model's predictions.

    This method takes the model's predictions and ground truth labels, converts them to text, and then computes the BLEU score for evaluation.

    Parameters:

    Name Type Description Default eval_pred EvalPrediction

    A named tuple containing predictions and label_ids. - predictions: The logits predicted by the model of shape (batch_size, sequence_length, num_classes). - label_ids: The ground truth labels of shape (batch_size, sequence_length).

    required

    Returns:

    Type Description Optional[Dict[str, float]]

    Optional[Dict[str, float]]: A dictionary containing the BLEU score. Returns None if an exception occurs.

    Raises:

    Type Description Exception

    If the tokenizer is not initialized.

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.data_collator","title":"data_collator(examples)","text":"

    Customize the data collator.

    Parameters:

    Name Type Description Default examples

    The examples to collate.

    required

    Returns:

    Name Type Description dict

    The collated data.

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset","title":"load_dataset(dataset_path, masked=False, max_length=512, **kwargs)","text":"

    Load a language modeling dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required masked bool

    Whether to use masked language modeling. Defaults to False.

    False max_length int

    The maximum length for tokenization. Defaults to 512.

    512

    Returns:

    Name Type Description Dataset

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--dataset-files-saved-by-hugging-face-datasets-library","title":"Dataset files saved by Hugging Face datasets library","text":"

    The directory should contain 'dataset_info.json' and other related files.

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\"}\n

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'text' column.

    text\n\"The text content\"\n

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'text' column.

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'text' key.

    [{\"text\": \"The text content\"}]\n

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' child element.

    <record>\n<text>The text content</text>\n</record>\n

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' key.

    - text: \"The text content\"\n

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'text' column separated by tabs.

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' column.

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' column.

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'text' column.

    "},{"location":"text/fine_tune/language_model/#geniusrise_text.language_model.fine_tune.LanguageModelFineTuner.prepare_train_features","title":"prepare_train_features(examples)","text":"

    Tokenize the examples and prepare the features for training.

    Parameters:

    Name Type Description Default examples dict

    A dictionary of examples.

    required

    Returns:

    Name Type Description dict

    The processed features.

    "},{"location":"text/fine_tune/ner/","title":"Named Entity Recognition","text":"

    Bases: TextFineTuner

    A bolt for fine-tuning Hugging Face models on named entity recognition tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output OutputConfig

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius NamedEntityRecognitionFineTuner rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id dslim/bert-large-NER-lol \\\nfine_tune \\\n--args \\\nmodel_name=my_model \\\ntokenizer_name=my_tokenizer \\\nnum_train_epochs=3 \\\nper_device_train_batch_size=8\n
    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.data_collator","title":"data_collator(examples)","text":"

    Customize the data collator.

    Parameters:

    Name Type Description Default examples List[Dict[str, torch.Tensor]]

    The examples to collate.

    required

    Returns:

    Type Description Dict[str, torch.Tensor]

    Dict[str, torch.Tensor]: The collated data.

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset","title":"load_dataset(dataset_path, label_list=[], **kwargs)","text":"

    Load a named entity recognition dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required label_list List[str]

    The list of labels for named entity recognition. Defaults to [].

    []

    Returns:

    Name Type Description DatasetDict Union[Dataset, DatasetDict, None]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--hugging-face-dataset","title":"Hugging Face Dataset","text":"

    Dataset files saved by the Hugging Face datasets library.

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"tokens\": [\"token1\", \"token2\", ...], \"ner_tags\": [0, 1, ...]}\n

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'tokens' and 'ner_tags' columns.

    tokens,ner_tags\n\"['token1', 'token2', ...]\", \"[0, 1, ...]\"\n

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'tokens' and 'ner_tags' columns.

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'tokens' and 'ner_tags' keys.

    [{\"tokens\": [\"token1\", \"token2\", ...], \"ner_tags\": [0, 1, ...]}]\n

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'tokens' and 'ner_tags' child elements.

    <record>\n<tokens>token1 token2 ...</tokens>\n<ner_tags>0 1 ...</ner_tags>\n</record>\n

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'tokens' and 'ner_tags' keys.

    - tokens: [\"token1\", \"token2\", ...]\nner_tags: [0, 1, ...]\n

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'tokens' and 'ner_tags' columns separated by tabs.

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'tokens' and 'ner_tags' columns.

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'tokens' and 'ner_tags' columns.

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'tokens' and 'ner_tags' columns.

    "},{"location":"text/fine_tune/ner/#geniusrise_text.ner.fine_tune.NamedEntityRecognitionFineTuner.prepare_train_features","title":"prepare_train_features(examples)","text":"

    Tokenize the examples and prepare the features for training.

    Parameters:

    Name Type Description Default examples Dict[str, Union[List[str], List[int]]]

    A dictionary of examples.

    required

    Returns:

    Type Description Dict[str, Union[List[str], List[int]]]

    Dict[str, Union[List[str], List[int]]]: The processed features.

    "},{"location":"text/fine_tune/nli/","title":"Natural Language Inference","text":"

    Bases: TextFineTuner

    A bolt for fine-tuning Hugging Face models for natural language inference (NLI) tasks.

    This class extends the TextFineTuner and specializes in fine-tuning models for natural language inference. It provides additional functionalities for loading and preprocessing NLI datasets in various formats.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output OutputConfig

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius NLIFineTuner rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7-lol\n        fine_tune \\\n--args \\\nmodel_name=my_model \\\ntokenizer_name=my_tokenizer \\\nnum_train_epochs=3 \\\nper_device_train_batch_size=8\n
    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.data_collator","title":"data_collator(examples)","text":"

    Customize the data collator.

    Parameters:

    Name Type Description Default examples Dict

    The examples to collate.

    required

    Returns:

    Name Type Description dict Dict

    The collated data.

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load a natural language inference dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required **kwargs Any

    Additional keyword arguments.

    {}

    Returns:

    Name Type Description Dataset Union[Dataset, DatasetDict, None]

    The loaded dataset.

    Raises:

    Type Description Exception

    If there was an error loading the dataset.

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--hugging-face-dataset","title":"Hugging Face Dataset","text":"

    Dataset files saved by the Hugging Face datasets library.

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"premise\": \"The premise text\", \"hypothesis\": \"The hypothesis text\", \"label\": 0 or 1 or 2}\n

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns.

    premise,hypothesis,label\n\"The premise text\",\"The hypothesis text\",0\n

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns.

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'premise', 'hypothesis', and 'label' keys.

    [{\"premise\": \"The premise text\", \"hypothesis\": \"The hypothesis text\", \"label\": 0}]\n

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'premise', 'hypothesis', and 'label' child elements.

    <record>\n<premise>The premise text</premise>\n<hypothesis>The hypothesis text</hypothesis>\n<label>0</label>\n</record>\n

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'premise', 'hypothesis', and 'label' keys.

    - premise: \"The premise text\"\nhypothesis: \"The hypothesis text\"\nlabel: 0\n

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns separated by tabs.

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns.

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'premise', 'hypothesis', and 'label' columns.

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'premise', 'hypothesis', and 'label' columns.

    "},{"location":"text/fine_tune/nli/#geniusrise_text.nli.fine_tune.NLIFineTuner.prepare_train_features","title":"prepare_train_features(examples)","text":"

    Tokenize the examples and prepare the features for training.

    Parameters:

    Name Type Description Default examples dict

    A dictionary of examples.

    required

    Returns:

    Name Type Description dict Dict

    The processed features.

    "},{"location":"text/fine_tune/question_answering/","title":"Question Answering","text":"

    Bases: TextFineTuner

    A bolt for fine-tuning Hugging Face models on question answering tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output OutputConfig

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius QAFineTuner rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\n--id microsoft/tapex-large-finetuned-wtq-lol \\\nfine_tune \\\n--args \\\nmodel_name=my_model \\\ntokenizer_name=my_tokenizer \\\nnum_train_epochs=3 \\\nper_device_train_batch_size=8\n
    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initialize the bolt.

    Args:\n    input (BatchInput): The batch input data.\n    output (OutputConfig): The output data.\n    state (State): The state manager.\n    **kwargs: Additional keyword arguments.\n
    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.compute_metrics","title":"compute_metrics(eval_pred)","text":"

    Compute the accuracy of the model's predictions.

    Parameters:

    Name Type Description Default eval_pred tuple

    A tuple containing two elements: - predictions (np.ndarray): The model's predictions. - label_ids (np.ndarray): The true labels.

    required

    Returns:

    Name Type Description dict Optional[Dict[str, float]]

    A dictionary mapping metric names to computed values.

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset","title":"load_dataset(dataset_path, pad_on_right=True, max_length=None, doc_stride=None, evaluate_squadv2=False, **kwargs)","text":"

    Load a dataset from a directory.

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"context\": \"The context content\", \"question\": \"The question\", \"answers\": {\"answer_start\": [int], \"text\": [str]}}\n

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'context', 'question', and 'answers' columns.

    context,question,answers\n\"The context content\",\"The question\",\"{'answer_start': [int], 'text': [str]}\"\n

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'context', 'question', and 'answers' columns.

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'context', 'question', and 'answers' keys.

    [{\"context\": \"The context content\", \"question\": \"The question\", \"answers\": {\"answer_start\": [int], \"text\": [str]}}]\n

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'context', 'question', and 'answers' child elements.

    <record>\n<context>The context content</context>\n<question>The question</question>\n<answers answer_start=\"int\" text=\"str\"></answers>\n</record>\n

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'context', 'question', and 'answers' keys.

    - context: \"The context content\"\nquestion: \"The question\"\nanswers:\nanswer_start: [int]\ntext: [str]\n

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'context', 'question', and 'answers' columns separated by tabs.

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'context', 'question', and 'answers' columns.

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'context', 'question', and 'answers' columns.

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'context', 'question', and 'answers' columns.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required pad_on_right bool

    Whether to pad on the right.

    True max_length int

    The maximum length of the sequences.

    None doc_stride int

    The document stride.

    None evaluate_squadv2 bool

    Whether to evaluate using SQuAD v2 metrics.

    False

    Returns:

    Name Type Description Dataset Optional[Dataset]

    The loaded dataset.

    "},{"location":"text/fine_tune/question_answering/#geniusrise_text.qa.fine_tune.QAFineTuner.prepare_train_features","title":"prepare_train_features(examples, cls_token_id=None)","text":"

    Tokenize our examples with truncation and padding, but keep the overflows using a stride.

    Parameters:

    Name Type Description Default examples Dict[str, Union[str, List[str]]]

    The examples to be tokenized.

    required

    Returns:

    Type Description Optional[Dict[str, Union[List[int], List[List[int]]]]]

    The tokenized examples.

    "},{"location":"text/fine_tune/summarization/","title":"Summarization","text":"

    Bases: TextFineTuner

    A bolt for fine-tuning Hugging Face models on summarization tasks.

    Parameters:

    Name Type Description Default input BatchInput

    The batch input data.

    required output OutputConfig

    The output data.

    required state State

    The state manager.

    required

    CLI Usage:

        genius SummarizationFineTuner rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nfine_tune \\\n--args \\\nmodel_name=my_model \\\ntokenizer_name=my_tokenizer \\\nnum_train_epochs=3 \\\nper_device_train_batch_size=8\n
    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.compute_metrics","title":"compute_metrics(pred)","text":"

    Compute ROUGE metrics.

    Parameters:

    Name Type Description Default pred EvalPrediction

    The predicted results.

    required

    Returns:

    Name Type Description dict Dict[str, float]

    A dictionary with ROUGE-1, ROUGE-2, and ROUGE-L scores.

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.data_collator","title":"data_collator(examples)","text":"

    Customize the data collator.

    Parameters:

    Name Type Description Default examples List[Dict[str, Union[str, List[int]]]]

    The examples to collate.

    required

    Returns:

    Name Type Description dict Dict[str, Union[List[int], List[List[int]]]]

    The collated data.

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset","title":"load_dataset(dataset_path, **kwargs)","text":"

    Load a dataset from a directory.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the dataset directory.

    required **kwargs Any

    Additional keyword arguments.

    {}

    Returns:

    Type Description Optional[DatasetDict]

    Dataset | DatasetDict: The loaded dataset.

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--supported-data-formats-and-structures","title":"Supported Data Formats and Structures:","text":""},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\"text\": \"The text content\", \"summary\": \"The summary\"}\n

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'text' and 'summary' columns.

    text,summary\n\"The text content\",\"The summary\"\n

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'text' and 'summary' columns.

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'text' and 'summary' keys.

    [{\"text\": \"The text content\", \"summary\": \"The summary\"}]\n

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'text' and 'summary' child elements.

    <record>\n<text>The text content</text>\n<summary>The summary</summary>\n</record>\n

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'text' and 'summary' keys.

    - text: \"The text content\"\nsummary: \"The summary\"\n

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'text' and 'summary' columns separated by tabs.

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'text' and 'summary' columns.

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'text' and 'summary' columns.

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'text' and 'summary' columns.

    "},{"location":"text/fine_tune/summarization/#geniusrise_text.summarization.fine_tune.SummarizationFineTuner.prepare_train_features","title":"prepare_train_features(examples)","text":"

    Tokenize the examples and prepare the features for training.

    Parameters:

    Name Type Description Default examples dict

    A dictionary of examples.

    required

    Returns:

    Name Type Description dict Optional[Dict[str, List[int]]]

    The processed features.

    "},{"location":"text/fine_tune/translation/","title":"Translation","text":"

    Bases: TextFineTuner

    A bolt for fine-tuning Hugging Face models on translation tasks.

    Args:\n    input (BatchInput): The batch input data.\n    output (OutputConfig): The output data.\n    state (State): The state manager.\n    **kwargs: Arbitrary keyword arguments for extended functionality.\n

    CLI Usage:

        genius TranslationFineTuner rise \\\nbatch \\\n--input_s3_bucket geniusrise-test \\\n--input_s3_folder input/trans \\\nbatch \\\n--output_s3_bucket geniusrise-test \\\n--output_s3_folder output/trans \\\npostgres \\\n--postgres_host 127.0.0.1 \\\n--postgres_port 5432 \\\n--postgres_user postgres \\\n--postgres_password postgres \\\n--postgres_database geniusrise\\\n--postgres_table state \\\n--id facebook/mbart-large-50-many-to-many-mmt-lol \\\nfine_tune \\\n--args \\\nmodel_name=my_model \\\ntokenizer_name=my_tokenizer \\\nnum_train_epochs=3 \\\nper_device_train_batch_size=8 \\\ndata_max_length=512\n
    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.data_collator","title":"data_collator(examples)","text":"

    Customize the data collator.

    Parameters:

    Name Type Description Default examples

    The examples to collate.

    required

    Returns:

    Name Type Description dict

    The collated data.

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset","title":"load_dataset(dataset_path, max_length=512, origin='en', target='fr', **kwargs)","text":"

    Load a dataset from a directory.

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--supported-data-formats-and-structures-for-translation-tasks","title":"Supported Data Formats and Structures for Translation Tasks:","text":""},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--jsonl","title":"JSONL","text":"

    Each line is a JSON object representing an example.

    {\n\"translation\": {\n\"en\": \"English text\",\n\"fr\": \"French text\"\n}\n}\n

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--csv","title":"CSV","text":"

    Should contain 'en' and 'fr' columns.

    en,fr\n\"English text\",\"French text\"\n

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--parquet","title":"Parquet","text":"

    Should contain 'en' and 'fr' columns.

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--json","title":"JSON","text":"

    An array of dictionaries with 'en' and 'fr' keys.

    [\n{\n\"en\": \"English text\",\n\"fr\": \"French text\"\n}\n]\n

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--xml","title":"XML","text":"

    Each 'record' element should contain 'en' and 'fr' child elements.

    <record>\n<en>English text</en>\n<fr>French text</fr>\n</record>\n

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--yaml","title":"YAML","text":"

    Each document should be a dictionary with 'en' and 'fr' keys.

    - en: \"English text\"\nfr: \"French text\"\n

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--tsv","title":"TSV","text":"

    Should contain 'en' and 'fr' columns separated by tabs.

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--excel-xls-xlsx","title":"Excel (.xls, .xlsx)","text":"

    Should contain 'en' and 'fr' columns.

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--sqlite-db","title":"SQLite (.db)","text":"

    Should contain a table with 'en' and 'fr' columns.

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.load_dataset--feather","title":"Feather","text":"

    Should contain 'en' and 'fr' columns.

    Parameters:

    Name Type Description Default dataset_path str

    The path to the directory containing the dataset files.

    required max_length int

    The maximum length for tokenization. Defaults to 512.

    512 origin str

    The origin language. Defaults to 'en'.

    'en' target str

    The target language. Defaults to 'fr'.

    'fr' **kwargs Any

    Additional keyword arguments.

    {}

    Returns:

    Name Type Description DatasetDict Optional[DatasetDict]

    The loaded dataset.

    "},{"location":"text/fine_tune/translation/#geniusrise_text.translation.fine_tune.TranslationFineTuner.prepare_train_features","title":"prepare_train_features(examples)","text":"

    Tokenize the examples and prepare the features for training.

    Parameters:

    Name Type Description Default examples dict

    A dictionary of examples.

    required

    Returns:

    Name Type Description dict

    The processed features.

    "},{"location":"vision/api/base/","title":"Vision Base","text":"

    Bases: VisionBulk

    The VisionAPI class inherits from VisionBulk and is designed to facilitate the handling of vision-based tasks using a pre-trained machine learning model. It sets up a server to process image-related requests using a specified model.

    "},{"location":"vision/api/base/#geniusrise_vision.base.api.VisionAPI.__init__","title":"__init__(input, output, state)","text":"

    Initializes the VisionAPI object with batch input, output, and state.

    Parameters:

    Name Type Description Default input BatchInput

    Object to handle batch input operations.

    required output BatchOutput

    Object to handle batch output operations.

    required state State

    Object to maintain the state of the API.

    required"},{"location":"vision/api/base/#geniusrise_vision.base.api.VisionAPI.listen","title":"listen(model_name, model_class='AutoModel', processor_class='AutoProcessor', device_map='auto', max_memory={0: '24GB'}, use_cuda=False, precision='float16', quantization=0, torchscript=False, compile=False, flash_attention=False, better_transformers=False, concurrent_queries=False, endpoint='*', port=3000, cors_domain='http://localhost:3000', username=None, password=None, **model_args)","text":"

    Configures and starts a CherryPy server to listen for image processing requests.

    Parameters:

    Name Type Description Default model_name str

    The name of the pre-trained vision model.

    required model_class str

    The class of the pre-trained vision model. Defaults to \"AutoModel\".

    'AutoModel' processor_class str

    The class of the processor for input image preprocessing. Defaults to \"AutoProcessor\".

    'AutoProcessor' device_map str | Dict | None

    Device mapping for model inference. Defaults to \"auto\".

    'auto' max_memory Dict[int, str]

    Maximum memory allocation for model inference. Defaults to {0: \"24GB\"}.

    {0: '24GB'} precision str

    The floating-point precision to be used by the model. Options are 'float32', 'float16', 'bfloat16'.

    'float16' quantization int

    The bit level for model quantization (0 for none, 8 for 8-bit quantization).

    0 torchscript bool

    Whether to use TorchScript for model optimization. Defaults to True.

    False compile bool

    Whether to compile the model before fine-tuning. Defaults to False.

    False flash_attention bool

    Whether to use flash attention 2. Default is False.

    False better_transformers bool

    Flag to enable Better Transformers optimization for faster processing.

    False concurrent_queries bool

    (bool): Whether the API supports concurrent API calls (usually false).

    False endpoint str

    The network endpoint for the server. Defaults to \"*\".

    '*' port int

    The network port for the server. Defaults to 3000.

    3000 cors_domain str

    The domain to allow for CORS requests. Defaults to \"http://localhost:3000\".

    'http://localhost:3000' username Optional[str]

    Username for server authentication. Defaults to None.

    None password Optional[str]

    Password for server authentication. Defaults to None.

    None **model_args Any

    Additional arguments for the vision model.

    {}"},{"location":"vision/api/base/#geniusrise_vision.base.api.VisionAPI.validate_password","title":"validate_password(realm, username, password)","text":"

    Validate the username and password against expected values.

    Parameters:

    Name Type Description Default realm str

    The authentication realm.

    required username str

    The provided username.

    required password str

    The provided password.

    required

    Returns:

    Name Type Description bool

    True if credentials are valid, False otherwise.

    "},{"location":"vision/api/imgclass/","title":"Image Classsification API","text":"

    Bases: VisionAPI

    ImageClassificationAPI extends the VisionAPI for image classification tasks. This API provides functionalities to classify images into various categories based on the trained model it uses. It supports both single-label and multi-label classification problems.

    Methods

    classify_image(self): Endpoint to classify an uploaded image and return the classification scores. sigmoid(self, _outputs): Applies the sigmoid function to the model's outputs. softmax(self, _outputs): Applies the softmax function to the model's outputs.

    Example CLI Usage:

    genius ImageClassificationAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"Kaludi/food-category-classification-v2.0\" \\\nmodel_class=\"AutoModelForImageClassification\" \\\nprocessor_class=\"AutoImageProcessor\" \\\ndevice_map=\"cuda:0\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\nmax_memory=None \\\ntorchscript=False \\\ncompile=False \\\nflash_attention=False \\\nbetter_transformers=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    "},{"location":"vision/api/imgclass/#geniusrise_vision.imgclass.api.ImageClassificationAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the ImageClassificationAPI with the necessary configurations for input, output, and state management, along with model-specific parameters.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the API.

    required **kwargs

    Additional keyword arguments for extended functionality, such as model configuration.

    {}"},{"location":"vision/api/imgclass/#geniusrise_vision.imgclass.api.ImageClassificationAPI.classify_image","title":"classify_image()","text":"

    Endpoint for classifying an image. It accepts a base64-encoded image, decodes it, preprocesses it, and runs it through the classification model. It supports both single-label and multi-label classification by applying the appropriate post-processing function to the model outputs.

    Returns:

    Type Description Dict[str, Any]

    Dict[str, Any]: A dictionary containing the predictions with the highest scores and all prediction scores.

    Dict[str, Any]

    Each prediction includes the label and its corresponding score.

    Raises:

    Type Description Exception

    If an error occurs during image processing or classification.

    Example CURL Request:

    curl -X POST localhost:3000/api/v1/classify_image             -H \"Content-Type: application/json\"             -d '{\"image_base64\": \"<base64-encoded-image>\"}'\n

    or to feed an image:

    (base64 -w 0 cat.jpg | awk '{print \"{\"image_base64\": \"\"$0\"\"}\"}' > /tmp/image_payload.json)\ncurl -X POST http://localhost:3000/api/v1/classify_image             -H \"Content-Type: application/json\"             -u user:password             -d @/tmp/image_payload.json | jq\n

    "},{"location":"vision/api/imgclass/#geniusrise_vision.imgclass.api.ImageClassificationAPI.sigmoid","title":"sigmoid(_outputs)","text":"

    Applies the sigmoid function to the model's outputs for binary classification or multi-label classification tasks.

    Parameters:

    Name Type Description Default _outputs np.ndarray

    The raw outputs from the model.

    required

    Returns:

    Type Description np.ndarray

    np.ndarray: The outputs after applying the sigmoid function.

    "},{"location":"vision/api/imgclass/#geniusrise_vision.imgclass.api.ImageClassificationAPI.softmax","title":"softmax(_outputs)","text":"

    Applies the softmax function to the model's outputs for single-label classification tasks, ensuring the output scores sum to 1 across classes.

    Parameters:

    Name Type Description Default _outputs np.ndarray

    The raw outputs from the model.

    required

    Returns:

    Type Description np.ndarray

    np.ndarray: The outputs after applying the softmax function.

    "},{"location":"vision/api/ocr/","title":"OCR API","text":"

    Bases: VisionAPI

    ImageOCRAPI provides Optical Character Recognition (OCR) capabilities for images, leveraging different OCR engines like EasyOCR, PaddleOCR, and Hugging Face models tailored for OCR tasks. This API can decode base64-encoded images, process them through the chosen OCR engine, and return the recognized text.

    The API supports dynamic selection of OCR engines and configurations based on the provided model name and arguments, offering flexibility in processing various languages and image types.

    Methods

    ocr(self): Processes an uploaded image for OCR and returns the recognized text.

    Example CLI Usage:

    EasyOCR:

    genius ImageOCRAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"easyocr\" \\\ndevice_map=\"cuda:0\" \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    Paddle OCR:

    genius ImageOCRAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"paddleocr\" \\\ndevice_map=\"cuda:0\" \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n

    Huggingface models:

    genius ImageOCRAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"facebook/nougat-base\" \\\nmodel_class=\"VisionEncoderDecoderModel\" \\\nprocessor_class=\"NougatProcessor\" \\\ndevice_map=\"cuda:0\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\nmax_memory=None \\\ntorchscript=False \\\ncompile=False \\\nflash_attention=False \\\nbetter_transformers=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    "},{"location":"vision/api/ocr/#geniusrise_vision.ocr.api.ImageOCRAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the ImageOCRAPI with configurations for input, output, state management, and OCR model specifics.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the API.

    required **kwargs

    Additional keyword arguments for extended functionality.

    {}"},{"location":"vision/api/ocr/#geniusrise_vision.ocr.api.ImageOCRAPI.ocr","title":"ocr()","text":"

    Endpoint for performing OCR on an uploaded image. It accepts a base64-encoded image, decodes it, preprocesses it through the specified OCR model, and returns the recognized text.

    Returns:

    Type Description

    Dict[str, Any]: A dictionary containing the success status, recognized text ('result'), and the original

    image name ('image_name') if provided.

    Raises:

    Type Description Exception

    If an error occurs during image processing or OCR.

    Example CURL Request:

    curl -X POST localhost:3000/api/v1/ocr             -H \"Content-Type: application/json\"             -d '{\"image_base64\": \"<base64-encoded-image>\", \"model_name\": \"easyocr\", \"use_easyocr_bbox\": true}'\n

    or

    (base64 -w 0 test_images_ocr/ReceiptSwiss.jpg | awk '{print \"{\"image_base64\": \"\"$0\"\", \"max_length\": 1024}\"}' > /tmp/image_payload.json)\ncurl -X POST http://localhost:3000/api/v1/ocr             -H \"Content-Type: application/json\"             -u user:password             -d @/tmp/image_payload.json | jq\n
    "},{"location":"vision/api/ocr/#geniusrise_vision.ocr.api.ImageOCRAPI.process_huggingface_models","title":"process_huggingface_models(image, use_easyocr_bbox)","text":"

    Processes the image using a Hugging Face model specified for OCR tasks. Supports advanced configurations and post-processing to handle various OCR-related challenges.

    Parameters:

    Name Type Description Default image Image.Image

    The image to process.

    required use_easyocr_bbox bool

    Whether to use EasyOCR to detect text bounding boxes before processing with Hugging Face models.

    required

    Returns:

    Name Type Description str

    The recognized text from the image.

    "},{"location":"vision/api/ocr/#geniusrise_vision.ocr.api.ImageOCRAPI.process_other_models","title":"process_other_models(image)","text":"

    Processes the image using non-Hugging Face OCR models like EasyOCR or PaddleOCR based on the initialization.

    Parameters:

    Name Type Description Default image Image.Image

    The image to process.

    required

    Returns:

    Name Type Description Any Any

    The OCR results which might include text, bounding boxes, and confidence scores depending on the model.

    Raises:

    Type Description ValueError

    If an invalid or unsupported OCR model is specified.

    "},{"location":"vision/api/segment/","title":"Image Segmentation API","text":"

    Bases: VisionAPI

    VisionSegmentationAPI extends VisionAPI to provide image segmentation functionalities, including panoptic, instance, and semantic segmentation. This API supports different segmentation tasks based on the model's capabilities and the specified subtask in the request.

    Methods

    segment_image(self): Processes an image for segmentation and returns the segmentation masks along with labels.

    Example CLI Usage:

    genius VisionSegmentationAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"facebook/mask2former-swin-large-mapillary-vistas-semantic\" \\\nmodel_class=\"Mask2FormerForUniversalSegmentation\" \\\nprocessor_class=\"AutoImageProcessor\" \\\ndevice_map=\"cuda:0\" \\\nuse_cuda=True \\\nprecision=\"float\" \\\nquantization=0 \\\nmax_memory=None \\\ntorchscript=False \\\ncompile=False \\\nflash_attention=False \\\nbetter_transformers=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    "},{"location":"vision/api/segment/#geniusrise_vision.segment.api.VisionSegmentationAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the VisionSegmentationAPI with configurations for input, output, and state management, along with any model-specific parameters for segmentation tasks.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the API.

    required **kwargs

    Additional keyword arguments for extended functionality.

    {}"},{"location":"vision/api/segment/#geniusrise_vision.segment.api.VisionSegmentationAPI.segment_image","title":"segment_image()","text":"

    Endpoint for segmenting an image according to the specified subtask (panoptic, instance, or semantic segmentation). It decodes the base64-encoded image, processes it through the model, and returns the segmentation masks along with labels and scores (if applicable) in base64 format.

    The method supports dynamic task inputs for models requiring specific task descriptions and applies different post-processing techniques based on the subtask.

    Returns:

    Type Description List[Dict[str, Any]]

    List[Dict[str, Any]]: A list of dictionaries where each dictionary contains a 'label', a 'score' (if applicable),

    List[Dict[str, Any]]

    and a 'mask' (base64-encoded image of the segmentation mask).

    Raises:

    Type Description Exception

    If an error occurs during image processing or segmentation.

    Example CURL Request:

    curl -X POST localhost:3000/api/v1/segment_image             -H \"Content-Type: application/json\"             -d '{\"image_base64\": \"<base64-encoded-image>\", \"subtask\": \"panoptic\"}'\n

    or to save all masks:

    (base64 -w 0 guy.jpg | awk '{print \"{\"image_base64\": \"\"$0\"\", \"subtask\": \"semantic\"}\"}' > /tmp/image_payload.json)\ncurl -X POST http://localhost:3000/api/v1/segment_image             -H \"Content-Type: application/json\"             -u user:password             -d @/tmp/image_payload.json | jq -r '.[] | .mask + \" \" + .label' | while read mask label; do echo $mask | base64 --decode > \"${label}.jpg\"; done\n
    "},{"location":"vision/api/vqa/","title":"Visual Question Answering","text":"

    Bases: VisionAPI

    VisualQAAPI extends VisionAPI to provide an interface for visual question answering (VQA) tasks. This API supports answering questions about an image by utilizing deep learning models specifically trained for VQA. It processes requests containing an image and a question about the image, performs inference using the loaded model, and returns the predicted answer.

    Methods

    answer_question(self): Receives an image and a question, returns the answer based on visual content.

    Example CLI Usage:

    genius VisualQAAPI rise \\\nbatch \\\n--input_folder ./input \\\nbatch \\\n--output_folder ./output \\\nnone \\\nlisten \\\n--args \\\nmodel_name=\"llava-hf/bakLlava-v1-hf\" \\\nmodel_class=\"LlavaForConditionalGeneration\" \\\nprocessor_class=\"AutoProcessor\" \\\ndevice_map=\"cuda:0\" \\\nuse_cuda=True \\\nprecision=\"bfloat16\" \\\nquantization=0 \\\nmax_memory=None \\\ntorchscript=False \\\ncompile=False \\\nflash_attention=False \\\nbetter_transformers=False \\\nendpoint=\"*\" \\\nport=3000 \\\ncors_domain=\"http://localhost:3000\" \\\nusername=\"user\" \\\npassword=\"password\"\n
    "},{"location":"vision/api/vqa/#geniusrise_vision.vqa.api.VisualQAAPI.__init__","title":"__init__(input, output, state, **kwargs)","text":"

    Initializes the VisualQAAPI with configurations for input, output, state management, and any model-specific parameters for visual question answering tasks.

    Parameters:

    Name Type Description Default input BatchInput

    Configuration for the input data.

    required output BatchOutput

    Configuration for the output data.

    required state State

    State management for the API.

    required **kwargs

    Additional keyword arguments for extended functionality.

    {}"},{"location":"vision/api/vqa/#geniusrise_vision.vqa.api.VisualQAAPI.answer_question","title":"answer_question()","text":"

    Endpoint for receiving an image with a question and returning the answer based on the visual content of the image. It processes the request containing a base64-encoded image and a question string, and utilizes the loaded model to predict the answer to the question related to the image.

    Returns:

    Type Description

    Dict[str, Any]: A dictionary containing the original question and the predicted answer.

    Raises:

    Type Description ValueError

    If required fields 'image_base64' and 'question' are not provided in the request.

    Exception

    If an error occurs during image processing or inference.

    Example CURL Request:

    curl -X POST localhost:3000/api/v1/answer_question             -H \"Content-Type: application/json\"             -d '{\"image_base64\": \"<base64-encoded-image>\", \"question\": \"What is the color of the sky in the image?\"}'\n

    or

    (base64 -w 0 test_images_segment_finetune/image1.jpg | awk '{print \"{\"image_base64\": \"\"$0\"\", \"question\": \"how many cats are there?\"}\"}' > /tmp/image_payload.json)\ncurl -X POST http://localhost:3000/api/v1/answer_question             -H \"Content-Type: application/json\"             -u user:password             -d @/tmp/image_payload.json | jq\n
    "}]} \ No newline at end of file diff --git a/sitemap.xml b/sitemap.xml index 9c9bfbb..104bf77 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,902 +2,902 @@ https://docs.geniusrise.ai/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/audio/api/base/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/audio/api/s2t/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/audio/api/t2s/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/audio/bulk/base/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/audio/bulk/s2t/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/audio/bulk/t2s/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/blog/huggingface/chat/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/blog/huggingface/imgclass/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/blog/huggingface/lm/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/blog/huggingface/ner/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/blog/huggingface/nli/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/blog/huggingface/ocr/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/blog/huggingface/qa/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/blog/huggingface/segment/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/blog/huggingface/speak/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/blog/huggingface/speech/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/blog/huggingface/summz/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/blog/huggingface/table_qa/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/blog/huggingface/trans/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/blog/huggingface/txtclass/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/blog/huggingface/vqa/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/bolts/openai/base/ - 2024-02-28 + 2024-03-04 daily 
https://docs.geniusrise.ai/bolts/openai/classification/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/bolts/openai/commonsense_reasoning/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/bolts/openai/instruction_tuning/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/bolts/openai/language_model/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/bolts/openai/ner/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/bolts/openai/question_answering/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/bolts/openai/sentiment_analysis/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/bolts/openai/summarization/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/bolts/openai/translation/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/airflow/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/cli_boltctl/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/cli_discover/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/cli_dockerctl/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/cli_geniusctl/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/cli_schema/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/cli_spoutctl/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/cli_yamlctl/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/core_bolt/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/core_data_batch_input/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/core_data_batch_output/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/core_data_input/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/core_data_output/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/core_data_streaming_input/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/core_data_streaming_output/ - 2024-02-28 + 
2024-03-04 daily https://docs.geniusrise.ai/core/core_spout/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/core_state_base/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/core_state_dynamo/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/core_state_memory/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/core_state_postgres/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/core_state_redis/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/core_task_base/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/docker/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/docker_swarm/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/k8s_base/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/k8s_cron_job/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/k8s_deployment/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/k8s_job/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/core/k8s_service/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/arangodb/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/athena/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/azure_table/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/bigquery/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/bigtable/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/cassandra/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/cloud_sql/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/cockroach/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/cosmosdb/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/couchbase/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/db2/ - 2024-02-28 + 2024-03-04 daily 
https://docs.geniusrise.ai/databases/documentdb/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/dynamodb/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/elasticsearch/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/firestore/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/graphite/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/hbase/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/influxdb/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/kairosdb/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/keyspaces/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/ldap/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/memsql/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/mongodb/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/mysql/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/neo4j/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/nuodb/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/opentsdb/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/oracle/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/postgres/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/presto/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/redis/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/riak/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/spanner/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/sql_server/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/sqlite/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/sybase/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/teradata/ - 
2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/tidb/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/timescaledb/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/vertica/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/databases/voltdb/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/guides/404/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/guides/architecture/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/guides/cli/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/guides/concepts/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/guides/deployment/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/guides/dev_cycle/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/guides/installation/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/guides/kubernetes/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/guides/local/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/guides/packaging/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/guides/pin/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/guides/usage/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/guides/yaml/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/listeners/activemq/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/listeners/amqp/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/listeners/grpc/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/listeners/http_polling/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/listeners/kafka/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/listeners/kinesis/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/listeners/mqtt/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/listeners/quic/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/listeners/redis_pubsub/ - 2024-02-28 
+ 2024-03-04 daily https://docs.geniusrise.ai/listeners/redis_streams/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/listeners/sns/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/listeners/socket.io/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/listeners/sqs/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/listeners/udp/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/listeners/webhook/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/listeners/websocket/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/listeners/zeromq/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/ocr/ConvertImage/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/ocr/FineTunePix2Struct/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/ocr/FineTuneTROCR/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/ocr/ImageClassPredictor/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/ocr/ParseCBZCBR/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/ocr/ParseDjvu/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/ocr/ParseEpub/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/ocr/ParseMOBI/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/ocr/ParsePdf/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/ocr/ParsePostScript/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/ocr/ParseXPS/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/ocr/Pix2StructImageOCR/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/ocr/Pix2StructImageOCRAPI/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/ocr/TROCRImageOCR/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/ocr/TROCRImageOCRAPI/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/ocr/TrainImageClassifier/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/api/base/ - 2024-02-28 + 2024-03-04 daily 
https://docs.geniusrise.ai/text/api/classification/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/api/instruction_tuning/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/api/language_model/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/api/ner/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/api/nli/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/api/question_answering/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/api/summarization/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/api/translation/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/bulk/base/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/bulk/classification/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/bulk/instruction_tuning/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/bulk/language_model/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/bulk/ner/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/bulk/nli/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/bulk/question_answering/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/bulk/summarization/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/bulk/translation/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/fine_tune/base/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/fine_tune/classification/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/fine_tune/instruction_tuning/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/fine_tune/language_model/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/fine_tune/ner/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/fine_tune/nli/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/fine_tune/question_answering/ - 2024-02-28 + 2024-03-04 daily 
https://docs.geniusrise.ai/text/fine_tune/summarization/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/text/fine_tune/translation/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/vision/api/base/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/vision/api/imgclass/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/vision/api/ocr/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/vision/api/segment/ - 2024-02-28 + 2024-03-04 daily https://docs.geniusrise.ai/vision/api/vqa/ - 2024-02-28 + 2024-03-04 daily \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index 2c41986..3310148 100644 Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ diff --git a/text/api/base/index.html b/text/api/base/index.html index 41fa70e..9231db0 100644 --- a/text/api/base/index.html +++ b/text/api/base/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/api/classification/index.html b/text/api/classification/index.html index 4e30450..051f301 100644 --- a/text/api/classification/index.html +++ b/text/api/classification/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/api/instruction_tuning/index.html b/text/api/instruction_tuning/index.html index 613ff54..3d6eed4 100644 --- a/text/api/instruction_tuning/index.html +++ b/text/api/instruction_tuning/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/api/language_model/index.html b/text/api/language_model/index.html index d67521c..cab58e1 100644 --- a/text/api/language_model/index.html +++ b/text/api/language_model/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/api/ner/index.html b/text/api/ner/index.html index fc65eb2..90abbed 100644 --- a/text/api/ner/index.html +++ b/text/api/ner/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/api/nli/index.html b/text/api/nli/index.html index b21f126..1a4e810 100644 --- a/text/api/nli/index.html +++ b/text/api/nli/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/api/question_answering/index.html b/text/api/question_answering/index.html index 995f1bb..4118e1c 100644 --- a/text/api/question_answering/index.html +++ b/text/api/question_answering/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/api/summarization/index.html b/text/api/summarization/index.html index 9158dc8..0ee3e82 100644 --- a/text/api/summarization/index.html +++ b/text/api/summarization/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/api/translation/index.html b/text/api/translation/index.html index c31c745..a79e2cb 100644 --- a/text/api/translation/index.html +++ b/text/api/translation/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/bulk/base/index.html b/text/bulk/base/index.html index 55beb1e..adab97b 100644 --- a/text/bulk/base/index.html +++ b/text/bulk/base/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/bulk/classification/index.html b/text/bulk/classification/index.html index ae2e95a..32435ee 100644 --- a/text/bulk/classification/index.html +++ b/text/bulk/classification/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/bulk/instruction_tuning/index.html b/text/bulk/instruction_tuning/index.html index 0c700b6..2641832 100644 --- a/text/bulk/instruction_tuning/index.html +++ b/text/bulk/instruction_tuning/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/bulk/language_model/index.html b/text/bulk/language_model/index.html index 2dd5c5b..0a118e4 100644 --- a/text/bulk/language_model/index.html +++ b/text/bulk/language_model/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/bulk/ner/index.html b/text/bulk/ner/index.html index 488cab1..d9e172e 100644 --- a/text/bulk/ner/index.html +++ b/text/bulk/ner/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/bulk/nli/index.html b/text/bulk/nli/index.html index 602c2a6..68bf76e 100644 --- a/text/bulk/nli/index.html +++ b/text/bulk/nli/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/bulk/question_answering/index.html b/text/bulk/question_answering/index.html index 8d77ec4..005c8bd 100644 --- a/text/bulk/question_answering/index.html +++ b/text/bulk/question_answering/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/bulk/summarization/index.html b/text/bulk/summarization/index.html index 55cd365..e7c4199 100644 --- a/text/bulk/summarization/index.html +++ b/text/bulk/summarization/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/bulk/translation/index.html b/text/bulk/translation/index.html index 76a5912..97f63f9 100644 --- a/text/bulk/translation/index.html +++ b/text/bulk/translation/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/fine_tune/base/index.html b/text/fine_tune/base/index.html index 10c9301..00c62d8 100644 --- a/text/fine_tune/base/index.html +++ b/text/fine_tune/base/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/fine_tune/classification/index.html b/text/fine_tune/classification/index.html index 18e3b38..41e3f5c 100644 --- a/text/fine_tune/classification/index.html +++ b/text/fine_tune/classification/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/fine_tune/instruction_tuning/index.html b/text/fine_tune/instruction_tuning/index.html index 42439fc..a77173d 100644 --- a/text/fine_tune/instruction_tuning/index.html +++ b/text/fine_tune/instruction_tuning/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/fine_tune/language_model/index.html b/text/fine_tune/language_model/index.html index 4183665..b7f4165 100644 --- a/text/fine_tune/language_model/index.html +++ b/text/fine_tune/language_model/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/fine_tune/ner/index.html b/text/fine_tune/ner/index.html index ef4afa6..cf3a4d6 100644 --- a/text/fine_tune/ner/index.html +++ b/text/fine_tune/ner/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/fine_tune/nli/index.html b/text/fine_tune/nli/index.html index a4542d1..231dd84 100644 --- a/text/fine_tune/nli/index.html +++ b/text/fine_tune/nli/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/fine_tune/question_answering/index.html b/text/fine_tune/question_answering/index.html index a275bce..c67f16e 100644 --- a/text/fine_tune/question_answering/index.html +++ b/text/fine_tune/question_answering/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/fine_tune/summarization/index.html b/text/fine_tune/summarization/index.html index 0fd1515..2ee82b3 100644 --- a/text/fine_tune/summarization/index.html +++ b/text/fine_tune/summarization/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/text/fine_tune/translation/index.html b/text/fine_tune/translation/index.html index 7e505f3..5b77ce3 100644 --- a/text/fine_tune/translation/index.html +++ b/text/fine_tune/translation/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/vision/api/base/index.html b/vision/api/base/index.html index 9c9b10b..0a54cf1 100644 --- a/vision/api/base/index.html +++ b/vision/api/base/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/vision/api/imgclass/index.html b/vision/api/imgclass/index.html index ab40c7f..2ce37df 100644 --- a/vision/api/imgclass/index.html +++ b/vision/api/imgclass/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/vision/api/ocr/index.html b/vision/api/ocr/index.html index e00fe51..575fcd0 100644 --- a/vision/api/ocr/index.html +++ b/vision/api/ocr/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/vision/api/segment/index.html b/vision/api/segment/index.html index f153de5..1e52ac8 100644 --- a/vision/api/segment/index.html +++ b/vision/api/segment/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + + diff --git a/vision/api/vqa/index.html b/vision/api/vqa/index.html index 788bce6..a2f4b64 100644 --- a/vision/api/vqa/index.html +++ b/vision/api/vqa/index.html @@ -497,6 +497,48 @@ + + + + + +
  • + + Summarization + +
  • + + + + + + + + + +
  • + + Chat + +
  • + + + + + + + + + +
  • + + Language Models + +
  • + + + +