diff --git a/docker-compose.yaml b/docker-compose.yaml
index 0f714f34f..124afbc9b 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -8,7 +8,6 @@ services:
     ports:
       - "8001:8001"
     depends_on:
-      - super__tgwui
       - super__redis
       - super__postgres
     networks:
@@ -21,7 +20,6 @@ services:
       context: .
       dockerfile: DockerfileCelery
     depends_on:
-      - super__tgwui
       - super__redis
       - super__postgres
     networks:
@@ -40,47 +38,6 @@ services:
       - /app/node_modules
       - /app/.next
 
-  super__tgwui:
-    build:
-      context: .
-      #target: llama-cublas # Specify the variant to build
-      dockerfile: DockerfileTGWUI
-#    args:
-#      - LCL_SRC_DIR=text-generation-webui # Developers - see Dockerfile app_base
-    container_name: text-generation-webui
-    environment:
-      - EXTRA_LAUNCH_ARGS="--chat --listen --verbose --extensions openai --threads 4"
-#      - EXTRA_LAUNCH_ARGS="--listen --no-mmap --verbose --extensions openai --auto-devices --gpu-memory 20 20 --n-gpu-layers 100 --threads 8 --model vicuna-13b-cot.ggmlv3.q8_0.bin" # GPU Custom launch args (e.g., --model MODEL_NAME)
-    ports:
-      - 7860:7860 # Default web port
-      - 5000:5000 # Default API port
-      - 5005:5005 # Default streaming port
-      - 5001:5001 # Default OpenAI API extension port
-    volumes:
-      - ./tgwui/config/loras:/app/loras
-      - ./tgwui/config/models:/app/models
-      - ./tgwui/config/presets:/app/presets
-      - ./tgwui/config/prompts:/app/prompts
-      - ./tgwui/config/softprompts:/app/softprompts
-      - ./tgwui/config/training:/app/training
-    logging:
-      driver: json-file
-      options:
-        max-file: "3" # number of files or file count
-        max-size: '10m'
-    networks:
-      - super_network
-### Uncomment the following lines to run the container using the host machine's GPU resources
-#    deploy:
-#      resources:
-#        reservations:
-#          devices:
-#            - driver: nvidia
-#              count: all
-##              device_ids: ['0', '1'] # must comment the above line if this line is uncommented.
-#              capabilities: [gpu]
-
-
   super__redis:
     image: "docker.io/library/redis:latest"
     networks:
diff --git a/docker-compose.yaml.bak b/local-llm
similarity index 51%
rename from docker-compose.yaml.bak
rename to local-llm
index a62a9da09..c30f23307 100644
--- a/docker-compose.yaml.bak
+++ b/local-llm
@@ -8,6 +8,7 @@ services:
     ports:
       - "8001:8001"
     depends_on:
+      - super__tgwui
      - super__redis
       - super__postgres
     networks:
@@ -20,11 +21,12 @@ services:
       context: .
       dockerfile: DockerfileCelery
     depends_on:
+      - super__tgwui
       - super__redis
       - super__postgres
     networks:
       - super_network
-
+
   gui:
     build: ./gui
     ports:
@@ -38,13 +40,40 @@ services:
       - /app/node_modules
       - /app/.next
 
+  super__tgwui:
+    build:
+      context: .
+      dockerfile: ./tgwui/DockerfileTGWUI
+    container_name: super__tgwui
+    environment:
+      - EXTRA_LAUNCH_ARGS="--listen --verbose --extensions openai --threads 4 --n_ctx 1600"
+    ports:
+      - 7860:7860 # Default web port
+      - 5000:5000 # Default API port
+      - 5005:5005 # Default streaming port
+      - 5001:5001 # Default OpenAI API extension port
+    volumes:
+      - ./tgwui/config/loras:/app/loras
+      - ./tgwui/config/models:/app/models
+      - ./tgwui/config/presets:/app/presets
+      - ./tgwui/config/prompts:/app/prompts
+      - ./tgwui/config/softprompts:/app/softprompts
+      - ./tgwui/config/training:/app/training
+    logging:
+      driver: json-file
+      options:
+        max-file: "3" # number of files or file count
+        max-size: '10m'
+    networks:
+      - super_network
+
   super__redis:
-    image: "redis:latest"
+    image: "docker.io/library/redis:latest"
     networks:
       - super_network
 
   super__postgres:
-    image: "postgres:latest"
+    image: "docker.io/library/postgres:latest"
     environment:
       - POSTGRES_USER=superagi
       - POSTGRES_PASSWORD=password
@@ -59,6 +88,6 @@ services:
 networks:
   super_network:
     driver: bridge
-
+
 volumes:
   superagi_postgres_data:
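Note: with `local-llm`, `super__tgwui` now fronts both `backend` and `celery` via `depends_on`, and the `openai` extension enabled in `EXTRA_LAUNCH_ARGS` is published on host port 5001. Below is a minimal smoke test of that endpoint once `docker compose -f local-llm up --build` is running; this is a sketch assuming the pre-1.0 `openai` Python client, and the model name is a placeholder rather than anything taken from this PR.

```python
import openai

# The super__tgwui service maps the OpenAI API extension to host port 5001.
openai.api_base = "http://localhost:5001/v1"
openai.api_key = "dummy-key"  # a local server does not validate the key

response = openai.Completion.create(
    model="placeholder",  # text-generation-webui answers for whichever model it has loaded
    prompt="Say hello.",
    max_tokens=16,
)
print(response.choices[0].text)
```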
diff --git a/local-llm-gpu b/local-llm-gpu
new file mode 100644
index 000000000..dceffd3d8
--- /dev/null
+++ b/local-llm-gpu
@@ -0,0 +1,107 @@
+version: '3.8'
+
+services:
+  backend:
+    volumes:
+      - "./:/app"
+    build: .
+    ports:
+      - "8001:8001"
+    depends_on:
+      - super__tgwui
+      - super__redis
+      - super__postgres
+    networks:
+      - super_network
+
+  celery:
+    volumes:
+      - "./:/app"
+    build:
+      context: .
+      dockerfile: DockerfileCelery
+    depends_on:
+      - super__tgwui
+      - super__redis
+      - super__postgres
+    networks:
+      - super_network
+
+  gui:
+    build: ./gui
+    ports:
+      - "3000:3000"
+    environment:
+      - NEXT_PUBLIC_API_BASE_URL=http://localhost:8001
+    networks:
+      - super_network
+    volumes:
+      - ./gui:/app
+      - /app/node_modules
+      - /app/.next
+
+  super__tgwui:
+    build:
+      context: .
+      target: llama-cublas
+      dockerfile: ./tgwui/DockerfileTGWUI
+#      args:
+#        - LCL_SRC_DIR=text-generation-webui # Developers - see Dockerfile app_base
+    container_name: super__tgwui
+    environment:
+      - EXTRA_LAUNCH_ARGS="--listen --no-mmap --verbose --extensions openai --auto-devices --n_ctx 1600 --gpu-memory 20 20 --n-gpu-layers 128 --threads 8 --model vicuna-13b-cot.ggmlv3.q8_0.bin"
+    ports:
+      - 7860:7860 # Default web port
+      - 5000:5000 # Default API port
+      - 5005:5005 # Default streaming port
+      - 5001:5001 # Default OpenAI API extension port
+    volumes:
+      - ./tgwui/config/loras:/app/loras
+      - ./tgwui/config/models:/app/models
+      - ./tgwui/config/presets:/app/presets
+      - ./tgwui/config/prompts:/app/prompts
+      - ./tgwui/config/softprompts:/app/softprompts
+      - ./tgwui/config/training:/app/training
+      - ./tgwui/config/embeddings:/app/embeddings
+    logging:
+      driver: json-file
+      options:
+        max-file: "3" # number of files or file count
+        max-size: '10m'
+    networks:
+      - super_network
+### Uncomment the following lines to run the container using the host machine's GPU resources
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+#              count: "all"
+              device_ids: ['0', '1'] # must comment the above line if this line is uncommented.
+              capabilities: [gpu]
+
+
+  super__redis:
+    image: "docker.io/library/redis:latest"
+    networks:
+      - super_network
+
+  super__postgres:
+    image: "docker.io/library/postgres:latest"
+    environment:
+      - POSTGRES_USER=superagi
+      - POSTGRES_PASSWORD=password
+      - POSTGRES_DB=super_agi_main
+    volumes:
+      - superagi_postgres_data:/var/lib/postgresql/data/
+    networks:
+      - super_network
+    ports:
+      - "5432:5432"
+
+networks:
+  super_network:
+    driver: bridge
+
+volumes:
+  superagi_postgres_data:
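Note: in `local-llm-gpu` the `deploy` block is active (only `count: "all"` remains commented out), so the `### Uncomment the following lines` banner carried over from the CPU variant is stale here. As written, the file reserves GPUs `'0'` and `'1'`, matching the two values in `--gpu-memory 20 20`; hosts with a different GPU count need to adjust `device_ids`, `--gpu-memory`, and likely `--n-gpu-layers` together. A quick way to confirm the reservation took effect inside the container is the sketch below, assuming `torch` is present (text-generation-webui installs it via its requirements).

```python
# Run inside the container, e.g.: docker exec -it super__tgwui python3 check_gpu.py
# (check_gpu.py is a hypothetical file name for this snippet)
import torch

print("CUDA available:", torch.cuda.is_available())
for i in range(torch.cuda.device_count()):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
```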
diff --git a/superagi/helper/json_cleaner.py b/superagi/helper/json_cleaner.py
index f5b43119f..58ff5c6b2 100644
--- a/superagi/helper/json_cleaner.py
+++ b/superagi/helper/json_cleaner.py
@@ -51,7 +51,7 @@ def extract_json_section(cls, input_str: str = ""):
 
     @classmethod
     def remove_escape_sequences(cls, string):
-        return string.encode('utf-8').decode('unicode_escape').encode('raw_unicode_escape').decode('utf-8')
+        return string.encode('utf-8').decode('unicode_escape').encode('raw_unicode_escape')
 
     @classmethod
     def add_quotes_to_property_names(cls, json_string: str) -> str:
diff --git a/DockerfileTGWUI b/tgwui/DockerfileTGWUI
similarity index 89%
rename from DockerfileTGWUI
rename to tgwui/DockerfileTGWUI
index dbfd5c98b..01cb26779 100644
--- a/DockerfileTGWUI
+++ b/tgwui/DockerfileTGWUI
@@ -16,11 +16,10 @@ FROM env_base AS app_base
 ### DEVELOPERS/ADVANCED USERS ###
 # Clone oobabooga/text-generation-webui
 RUN git clone https://github.com/oobabooga/text-generation-webui /src
-# This is required to get multi-gpu support until the main branch updates the requirements.txt file to include llama-cpp-python 0.1.59 or greater.
-COPY tgwui/tgwui_requirements.txt /src/requirements.txt
 # To use local source: comment out the git clone command then set the build arg `LCL_SRC_DIR`
 #ARG LCL_SRC_DIR="text-generation-webui"
 #COPY ${LCL_SRC_DIR} /src
+# This is required to get multi-gpu support until the main branch updates the requirements.txt file to include llama-cpp-python 0.1.59 or greater.
 #################################
 
 # Copy source to app
@@ -28,15 +27,15 @@ RUN cp -ar /src /app
 # Install oobabooga/text-generation-webui
 RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r /app/requirements.txt
 # Install extensions
-COPY tgwui/scripts/build_extensions.sh /scripts/build_extensions.sh
+COPY tgwui/scripts/build_extensions.sh /app/scripts/build_extensions.sh
 RUN --mount=type=cache,target=/root/.cache/pip \
-    chmod +x /scripts/build_extensions.sh && . /scripts/build_extensions.sh
+    chmod +x /app/scripts/build_extensions.sh && . /app/scripts/build_extensions.sh
 
 ## Clone default GPTQ
 RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa.git -b cuda /app/repositories/GPTQ-for-LLaMa
 ## Build and install default GPTQ ('quant_cuda')
 ARG TORCH_CUDA_ARCH_LIST="6.1;7.0;7.5;8.0;8.6+PTX"
-#RUN cd /app/repositories/GPTQ-for-LLaMa/ && python3 setup_cuda.py install
+RUN cd /app/repositories/GPTQ-for-LLaMa/ && python3 setup_cuda.py install
 
 FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS base
 # Runtime pre-reqs
@@ -62,6 +61,13 @@ ENV PYTHONUNBUFFERED=1
 ARG BUILD_DATE
 ENV BUILD_DATE=$BUILD_DATE
 RUN echo "$BUILD_DATE" > /build_date.txt
+
+# Set embeddings model for llama
+#ENV OPENEDAI_EMBEDDING_MODEL=/app/embeddings/SGPT-125M-weightedmean-nli-bitfit
+#COPY tgwui/config/embeddings/SGPT-125M-weightedmean-nli-bitfit /app/embeddings
+#RUN echo -e "Embeddings model $OPENEDAI_EMBEDDING_MODEL"
+#RUN python extensions/openai/cache_embedding_model.py
+
 # Run
 COPY tgwui/scripts/docker-entrypoint.sh /scripts/docker-entrypoint.sh
 RUN chmod +x /scripts/docker-entrypoint.sh
diff --git a/tgwui/config/place-your-models-here.txt b/tgwui/config/place-your-models-here.txt
deleted file mode 100644
index e69de29bb..000000000
diff --git a/tgwui/tgwui_requirements.txt b/tgwui/tgwui_requirements.txt
deleted file mode 100644
index 72cac5df1..000000000
--- a/tgwui/tgwui_requirements.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-colorama
-datasets
-einops
-flexgen==0.1.7
-gradio_client==0.2.5
-gradio==3.33.1
-markdown
-numpy
-pandas
-Pillow>=9.5.0
-pyyaml
-requests
-safetensors==0.3.1
-sentencepiece
-tqdm
-scipy
-git+https://github.com/huggingface/peft@3714aa2fff158fdfa637b2b65952580801d890b2
-git+https://github.com/huggingface/transformers@e45e756d22206ca8fa9fb057c8c3d8fa79bf81c6
-git+https://github.com/huggingface/accelerate@0226f750257b3bf2cadc4f189f9eef0c764a0467
-bitsandbytes==0.39.0; platform_system != "Windows"
-https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.39.0-py3-none-any.whl; platform_system == "Windows"
-llama-cpp-python==0.1.59; platform_system != "Windows"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.57/llama_cpp_python-0.1.57-cp310-cp310-win_amd64.whl; platform_system == "Windows"
-https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.2.0/auto_gptq-0.2.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
-https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.2.0/auto_gptq-0.2.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux"
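Note on the superagi/helper/json_cleaner.py hunk in this diff: dropping the trailing `.decode('utf-8')` changes the return type of `remove_escape_sequences` from `str` to `bytes`, so any caller that treats the result as a string will need to decode it (though `json.loads` itself accepts `bytes`). A minimal repro of the difference:

```python
# Before vs. after the change to JsonCleaner.remove_escape_sequences in this diff.
def remove_escape_sequences_before(string):
    return string.encode('utf-8').decode('unicode_escape').encode('raw_unicode_escape').decode('utf-8')

def remove_escape_sequences_after(string):
    return string.encode('utf-8').decode('unicode_escape').encode('raw_unicode_escape')

s = '{"key": "line1\\nline2"}'
print(type(remove_escape_sequences_before(s)))  # <class 'str'>
print(type(remove_escape_sequences_after(s)))   # <class 'bytes'>
```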