diff --git a/docker-compose.yaml b/docker-compose.yaml
index 0f714f34f..124afbc9b 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -8,7 +8,6 @@ services:
     ports:
       - "8001:8001"
     depends_on:
-      - super__tgwui
       - super__redis
       - super__postgres
     networks:
@@ -21,7 +20,6 @@ services:
       context: .
       dockerfile: DockerfileCelery
     depends_on:
-      - super__tgwui
       - super__redis
       - super__postgres
     networks:
@@ -40,47 +38,6 @@ services:
       - /app/node_modules
       - /app/.next
 
-  super__tgwui:
-    build:
-      context: .
-      #target: llama-cublas # Specify the variant to build
-      dockerfile: DockerfileTGWUI
-#    args:
-#      - LCL_SRC_DIR=text-generation-webui # Developers - see Dockerfile app_base
-    container_name: text-generation-webui
-    environment:
-      - EXTRA_LAUNCH_ARGS="--chat --listen --verbose --extensions openai --threads 4"
-#      - EXTRA_LAUNCH_ARGS="--listen --no-mmap --verbose --extensions openai --auto-devices --gpu-memory 20 20 --n-gpu-layers 100 --threads 8 --model vicuna-13b-cot.ggmlv3.q8_0.bin" # GPU Custom launch args (e.g., --model MODEL_NAME)
-    ports:
-      - 7860:7860 # Default web port
-      - 5000:5000 # Default API port
-      - 5005:5005 # Default streaming port
-      - 5001:5001 # Default OpenAI API extension port
-    volumes:
-      - ./tgwui/config/loras:/app/loras
-      - ./tgwui/config/models:/app/models
-      - ./tgwui/config/presets:/app/presets
-      - ./tgwui/config/prompts:/app/prompts
-      - ./tgwui/config/softprompts:/app/softprompts
-      - ./tgwui/config/training:/app/training
-    logging:
-      driver: json-file
-      options:
-        max-file: "3" # number of files or file count
-        max-size: '10m'
-    networks:
-      - super_network
-### Uncomment the following lines to run the container using the host machine's GPU resources
-#    deploy:
-#      resources:
-#        reservations:
-#          devices:
-#            - driver: nvidia
-#              count: all
-##              device_ids: ['0', '1'] # must comment the above line if this line is uncommented.
-#              capabilities: [gpu]
-
-
   super__redis:
     image: "docker.io/library/redis:latest"
     networks:
diff --git a/docker-compose.yaml.bak b/local-llm
similarity index 51%
rename from docker-compose.yaml.bak
rename to local-llm
index a62a9da09..c30f23307 100644
--- a/docker-compose.yaml.bak
+++ b/local-llm
@@ -8,6 +8,7 @@ services:
     ports:
       - "8001:8001"
     depends_on:
+      - super__tgwui
      - super__redis
       - super__postgres
     networks:
@@ -20,11 +21,12 @@ services:
       context: .
       dockerfile: DockerfileCelery
     depends_on:
+      - super__tgwui
       - super__redis
       - super__postgres
     networks:
       - super_network
-
+
   gui:
     build: ./gui
     ports:
@@ -38,13 +40,40 @@ services:
       - /app/node_modules
       - /app/.next
 
+  super__tgwui:
+    build:
+      context: .
+      dockerfile: ./tgwui/DockerfileTGWUI
+    container_name: super__tgwui
+    environment:
+      - EXTRA_LAUNCH_ARGS="--listen --verbose --extensions openai --threads 4 --n_ctx 1600"
+    ports:
+      - 7860:7860 # Default web port
+      - 5000:5000 # Default API port
+      - 5005:5005 # Default streaming port
+      - 5001:5001 # Default OpenAI API extension port
+    volumes:
+      - ./tgwui/config/loras:/app/loras
+      - ./tgwui/config/models:/app/models
+      - ./tgwui/config/presets:/app/presets
+      - ./tgwui/config/prompts:/app/prompts
+      - ./tgwui/config/softprompts:/app/softprompts
+      - ./tgwui/config/training:/app/training
+    logging:
+      driver: json-file
+      options:
+        max-file: "3" # number of files or file count
+        max-size: '10m'
+    networks:
+      - super_network
+
   super__redis:
-    image: "redis:latest"
+    image: "docker.io/library/redis:latest"
     networks:
       - super_network
 
   super__postgres:
-    image: "postgres:latest"
+    image: "docker.io/library/postgres:latest"
     environment:
       - POSTGRES_USER=superagi
       - POSTGRES_PASSWORD=password
@@ -59,6 +88,6 @@ services:
 networks:
   super_network:
     driver: bridge
-
+
 volumes:
   superagi_postgres_data:
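Note: with `local-llm`, `super__tgwui` now fronts both `backend` and `celery` via `depends_on`, and the `openai` extension enabled in `EXTRA_LAUNCH_ARGS` is published on host port 5001. Below is a minimal smoke test of that endpoint once `docker compose -f local-llm up --build` is running; this is a sketch assuming the pre-1.0 `openai` Python client, and the model name is a placeholder rather than anything taken from this PR.

```python
import openai

# The super__tgwui service maps the OpenAI API extension to host port 5001.
openai.api_base = "http://localhost:5001/v1"
openai.api_key = "dummy-key"  # a local server does not validate the key

response = openai.Completion.create(
    model="placeholder",  # text-generation-webui answers for whichever model it has loaded
    prompt="Say hello.",
    max_tokens=16,
)
print(response.choices[0].text)
```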
diff --git a/local-llm-gpu b/local-llm-gpu
new file mode 100644
index 000000000..dceffd3d8
--- /dev/null
+++ b/local-llm-gpu
@@ -0,0 +1,107 @@
+version: '3.8'
+
+services:
+  backend:
+    volumes:
+      - "./:/app"
+    build: .
+    ports:
+      - "8001:8001"
+    depends_on:
+      - super__tgwui
+      - super__redis
+      - super__postgres
+    networks:
+      - super_network
+
+  celery:
+    volumes:
+      - "./:/app"
+    build:
+      context: .
+      dockerfile: DockerfileCelery
+    depends_on:
+      - super__tgwui
+      - super__redis
+      - super__postgres
+    networks:
+      - super_network
+
+  gui:
+    build: ./gui
+    ports:
+      - "3000:3000"
+    environment:
+      - NEXT_PUBLIC_API_BASE_URL=http://localhost:8001
+    networks:
+      - super_network
+    volumes:
+      - ./gui:/app
+      - /app/node_modules
+      - /app/.next
+
+  super__tgwui:
+    build:
+      context: .
+      target: llama-cublas
+      dockerfile: ./tgwui/DockerfileTGWUI
+#      args:
+#        - LCL_SRC_DIR=text-generation-webui # Developers - see Dockerfile app_base
+    container_name: super__tgwui
+    environment:
+      - EXTRA_LAUNCH_ARGS="--listen --no-mmap --verbose --extensions openai --auto-devices --n_ctx 1600 --gpu-memory 20 20 --n-gpu-layers 128 --threads 8 --model vicuna-13b-cot.ggmlv3.q8_0.bin"
+    ports:
+      - 7860:7860 # Default web port
+      - 5000:5000 # Default API port
+      - 5005:5005 # Default streaming port
+      - 5001:5001 # Default OpenAI API extension port
+    volumes:
+      - ./tgwui/config/loras:/app/loras
+      - ./tgwui/config/models:/app/models
+      - ./tgwui/config/presets:/app/presets
+      - ./tgwui/config/prompts:/app/prompts
+      - ./tgwui/config/softprompts:/app/softprompts
+      - ./tgwui/config/training:/app/training
+      - ./tgwui/config/embeddings:/app/embeddings
+    logging:
+      driver: json-file
+      options:
+        max-file: "3" # number of files or file count
+        max-size: '10m'
+    networks:
+      - super_network
+### Uncomment the following lines to run the container using the host machine's GPU resources
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+#              count: "all"
+              device_ids: ['0', '1'] # must comment the above line if this line is uncommented.
+              capabilities: [gpu]
+
+
+  super__redis:
+    image: "docker.io/library/redis:latest"
+    networks:
+      - super_network
+
+  super__postgres:
+    image: "docker.io/library/postgres:latest"
+    environment:
+      - POSTGRES_USER=superagi
+      - POSTGRES_PASSWORD=password
+      - POSTGRES_DB=super_agi_main
+    volumes:
+      - superagi_postgres_data:/var/lib/postgresql/data/
+    networks:
+      - super_network
+    ports:
+      - "5432:5432"
+
+networks:
+  super_network:
+    driver: bridge
+
+volumes:
+  superagi_postgres_data:
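Note: in `local-llm-gpu` the `deploy` block is active (only `count: "all"` remains commented out), so the `### Uncomment the following lines` banner carried over from the CPU variant is stale here. As written, the file reserves GPUs `'0'` and `'1'`, matching the two values in `--gpu-memory 20 20`; hosts with a different GPU count need to adjust `device_ids`, `--gpu-memory`, and likely `--n-gpu-layers` together. A quick way to confirm the reservation took effect inside the container is the sketch below, assuming `torch` is present (text-generation-webui installs it via its requirements).

```python
# Run inside the container, e.g.: docker exec -it super__tgwui python3 check_gpu.py
# (check_gpu.py is a hypothetical file name for this snippet)
import torch

print("CUDA available:", torch.cuda.is_available())
for i in range(torch.cuda.device_count()):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
```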
diff --git a/superagi/helper/json_cleaner.py b/superagi/helper/json_cleaner.py
index f5b43119f..58ff5c6b2 100644
--- a/superagi/helper/json_cleaner.py
+++ b/superagi/helper/json_cleaner.py
@@ -51,7 +51,7 @@ def extract_json_section(cls, input_str: str = ""):
 
     @classmethod
     def remove_escape_sequences(cls, string):
-        return string.encode('utf-8').decode('unicode_escape').encode('raw_unicode_escape').decode('utf-8')
+        return string.encode('utf-8').decode('unicode_escape').encode('raw_unicode_escape')
 
     @classmethod
     def add_quotes_to_property_names(cls, json_string: str) -> str:
diff --git a/DockerfileTGWUI b/tgwui/DockerfileTGWUI
similarity index 89%
rename from DockerfileTGWUI
rename to tgwui/DockerfileTGWUI
index dbfd5c98b..01cb26779 100644
--- a/DockerfileTGWUI
+++ b/tgwui/DockerfileTGWUI
@@ -16,11 +16,10 @@ FROM env_base AS app_base
 ### DEVELOPERS/ADVANCED USERS ###
 # Clone oobabooga/text-generation-webui
 RUN git clone https://github.com/oobabooga/text-generation-webui /src
-# This is required to get multi-gpu support until the main branch updates the requirements.txt file to include llama-cpp-python 0.1.59 or greater.
-COPY tgwui/tgwui_requirements.txt /src/requirements.txt
 # To use local source: comment out the git clone command then set the build arg `LCL_SRC_DIR`
 #ARG LCL_SRC_DIR="text-generation-webui"
 #COPY ${LCL_SRC_DIR} /src
+# This is required to get multi-gpu support until the main branch updates the requirements.txt file to include llama-cpp-python 0.1.59 or greater.
 #################################
 
 # Copy source to app
@@ -28,15 +27,15 @@ RUN cp -ar /src /app
 # Install oobabooga/text-generation-webui
 RUN --mount=type=cache,target=/root/.cache/pip pip3 install -r /app/requirements.txt
 # Install extensions
-COPY tgwui/scripts/build_extensions.sh /scripts/build_extensions.sh
+COPY tgwui/scripts/build_extensions.sh /app/scripts/build_extensions.sh
 RUN --mount=type=cache,target=/root/.cache/pip \
-    chmod +x /scripts/build_extensions.sh && . /scripts/build_extensions.sh
+    chmod +x /app/scripts/build_extensions.sh && . /app/scripts/build_extensions.sh
 
 ## Clone default GPTQ
 RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa.git -b cuda /app/repositories/GPTQ-for-LLaMa
 ## Build and install default GPTQ ('quant_cuda')
 ARG TORCH_CUDA_ARCH_LIST="6.1;7.0;7.5;8.0;8.6+PTX"
-#RUN cd /app/repositories/GPTQ-for-LLaMa/ && python3 setup_cuda.py install
+RUN cd /app/repositories/GPTQ-for-LLaMa/ && python3 setup_cuda.py install
 
 FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 AS base
 # Runtime pre-reqs
@@ -62,6 +61,13 @@ ENV PYTHONUNBUFFERED=1
 ARG BUILD_DATE
 ENV BUILD_DATE=$BUILD_DATE
 RUN echo "$BUILD_DATE" > /build_date.txt
+
+# Set embeddings model for llama
+#ENV OPENEDAI_EMBEDDING_MODEL=/app/embeddings/SGPT-125M-weightedmean-nli-bitfit
+#COPY tgwui/config/embeddings/SGPT-125M-weightedmean-nli-bitfit /app/embeddings
+#RUN echo -e "Embeddings model $OPENEDAI_EMBEDDING_MODEL"
+#RUN python extensions/openai/cache_embedding_model.py
+
 # Run
 COPY tgwui/scripts/docker-entrypoint.sh /scripts/docker-entrypoint.sh
 RUN chmod +x /scripts/docker-entrypoint.sh
diff --git a/tgwui/config/place-your-models-here.txt b/tgwui/config/place-your-models-here.txt
deleted file mode 100644
index e69de29bb..000000000
diff --git a/tgwui/tgwui_requirements.txt b/tgwui/tgwui_requirements.txt
deleted file mode 100644
index 72cac5df1..000000000
--- a/tgwui/tgwui_requirements.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-colorama
-datasets
-einops
-flexgen==0.1.7
-gradio_client==0.2.5
-gradio==3.33.1
-markdown
-numpy
-pandas
-Pillow>=9.5.0
-pyyaml
-requests
-safetensors==0.3.1
-sentencepiece
-tqdm
-scipy
-git+https://github.com/huggingface/peft@3714aa2fff158fdfa637b2b65952580801d890b2
-git+https://github.com/huggingface/transformers@e45e756d22206ca8fa9fb057c8c3d8fa79bf81c6
-git+https://github.com/huggingface/accelerate@0226f750257b3bf2cadc4f189f9eef0c764a0467
-bitsandbytes==0.39.0; platform_system != "Windows"
-https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.39.0-py3-none-any.whl; platform_system == "Windows"
-llama-cpp-python==0.1.59; platform_system != "Windows"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.57/llama_cpp_python-0.1.57-cp310-cp310-win_amd64.whl; platform_system == "Windows"
-https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.2.0/auto_gptq-0.2.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
-https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.2.0/auto_gptq-0.2.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux"
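Note on the superagi/helper/json_cleaner.py hunk in this diff: dropping the trailing `.decode('utf-8')` changes the return type of `remove_escape_sequences` from `str` to `bytes`, so any caller that treats the result as a string will need to decode it (though `json.loads` itself accepts `bytes`). A minimal repro of the difference:

```python
# Before vs. after the change to JsonCleaner.remove_escape_sequences in this diff.
def remove_escape_sequences_before(string):
    return string.encode('utf-8').decode('unicode_escape').encode('raw_unicode_escape').decode('utf-8')

def remove_escape_sequences_after(string):
    return string.encode('utf-8').decode('unicode_escape').encode('raw_unicode_escape')

s = '{"key": "line1\\nline2"}'
print(type(remove_escape_sequences_before(s)))  # <class 'str'>
print(type(remove_escape_sequences_after(s)))   # <class 'bytes'>
```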