diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index fd7b1c0a..a8f15e15 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -15,7 +15,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python: ["3.9", "3.10", "3.11"]
+        python: ["3.8", "3.9", "3.10", "3.11"]
         cuda: ["11.8", "12.1"]
         torch: ["2.1", "2.2", "2.3"]
     runs-on: [self-hosted, linux, release]
diff --git a/.github/workflows/publish_manylinux_image.yml b/.github/workflows/publish_manylinux_image.yml
index 30a99c9a..a4771175 100644
--- a/.github/workflows/publish_manylinux_image.yml
+++ b/.github/workflows/publish_manylinux_image.yml
@@ -44,7 +44,7 @@ jobs:
         uses: docker/build-push-action@v5
         with:
           context: ./docker
-          file: ./docker/Dockerfile.manylinux_gcc11
+          file: ./docker/Dockerfile.manylinux
           push: true
           cache-from: type=local,src=$CI_CACHE_DIR/.buildx-cache
           cache-to: type=local,dest=$CI_CACHE_DIR/.buildx-cache
diff --git a/.github/workflows/publish_wheel.yml b/.github/workflows/publish_wheel.yml
index 938239a9..7806c959 100644
--- a/.github/workflows/publish_wheel.yml
+++ b/.github/workflows/publish_wheel.yml
@@ -48,7 +48,7 @@ jobs:
       - name: rename wheel to manylinux
         run: |
           for whl in python/dist/scalellm-*.whl; do
-            new_whl=${whl//"-linux_"/"-manylinux_2_28_"}
+            new_whl=${whl//"-linux_"/"-manylinux1_"}
             if [ "$whl" != "$new_whl" ]; then
               mv $whl $new_whl
             fi
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 607b84ca..95a0db82 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -238,7 +238,7 @@ message(STATUS "TORCH_CXX_FLAGS: ${TORCH_CXX_FLAGS}")
 add_compile_options(${TORCH_CXX_FLAGS})
 add_compile_definitions(TORCH_CUDA=1)
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DC10_USE_GLOG -flto=auto")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DC10_USE_GLOG")
 
 message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
 message(STATUS "CMAKE_CXX_FLAGS_DEBUG: ${CMAKE_CXX_FLAGS_DEBUG}")
diff --git a/docker/Dockerfile.manylinux b/docker/Dockerfile.manylinux
index 13fd4897..d4d90cf4 100644
--- a/docker/Dockerfile.manylinux
+++ b/docker/Dockerfile.manylinux
@@ -1,4 +1,5 @@
-FROM quay.io/pypa/manylinux_2_28_x86_64 as base
+ARG CUDA_VERSION=12.1
+FROM pytorch/manylinux-builder:cuda${CUDA_VERSION} AS base
 
 LABEL maintainer="mi@vectorch.com"
 ENV DEBIAN_FRONTEND noninteractive
@@ -7,36 +8,24 @@ ENV LC_ALL en_US.UTF-8
 ENV LANG en_US.UTF-8
 ENV LANGUAGE en_US.UTF-8
 
-# Install common dependencies
-COPY ./common/install_base.sh install_base.sh
-RUN bash ./install_base.sh && rm install_base.sh
-
-# Install user
-COPY ./common/install_user.sh install_user.sh
-RUN bash ./install_user.sh && rm install_user.sh
-
-# Install cuda, cudnn and nccl
-ARG CUDA_VERSION=12.1
-COPY ./common/install_cuda.sh install_cuda.sh
-RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
-ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
-
-# ARG CMAKE_VERSION=3.29.3
-# COPY ./common/install_cmake.sh install_cmake.sh
-# RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
-# RUN rm install_cmake.sh
-
-ARG NINJA_VERSION=1.11.1
-COPY ./common/install_ninja.sh install_ninja.sh
-RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
-RUN rm install_ninja.sh
-
+# Install dependencies for vcpkg.
+# The yum cache is removed in the same layer to keep the image small.
+RUN yum -y update && yum -y install \
+    zip unzip \
+    tar \
+    wget curl \
+    perl perl-IPC-Cmd \
+    sudo \
+    && yum clean all \
+    && rm -rf /var/cache/yum
+
+# Install ccache
 ARG CCACHE_VERSION=4.8.3
 COPY ./common/install_ccache.sh install_ccache.sh
 RUN if [ -n "${CCACHE_VERSION}" ]; then bash ./install_ccache.sh; fi
 RUN rm install_ccache.sh
 
-# install rust
+# Install Rust
 ENV RUSTUP_HOME=/usr/local/rustup
 ENV CARGO_HOME=/usr/local/cargo
 ENV PATH=/usr/local/cargo/bin:$PATH
diff --git a/docker/Dockerfile.manylinux_gcc11 b/docker/Dockerfile.manylinux_2_28
similarity index 93%
rename from docker/Dockerfile.manylinux_gcc11
rename to docker/Dockerfile.manylinux_2_28
index 43f24bd4..6464816d 100644
--- a/docker/Dockerfile.manylinux_gcc11
+++ b/docker/Dockerfile.manylinux_2_28
@@ -11,10 +11,6 @@ ENV LANGUAGE en_US.UTF-8
 COPY ./common/install_base.sh install_base.sh
 RUN bash ./install_base.sh && rm install_base.sh
 
-# Install user
-COPY ./common/install_user.sh install_user.sh
-RUN bash ./install_user.sh && rm install_user.sh
-
 # Install gcc-11
 RUN rm -rf /opt/rh/gcc-toolset-12
 RUN yum install -y gcc-toolset-11-toolchain
diff --git a/python/scalellm/__init__.py b/python/scalellm/__init__.py
index 28ef0c27..0cf72a3f 100644
--- a/python/scalellm/__init__.py
+++ b/python/scalellm/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.1.0"
+__version__ = "0.1.1"
 try:
     # torch needs to be imported first, otherwise it will segfault upon import.
     import torch  # noqa: F401
diff --git a/scripts/build_wheel.sh b/scripts/build_wheel.sh
index 2f308fea..31acf59f 100755
--- a/scripts/build_wheel.sh
+++ b/scripts/build_wheel.sh
@@ -34,6 +34,11 @@ echo "::group::Install PyTorch"
 pip install torch==$TORCH_VERSION --index-url "https://download.pytorch.org/whl/cu${CUDA_MAJOR}${CUDA_MINOR}"
 echo "::endgroup::"
 
+echo "::group::Install other dependencies"
+# torch releases before 2.3 were not built for NumPy 2; stay on NumPy 1.x.
+pip install "numpy<2"
+pip install --upgrade setuptools wheel
+echo "::endgroup::"
 
 echo "::group::Build wheel for ScaleLLM"
 cd "$PROJECT_ROOT/python"
@@ -53,6 +58,6 @@ fi
 # pip install auditwheel
 # cd "$PROJECT_ROOT/python"
 # for whl in dist/*.whl; do
-#     auditwheel repair "$whl" --plat manylinux_2_28_x86_64 -w dist/
+#     auditwheel repair "$whl" --plat manylinux1_x86_64 -w dist/
 # done
 # echo "::endgroup::"
\ No newline at end of file