From 8bcaa3c8a47e5d517d5e91aedbfed685363d778b Mon Sep 17 00:00:00 2001
From: "wangang.wa"
Date: Thu, 17 Oct 2024 10:32:08 +0800
Subject: [PATCH 1/6] Install Bazel 6.1.0 for BladeDISC in Dockerfile

---
 docker/Dockerfile.base | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docker/Dockerfile.base b/docker/Dockerfile.base
index 96eade2..607c6d2 100644
--- a/docker/Dockerfile.base
+++ b/docker/Dockerfile.base
@@ -67,6 +67,9 @@ RUN env ${PROXY} curl -L -O "http://github.com/bazelbuild/bazel/releases/downloa
     && ./"$bazel_file" --user \
     && rm -f "$bazel_file"
 
+# install bazel 6.1.0 for BladeDISC
+RUN cd "/root/.bazel/bin" && env ${PROXY} curl -fLO https://releases.bazel.build/6.1.0/release/bazel-6.1.0-linux-x86_64 && chmod +x bazel-6.1.0-linux-x86_64
+
 ENV PATH "/root/bin:$PATH"
 ENV TERM xterm

From 00343f403070b6e0ae86f27d4831896e180c10d6 Mon Sep 17 00:00:00 2001
From: "wangang.wa"
Date: Fri, 18 Oct 2024 14:17:10 +0800
Subject: [PATCH 2/6] update image building time

---
 .github/workflows/build_image.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build_image.yml b/.github/workflows/build_image.yml
index 2a8e898..4532839 100644
--- a/.github/workflows/build_image.yml
+++ b/.github/workflows/build_image.yml
@@ -3,8 +3,8 @@ name: Daily Image Building Script Execution
 on:
   workflow_dispatch:
   schedule:
-    # Runs at 1:00 AM every day
-    - cron: '0 1 * * *'
+    # Runs daily at 6:00 AM, Beijing time.
+    - cron: '0 22 * * *' # This is UTC time
 
 jobs:
   run-shell-script:

From dc81936bb8e0daea6084c46adb85ac61524b5c5a Mon Sep 17 00:00:00 2001
From: "wangang.wa"
Date: Tue, 22 Oct 2024 18:51:46 +0800
Subject: [PATCH 3/6] optimize

---
 docker/Dockerfile.release | 3 ++-
 docker/build_whls.sh      | 5 ++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docker/Dockerfile.release b/docker/Dockerfile.release
index 8c8ce73..636ad0f 100644
--- a/docker/Dockerfile.release
+++ b/docker/Dockerfile.release
@@ -9,6 +9,7 @@ ENV PROXY=${use_proxy:+'https_proxy=http://127.0.0.1:7890 http_proxy=http://127.
 
 COPY ./whls/* ${work_dir}/
 RUN cd ${work_dir} \
-    && env ${PROXY} pip install *.whl transformers==4.33.0 datasets pillow SentencePiece accelerate transformers_stream_generator tiktoken peft bitsandbytes scipy \
+    && env ${PROXY} pip install *.whl \
+    && env ${PROXY} pip install transformers==4.33.0 flash-attn==2.5.6 datasets pillow SentencePiece accelerate transformers_stream_generator tiktoken peft bitsandbytes scipy \
     && env ${PROXY} pip install torchvision==0.18.0 --no-deps \
     && rm -rf ${work_dir}/* /root/.cache/pip
diff --git a/docker/build_whls.sh b/docker/build_whls.sh
index 8f1ef2e..c37e0b5 100755
--- a/docker/build_whls.sh
+++ b/docker/build_whls.sh
@@ -34,8 +34,8 @@ function build_pytorch {
 
 function build_torch_xla {
   pushd pytorch/xla
-  python setup.py clean
-  env ${proxy} TF_CUDA_COMPUTE_CAPABILITIES="${cuda_compute_capabilities//\ /,}" \
+  # python setup.py clean
+  env ${proxy} ENABLE_DISC=1 TF_CUDA_COMPUTE_CAPABILITIES="${cuda_compute_capabilities//\ /,}" \
     TORCH_CUDA_ARCH_LIST="${cuda_compute_capabilities}" \
     BUILD_CPP_TESTS=0 \
     TF_NEED_CUDA=1 \
@@ -50,7 +50,6 @@ function build_torch_xla {
   python setup.py ${build_mode}
   if [ "${build_mode}" = "bdist_wheel" ]; then
     cp dist/*.whl ${work_dir}/whls/
-    cp third_party/flash-attention/dist/*.whl ${work_dir}/whls/
   fi
   popd
 }

From c1b30deb036301bdb994ed3d5ddd4c5f7d30c922 Mon Sep 17 00:00:00 2001
From: "wangang.wa"
Date: Wed, 23 Oct 2024 10:20:09 +0800
Subject: [PATCH 4/6] Update fa version

---
 docker/Dockerfile.release | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/Dockerfile.release b/docker/Dockerfile.release
index 636ad0f..cf7b1d0 100644
--- a/docker/Dockerfile.release
+++ b/docker/Dockerfile.release
@@ -10,6 +10,6 @@ ENV PROXY=${use_proxy:+'https_proxy=http://127.0.0.1:7890 http_proxy=http://127.
 COPY ./whls/* ${work_dir}/
 RUN cd ${work_dir} \
     && env ${PROXY} pip install *.whl \
-    && env ${PROXY} pip install transformers==4.33.0 flash-attn==2.5.6 datasets pillow SentencePiece accelerate transformers_stream_generator tiktoken peft bitsandbytes scipy \
+    && env ${PROXY} pip install transformers==4.33.0 flash-attn==2.5.8 datasets pillow SentencePiece accelerate transformers_stream_generator tiktoken peft bitsandbytes scipy \
     && env ${PROXY} pip install torchvision==0.18.0 --no-deps \
     && rm -rf ${work_dir}/* /root/.cache/pip

From b442e9066dac22d0e07aa168b620317682f5c475 Mon Sep 17 00:00:00 2001
From: "wangang.wa"
Date: Wed, 23 Oct 2024 13:44:04 +0800
Subject: [PATCH 5/6] update fa

---
 docker/Dockerfile.release | 2 +-
 docker/build_whls.sh      | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/docker/Dockerfile.release b/docker/Dockerfile.release
index cf7b1d0..3b93d26 100644
--- a/docker/Dockerfile.release
+++ b/docker/Dockerfile.release
@@ -10,6 +10,6 @@ ENV PROXY=${use_proxy:+'https_proxy=http://127.0.0.1:7890 http_proxy=http://127.
 COPY ./whls/* ${work_dir}/
 RUN cd ${work_dir} \
     && env ${PROXY} pip install *.whl \
-    && env ${PROXY} pip install transformers==4.33.0 flash-attn==2.5.8 datasets pillow SentencePiece accelerate transformers_stream_generator tiktoken peft bitsandbytes scipy \
+    && env ${PROXY} pip install transformers==4.33.0 datasets pillow SentencePiece accelerate transformers_stream_generator tiktoken peft bitsandbytes scipy \
     && env ${PROXY} pip install torchvision==0.18.0 --no-deps \
     && rm -rf ${work_dir}/* /root/.cache/pip
diff --git a/docker/build_whls.sh b/docker/build_whls.sh
index c37e0b5..eaaccd6 100755
--- a/docker/build_whls.sh
+++ b/docker/build_whls.sh
@@ -34,8 +34,7 @@ function build_pytorch {
 
 function build_torch_xla {
   pushd pytorch/xla
-  # python setup.py clean
-  env ${proxy} ENABLE_DISC=1 TF_CUDA_COMPUTE_CAPABILITIES="${cuda_compute_capabilities//\ /,}" \
+  env ${proxy} TF_CUDA_COMPUTE_CAPABILITIES="${cuda_compute_capabilities//\ /,}" \
     TORCH_CUDA_ARCH_LIST="${cuda_compute_capabilities}" \
     BUILD_CPP_TESTS=0 \
     TF_NEED_CUDA=1 \
@@ -50,6 +49,9 @@ function build_torch_xla {
   python setup.py ${build_mode}
   if [ "${build_mode}" = "bdist_wheel" ]; then
     cp dist/*.whl ${work_dir}/whls/
+    # TODO: Directly installing FlashAttention 2.5.6 via `pip install` can cause a torch-related undefined symbol error.
+    # Install FlashAttention in Dockerfile.release once torchacc has adapted FA to version 2.5.8.
+    wget https://odps-release.oss-cn-zhangjiakou.aliyuncs.com/torchacc/whls/flash_attn-2.5.6-cp310-cp310-linux_x86_64.whl -P ${work_dir}/whls/
   fi
   popd
 }

From 6a98c47cb3fe519364fcdd648f76900036f48309 Mon Sep 17 00:00:00 2001
From: "wangang.wa"
Date: Wed, 23 Oct 2024 13:50:03 +0800
Subject: [PATCH 6/6] update building time

---
 .github/workflows/build_image.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build_image.yml b/.github/workflows/build_image.yml
index 4532839..ae98b5c 100644
--- a/.github/workflows/build_image.yml
+++ b/.github/workflows/build_image.yml
@@ -3,8 +3,8 @@ name: Daily Image Building Script Execution
 on:
   workflow_dispatch:
   schedule:
-    # Runs daily at 6:00 AM, Beijing time.
-    - cron: '0 22 * * *' # This is UTC time
+    # Runs daily at 1:00 AM, Beijing time.
+    - cron: '0 17 * * *' # This is UTC time
 
 jobs:
   run-shell-script: