diff --git a/.github/workflows/build_image.yml b/.github/workflows/build_image.yml
index 2a8e898..ae98b5c 100644
--- a/.github/workflows/build_image.yml
+++ b/.github/workflows/build_image.yml
@@ -3,8 +3,8 @@ name: Daily Image Building Script Execution
 on:
   workflow_dispatch:
   schedule:
-    # Runs at 1:00 AM every day
-    - cron: '0 1 * * *'
+    # Runs daily at 1:00 AM Beijing time (UTC+8).
+    - cron: '0 17 * * *' # GitHub Actions cron schedules run in UTC
 
 jobs:
   run-shell-script:
diff --git a/docker/Dockerfile.base b/docker/Dockerfile.base
index 96eade2..607c6d2 100644
--- a/docker/Dockerfile.base
+++ b/docker/Dockerfile.base
@@ -67,6 +67,9 @@ RUN env ${PROXY} curl -L -O "http://github.com/bazelbuild/bazel/releases/downloa
     && ./"$bazel_file" --user \
     && rm -f "$bazel_file"
 
+# Install Bazel 6.1.0 for BladeDISC
+RUN cd "/root/.bazel/bin" && env ${PROXY} curl -fLO https://releases.bazel.build/6.1.0/release/bazel-6.1.0-linux-x86_64 && chmod +x bazel-6.1.0-linux-x86_64
+
 ENV PATH "/root/bin:$PATH"
 
 ENV TERM xterm
diff --git a/docker/Dockerfile.release b/docker/Dockerfile.release
index 8c8ce73..3b93d26 100644
--- a/docker/Dockerfile.release
+++ b/docker/Dockerfile.release
@@ -9,6 +9,7 @@ ENV PROXY=${use_proxy:+'https_proxy=http://127.0.0.1:7890 http_proxy=http://127.
 COPY ./whls/* ${work_dir}/
 
 RUN cd ${work_dir} \
-    && env ${PROXY} pip install *.whl transformers==4.33.0 datasets pillow SentencePiece accelerate transformers_stream_generator tiktoken peft bitsandbytes scipy \
+    && env ${PROXY} pip install *.whl \
+    && env ${PROXY} pip install transformers==4.33.0 datasets pillow SentencePiece accelerate transformers_stream_generator tiktoken peft bitsandbytes scipy \
     && env ${PROXY} pip install torchvision==0.18.0 --no-deps \
     && rm -rf ${work_dir}/* /root/.cache/pip
diff --git a/docker/build_whls.sh b/docker/build_whls.sh
index 8f1ef2e..eaaccd6 100755
--- a/docker/build_whls.sh
+++ b/docker/build_whls.sh
@@ -34,7 +34,6 @@ function build_pytorch {
 
 function build_torch_xla {
   pushd pytorch/xla
-  python setup.py clean
   env ${proxy} TF_CUDA_COMPUTE_CAPABILITIES="${cuda_compute_capabilities//\ /,}" \
     TORCH_CUDA_ARCH_LIST="${cuda_compute_capabilities}" \
     BUILD_CPP_TESTS=0 \
@@ -50,7 +49,9 @@ function build_torch_xla {
   python setup.py ${build_mode}
   if [ "${build_mode}" = "bdist_wheel" ]; then
     cp dist/*.whl ${work_dir}/whls/
-    cp third_party/flash-attention/dist/*.whl ${work_dir}/whls/
+    # TODO: Directly installing FlashAttention 2.5.6 via `pip install` can cause a torch-related undefined symbol error.
+    # Install FlashAttention in Dockerfile.release once torchacc adapts FA to version 2.5.8.
+    wget https://odps-release.oss-cn-zhangjiakou.aliyuncs.com/torchacc/whls/flash_attn-2.5.6-cp310-cp310-linux_x86_64.whl -P ${work_dir}/whls/
   fi
   popd
 }
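
A note on the schedule change: GitHub Actions evaluates `schedule` cron expressions in UTC, and Beijing time (Asia/Shanghai) is UTC+8 with no daylight saving, so 01:00 Beijing corresponds to 17:00 UTC on the previous day. A minimal way to sanity-check the conversion with GNU date (illustrative only, not part of the change):

    # Render 17:00 UTC in Beijing time; expect 01:00 CST on the next calendar day.
    TZ=Asia/Shanghai date -d '17:00 UTC'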
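
On the FlashAttention TODO: an undefined-symbol error at import time is the usual symptom of a flash-attn wheel built against a different torch version/ABI than the one installed, which is why a known-good prebuilt 2.5.6 wheel is staged here instead of a plain `pip install`. A quick hedged check after the image is built (assumes the staged wheel is installed; `flash_attn.__version__` is present in current flash-attn releases):

    # An ABI mismatch typically surfaces here as "ImportError: ... undefined symbol: _ZN..."
    python -c "import torch, flash_attn; print(torch.__version__, flash_attn.__version__)"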