Skip to content

Commit

Permalink
Merge branch 'master' into xpu
Browse files Browse the repository at this point in the history
  • Loading branch information
ShawnXuan authored Dec 31, 2024
2 parents f359e8c + 9358ac7 commit 2bac1d5
Show file tree
Hide file tree
Showing 50 changed files with 154 additions and 451 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/canary.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
ONEFLOW_SRC: .
MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/canary-cu112
WHEELHOUSE_DIR: manylinux-wheelhouse
COMPUTE_PLATFORM: cu117
COMPUTE_PLATFORM: cu118
OSS_BUCKET: oneflow-staging
OSS_WHEEL_HOUSE_DIR: ${{ matrix.entry }}/commit/${{ github.sha }}
OSS_GITHUB_REF_DIR: ${{ matrix.entry }}/${{ github.ref }}
Expand All @@ -54,7 +54,7 @@ jobs:
- name: Checkout Oneflow-Inc/oneflow
if: ${{ github.event.inputs.oneflow-ref == '' }}
uses: actions/checkout@v2
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
- uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118
name: Build manylinux
id: build-cuda
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/community_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
schedule:
# beijing: 6 pm.
# utc: 10 am.
- cron: "0 10 * * *"
- cron: "0 10 * * sat"
workflow_dispatch:
inputs:
priv_branch:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/on_merge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@ jobs:
if: github.event.pull_request.merged == true
runs-on: ubuntu-latest
steps:
- uses: Oneflow-Inc/get-oneflow/update-benchmark-history@whl-skip-nccl
- uses: Oneflow-Inc/get-oneflow/update-benchmark-history@ci-test-with-cu118
name: Update benchmark history
timeout-minutes: 10
2 changes: 1 addition & 1 deletion .github/workflows/priv_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
schedule:
# beijing: 12 pm.
# utc: 4 am.
- cron: "0 4 * * *"
- cron: "0 4 * * sun"
workflow_dispatch:
inputs:
priv_branch:
Expand Down
11 changes: 7 additions & 4 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ jobs:
ref: ${{ inputs.branch }}
repository: ${{ secrets.ONEFLOW_PRIV_ORG }}/oneflow
token: ${{ secrets.ONEFLOW_PRIV_GH_TOKEN }}
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@whl-skip-nccl
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@ci-test-with-cu118
name: Find build cache
id: find-cache
timeout-minutes: 5
Expand Down Expand Up @@ -149,7 +149,7 @@ jobs:
if: ${{ inputs.is_priv }}
run: |
env
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
- uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118
name: Build ${{ matrix.entry }}
if: ${{ matrix.entry =='cu118' || startsWith(matrix.entry, 'cu12') }}
with:
Expand All @@ -170,11 +170,12 @@ jobs:
nightly-date: ${{ needs.generate-build-matrix.outputs.formatted_date }}
use-nvidia-wheels: ${{ matrix.entry !='cu112' }}
python-versions: |
3.12
3.11
3.10
3.9
3.8
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
- uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118
name: Build ${{ matrix.entry }}
if: ${{ startsWith(matrix.entry, 'cu') && matrix.entry !='cu118' && !startsWith(matrix.entry, 'cu12') }}
with:
Expand All @@ -195,11 +196,12 @@ jobs:
nightly-date: ${{ needs.generate-build-matrix.outputs.formatted_date }}
use-nvidia-wheels: ${{ matrix.entry !='cu112' }}
python-versions: |
3.12
3.11
3.10
3.9
3.8
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
- uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118
name: Build ${{ matrix.entry }}
if: ${{ matrix.entry =='cpu' }}
with:
Expand All @@ -219,6 +221,7 @@ jobs:
nightly: ${{ inputs.is_priv || github.event_name == 'schedule' || github.ref == 'refs/heads/release/add_nightly_date_index'}}
nightly-date: ${{ needs.generate-build-matrix.outputs.formatted_date }}
python-versions: |
3.12
3.11
3.10
3.9
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/simple.yml
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ jobs:
fi
- name: Upload logs
if: always() && contains(github.event.pull_request.labels.*.name, 'need-simple-ci-upload-artifact')
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v3
with:
name: logs-${{ matrix.test_suite }}-${{ matrix.cmake_generator }}-${{ matrix.cmake_build_type }}-shared-${{ matrix.build_shared_libs }}
path: |
Expand All @@ -244,7 +244,7 @@ jobs:
repository: Oneflow-Inc/conda-env
ref: 30a7f00eb48ee9009d85a848e720823e5054c66b
path: conda-env
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
- uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118
name: Build with gcc7
if: ${{ matrix.build-type == 'gcc7'}}
with:
Expand All @@ -253,7 +253,7 @@ jobs:
oneflow-build-env: conda
conda-env-file: conda-env/dev/gcc7/environment-v2.yml
conda-env-name: oneflow-dev-gcc7-v2
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
- uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118
name: Build with clang10
if: ${{ matrix.build-type == 'clang10'}}
with:
Expand Down
52 changes: 26 additions & 26 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ jobs:
then
exit 0
fi
python3 -m pip install -U pip setuptools wheel
python3 -m pip install -U pip "setuptools<=68.2.2" wheel
python3 -m pip install 'cryptography<2.2' oss2
python3 tools/package_mirror.py -i $PWD
Expand Down Expand Up @@ -139,7 +139,7 @@ jobs:
git push
- name: Upload patch
if: ${{ failure() && steps.git_push.outcome == 'failure' }}
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v3
with:
name: license_and_format-${{ github.sha }}.patch
path: license_and_format.patch
Expand Down Expand Up @@ -176,7 +176,7 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@whl-skip-nccl
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@ci-test-with-cu118
name: find cache
id: find-cache
timeout-minutes: 5
Expand All @@ -188,7 +188,7 @@ jobs:
builder
oneflow-src: ${{ env.ONEFLOW_SRC }}
entries: |
cu116
cu118
cpu
cpu-asan-ubsan
cpu-tsan
Expand Down Expand Up @@ -223,7 +223,7 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
- uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl
- uses: Oneflow-Inc/get-oneflow/cache-complete@ci-test-with-cu118
name: Save cache if successful
id: save-cache
timeout-minutes: 5
Expand All @@ -237,7 +237,7 @@ jobs:
run: |
echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit"
exit 1
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
- uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118
name: Build manylinux ${{ matrix.entry }}
id: build-cpu
if: ${{ matrix.entry =='cpu' && !matrix.cache-hit }}
Expand All @@ -259,7 +259,7 @@ jobs:
python-versions: |
3.7
3.8
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
- uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118
name: Build manylinux ${{ matrix.entry }}
id: build-cpu-sanitizers
if: ${{ (matrix.entry == 'cpu-asan-ubsan' || matrix.entry == 'cpu-tsan') && !matrix.cache-hit && false }}
Expand All @@ -280,10 +280,10 @@ jobs:
clean-ccache: ${{ contains(github.event.pull_request.labels.*.name, 'need-clean-ccache') }}
python-versions: |
3.8
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
- uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118
name: Build manylinux ${{ matrix.entry }}
id: build-cuda
if: ${{ matrix.entry =='cu116' && !matrix.cache-hit }}
if: ${{ matrix.entry =='cu118' && !matrix.cache-hit }}
with:
cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake
build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc9.sh
Expand All @@ -292,15 +292,15 @@ jobs:
wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }}
clear-wheelhouse-dir: true
self-hosted: ${{ contains(matrix.runs-on, 'self-hosted') }}
cuda-version: "11.6"
cuda-version: "11.8"
manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }}
docker-run-use-system-http-proxy: false
docker-run-use-lld: false
retry-failed-build: true
clean-ccache: ${{ contains(github.event.pull_request.labels.*.name, 'need-clean-ccache') }}
python-versions: |
3.7
- uses: Oneflow-Inc/get-oneflow@whl-skip-nccl
- uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118
name: Build ${{ matrix.entry }}
if: ${{ matrix.entry == 'llvm15' && !matrix.cache-hit }}
with:
Expand Down Expand Up @@ -339,7 +339,7 @@ jobs:
})
- name: Upload packed liboneflow
if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'llvm15' && matrix.entry != 'cpu-asan-ubsan' && matrix.entry != 'cpu-tsan' }}
uses: Oneflow-Inc/get-oneflow/digest/upload@whl-skip-nccl
uses: Oneflow-Inc/get-oneflow/digest/upload@ci-test-with-cu118
timeout-minutes: 10
with:
digest: ${{ steps.save-cache.outputs.build-digest }}
Expand All @@ -350,7 +350,7 @@ jobs:
dst-dir: cpack
- name: Upload whl
if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'llvm15' && matrix.entry != 'cpu-asan-ubsan' && matrix.entry != 'cpu-tsan' }}
uses: Oneflow-Inc/get-oneflow/digest/upload@whl-skip-nccl
uses: Oneflow-Inc/get-oneflow/digest/upload@ci-test-with-cu118
timeout-minutes: 10
with:
digest: ${{ steps.save-cache.outputs.build-digest }}
Expand All @@ -375,7 +375,7 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@whl-skip-nccl
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@ci-test-with-cu118
name: find cache
id: find-cache
timeout-minutes: 5
Expand Down Expand Up @@ -406,7 +406,7 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@whl-skip-nccl
- uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@ci-test-with-cu118
name: find cache
id: find-cache
timeout-minutes: 5
Expand Down Expand Up @@ -488,7 +488,7 @@ jobs:
if: ${{ contains(matrix.runs-on, 'self-hosted') }}
run: |
docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true
- uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl
- uses: Oneflow-Inc/get-oneflow/cache-complete@ci-test-with-cu118
name: Save cache if successful
id: save-cache
timeout-minutes: 5
Expand All @@ -504,7 +504,7 @@ jobs:
exit 1
- name: Download wheel and packed liboneflow
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl
uses: Oneflow-Inc/get-oneflow/digest/download@ci-test-with-cu118
id: download-digest
timeout-minutes: 10
with:
Expand All @@ -514,7 +514,7 @@ jobs:
ssh-tank-path: ${{ env.SSH_TANK_PATH }}
- name: Get primary node
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
uses: Oneflow-Inc/get-oneflow/master-address@whl-skip-nccl
uses: Oneflow-Inc/get-oneflow/master-address@ci-test-with-cu118
id: get-primary-node
with:
rank: ${{ matrix.rank }}
Expand Down Expand Up @@ -650,7 +650,7 @@ jobs:
TEST_CONTAINER_NAME: "pr-${{ github.event.pull_request.number }}-run-id-${{ github.run_id }}-${{ matrix.entry }}-test"
TEST_MANYLINUX_CONTAINER_NAME: "pr-${{ github.event.pull_request.number }}-run-id-${{ github.run_id }}-${{ matrix.entry }}-test-manylinux"
TEST_WITH_TF_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-tf-2.3.0:2f831e9354298a11447578e869d983959feb046f
TEST_MANYLINUX_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/manylinux2014_x86_64_cuda11.6:328e477069c80035adb3cd4db9632997e6284edd
TEST_MANYLINUX_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/manylinux2014_x86_64_cuda11.8:6455f9b8154333333e6285fde3747aaac4a92929
METRICS_DIR: metrics
steps:
- name: Set proxy
Expand Down Expand Up @@ -718,7 +718,7 @@ jobs:
if: ${{ contains(matrix.runs-on, 'self-hosted') }}
run: |
docker rm -f ${{ env.TEST_MANYLINUX_CONTAINER_NAME }} || true
- uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl
- uses: Oneflow-Inc/get-oneflow/cache-complete@ci-test-with-cu118
name: Save cache if successful
id: save-cache
timeout-minutes: 5
Expand All @@ -734,7 +734,7 @@ jobs:
exit 1
- name: Download wheel and packed liboneflow
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl
uses: Oneflow-Inc/get-oneflow/digest/download@ci-test-with-cu118
id: download-digest
timeout-minutes: 10
with:
Expand All @@ -744,7 +744,7 @@ jobs:
ssh-tank-path: ${{ env.SSH_TANK_PATH }}
- name: Download ASAN and UBSAN wheel and packed liboneflow
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && matrix.device == 'cpu' && false }}
uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl
uses: Oneflow-Inc/get-oneflow/digest/download@ci-test-with-cu118
id: asan-ubsan-download-digest
timeout-minutes: 10
with:
Expand All @@ -754,7 +754,7 @@ jobs:
ssh-tank-path: ${{ env.SSH_TANK_PATH }}
- name: Download TSAN wheel and packed liboneflow
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && matrix.device == 'cpu' && false }}
uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl
uses: Oneflow-Inc/get-oneflow/digest/download@ci-test-with-cu118
id: tsan-download-digest
timeout-minutes: 10
with:
Expand Down Expand Up @@ -902,7 +902,7 @@ jobs:
run: |
ls ${ONEFLOW_WHEEL_PATH}
docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${ONEFLOW_WHEEL_PATH} oneflow
docker exec ${TEST_CONTAINER_NAME} python3 -m pip install -U --find-links=${ONEFLOW_WHEEL_PATH} oneflow
- name: Install downstream libs
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
run: |
Expand Down Expand Up @@ -1080,7 +1080,7 @@ jobs:
- name: Benchmark Test
timeout-minutes: 100
if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'benchmark' && matrix.device == 'cuda' }}
uses: Oneflow-Inc/get-oneflow/pytest-benchmark@whl-skip-nccl
uses: Oneflow-Inc/get-oneflow/pytest-benchmark@ci-test-with-cu118
with:
collect-path: ${{ env.FLOW_VISION_SRC }}/benchmark
container-name: ${{ env.TEST_CONTAINER_NAME }}
Expand Down Expand Up @@ -1141,7 +1141,7 @@ jobs:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
fetch-depth: 0
- uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl
- uses: Oneflow-Inc/get-oneflow/cache-complete@ci-test-with-cu118
name: Save cache if successful
id: save-cache
timeout-minutes: 5
Expand Down
3 changes: 2 additions & 1 deletion ci/fixed-dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
numpy==1.22.1 ; python_version >= "3.10"
numpy==1.26.4 ; python_version >= "3.12"
numpy==1.22.1 ; python_version >= "3.10" and python_version < "3.12"
numpy==1.21.6 ; python_version >= "3.7" and python_version < "3.10"
2 changes: 1 addition & 1 deletion ci/manylinux/build-gcc9.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ ld --version
# clean python dir
cd ${ONEFLOW_CI_SRC_DIR}
${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user -r ci/fixed-dev-requirements.txt
${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user auditwheel
${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user auditwheel setuptools wheel
cd python

function clean_artifacts {
Expand Down
2 changes: 1 addition & 1 deletion ci/manylinux/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ ld --version
# clean python dir
cd ${ONEFLOW_CI_SRC_DIR}
${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user -r ci/fixed-dev-requirements.txt
${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user auditwheel
${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user auditwheel setuptools wheel
cd python

function clean_artifacts {
Expand Down
3 changes: 3 additions & 0 deletions cmake/caches/ci/cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,6 @@ set(WITH_MLIR ON CACHE BOOL "")
set(BUILD_CPP_API ON CACHE BOOL "")
set(CUDA_NVCC_THREADS_NUMBER 8 CACHE STRING "")
set(BUILD_FOR_CI ON CACHE BOOL "")
set(CMAKE_CXX_FLAGS
"-Wno-unused-but-set-parameter -Wno-unused-variable -Wno-class-memaccess -Wno-cast-function-type -Wno-comment -Wno-reorder"
CACHE STRING "")
3 changes: 3 additions & 0 deletions cmake/caches/ci/release/cpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,6 @@ set(CUDA_NVCC_THREADS_NUMBER 8 CACHE STRING "")
set(CMAKE_C_COMPILER_LAUNCHER ccache CACHE STRING "")
set(CMAKE_CXX_COMPILER_LAUNCHER ccache CACHE STRING "")
set(CMAKE_CUDA_COMPILER_LAUNCHER ccache CACHE STRING "")
set(CMAKE_CXX_FLAGS
"-Wno-unused-but-set-parameter -Wno-unused-variable -Wno-class-memaccess -Wno-cast-function-type -Wno-comment -Wno-reorder"
CACHE STRING "")
3 changes: 3 additions & 0 deletions cmake/caches/ci/release/cu118.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ set(CUDA_NVCC_THREADS_NUMBER 2 CACHE STRING "")
set(CMAKE_C_COMPILER_LAUNCHER ccache CACHE STRING "")
set(CMAKE_CXX_COMPILER_LAUNCHER ccache CACHE STRING "")
set(CMAKE_CUDA_COMPILER_LAUNCHER ccache CACHE STRING "")
set(CMAKE_CXX_FLAGS
"-Wno-unused-but-set-parameter -Wno-unused-variable -Wno-class-memaccess -Wno-cast-function-type -Wno-comment -Wno-reorder"
CACHE STRING "")
Loading

0 comments on commit 2bac1d5

Please sign in to comment.