From f0d13a6eb44d47a6288eba66e3cc777613bf9fde Mon Sep 17 00:00:00 2001 From: Shenghang Tsai Date: Sat, 3 Aug 2024 16:47:01 +0800 Subject: [PATCH 01/10] Python 3.12 support (#10548) --- .github/workflows/release.yml | 3 +++ ci/fixed-dev-requirements.txt | 3 ++- .../api/python/autograd/autograd_function_state.h | 1 + oneflow/api/python/caster/size.h | 1 + oneflow/api/python/exception/exception.h | 1 + oneflow/api/python/framework/memory_format.h | 1 + oneflow/api/python/framework/size.h | 1 + oneflow/api/python/framework/tensor.cpp | 1 + oneflow/api/python/framework/tensor.h | 1 + oneflow/api/python/framework/tensor_functions.cpp | 1 + .../api/python/framework/tensor_functions_util.h | 3 ++- oneflow/api/python/framework/tensortype.cpp | 1 + oneflow/api/python/framework/tensortype.h | 1 + oneflow/api/python/framework/typeinfo.h | 1 + oneflow/api/python/functional/indexing.h | 1 + oneflow/api/python/functional/python_arg.h | 1 + oneflow/api/python/functional/python_arg_parser.h | 1 + .../api/python/functional/python_return_types.h | 1 + oneflow/api/python/functional/tensor_api.cpp | 1 + oneflow/api/python/functional/value_types.h | 1 + oneflow/api/python/utils/tensor_utils.h | 1 + oneflow/extension/python/py_compute.h | 1 + oneflow/extension/python/py_kernel_registry.h | 1 + oneflow/extension/stack/python/custom_eval_frame.c | 1 + oneflow/extension/stack/python/custom_eval_frame.h | 1 + python/oneflow/framework/check_point_v2.py | 5 ++--- python/setup.py | 14 ++++++++++---- tools/functional/generate_dispatch_stateful_ops.py | 3 +++ tools/functional/generate_functional_api.py | 3 +++ tools/functional/generate_tensor_api.py | 4 ++++ 30 files changed, 51 insertions(+), 9 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0e8b89883aa..abfb5e199c5 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -170,6 +170,7 @@ jobs: nightly-date: ${{ needs.generate-build-matrix.outputs.formatted_date }} use-nvidia-wheels: ${{ matrix.entry !='cu112' }} python-versions: | + 3.12 3.11 3.10 3.9 @@ -195,6 +196,7 @@ jobs: nightly-date: ${{ needs.generate-build-matrix.outputs.formatted_date }} use-nvidia-wheels: ${{ matrix.entry !='cu112' }} python-versions: | + 3.12 3.11 3.10 3.9 @@ -219,6 +221,7 @@ jobs: nightly: ${{ inputs.is_priv || github.event_name == 'schedule' || github.ref == 'refs/heads/release/add_nightly_date_index'}} nightly-date: ${{ needs.generate-build-matrix.outputs.formatted_date }} python-versions: | + 3.12 3.11 3.10 3.9 diff --git a/ci/fixed-dev-requirements.txt b/ci/fixed-dev-requirements.txt index 45709bd7c8b..9e647891375 100644 --- a/ci/fixed-dev-requirements.txt +++ b/ci/fixed-dev-requirements.txt @@ -1,2 +1,3 @@ -numpy==1.22.1 ; python_version >= "3.10" +numpy==1.26.4 ; python_version >= "3.12" +numpy==1.22.1 ; python_version >= "3.10" and python_version < "3.12" numpy==1.21.6 ; python_version >= "3.7" and python_version < "3.10" diff --git a/oneflow/api/python/autograd/autograd_function_state.h b/oneflow/api/python/autograd/autograd_function_state.h index 3873ad6f65d..b893b07110c 100644 --- a/oneflow/api/python/autograd/autograd_function_state.h +++ b/oneflow/api/python/autograd/autograd_function_state.h @@ -17,6 +17,7 @@ limitations under the License. #define ONEFLOW_API_PYTHON_AUTOGRAD_AUTOGRAD_FUNCTION_STATE_H_ #include +#undef _PyGC_FINALIZED #include #include "oneflow/core/framework/op_expr_grad_function.h" diff --git a/oneflow/api/python/caster/size.h b/oneflow/api/python/caster/size.h index 0d4b314851a..fa6e8d3fb7b 100644 --- a/oneflow/api/python/caster/size.h +++ b/oneflow/api/python/caster/size.h @@ -17,6 +17,7 @@ limitations under the License. #define ONEFLOW_API_PYTHON_CASTER_SIZE_H_ #include #include +#undef _PyGC_FINALIZED #include #include "oneflow/api/python/framework/size.h" diff --git a/oneflow/api/python/exception/exception.h b/oneflow/api/python/exception/exception.h index fdfd51cc64b..cac56c291eb 100644 --- a/oneflow/api/python/exception/exception.h +++ b/oneflow/api/python/exception/exception.h @@ -17,6 +17,7 @@ limitations under the License. #define ONEFLOW_API_PYTHON_COMMON_EXCEPTION_H_ #include +#undef _PyGC_FINALIZED #include #include "oneflow/core/common/exception.h" diff --git a/oneflow/api/python/framework/memory_format.h b/oneflow/api/python/framework/memory_format.h index bd312d68c50..92d518e347c 100644 --- a/oneflow/api/python/framework/memory_format.h +++ b/oneflow/api/python/framework/memory_format.h @@ -17,6 +17,7 @@ limitations under the License. #define ONEFLOW_API_PYTHON_FRAMEWORK_MEMORY_FORMAT_H_ #include +#undef _PyGC_FINALIZED #include #include "oneflow/core/common/memory_format.pb.h" diff --git a/oneflow/api/python/framework/size.h b/oneflow/api/python/framework/size.h index 2829853828f..31d565ef908 100644 --- a/oneflow/api/python/framework/size.h +++ b/oneflow/api/python/framework/size.h @@ -17,6 +17,7 @@ limitations under the License. #define ONEFLOW_API_PYTHON_FRAMEWORK_SIZE_H_ #include #include +#undef _PyGC_FINALIZED #include #include "oneflow/core/common/shape.h" diff --git a/oneflow/api/python/framework/tensor.cpp b/oneflow/api/python/framework/tensor.cpp index 49329ef78c7..36e38eeee5c 100644 --- a/oneflow/api/python/framework/tensor.cpp +++ b/oneflow/api/python/framework/tensor.cpp @@ -17,6 +17,7 @@ limitations under the License. #include #include +#undef _PyGC_FINALIZED #include "oneflow/api/python/exception/exception.h" #include "oneflow/api/python/framework/size.h" #include "oneflow/api/python/framework/tensortype.h" diff --git a/oneflow/api/python/framework/tensor.h b/oneflow/api/python/framework/tensor.h index 72e6b33b3a3..cefec0d3522 100644 --- a/oneflow/api/python/framework/tensor.h +++ b/oneflow/api/python/framework/tensor.h @@ -17,6 +17,7 @@ limitations under the License. #define ONEFLOW_API_PYTHON_FRAMEWORK_TENSOR_H_ #include +#undef _PyGC_FINALIZED #include "oneflow/core/framework/tensor.h" diff --git a/oneflow/api/python/framework/tensor_functions.cpp b/oneflow/api/python/framework/tensor_functions.cpp index 724bc06e739..dd650a255bb 100644 --- a/oneflow/api/python/framework/tensor_functions.cpp +++ b/oneflow/api/python/framework/tensor_functions.cpp @@ -15,6 +15,7 @@ limitations under the License. */ #include +#undef _PyGC_FINALIZED #include "oneflow/api/python/exception/exception.h" #include "oneflow/api/python/framework/size.h" #include "oneflow/api/python/framework/tensor_functions_util.h" diff --git a/oneflow/api/python/framework/tensor_functions_util.h b/oneflow/api/python/framework/tensor_functions_util.h index 462b74d90b9..a8a96459b98 100644 --- a/oneflow/api/python/framework/tensor_functions_util.h +++ b/oneflow/api/python/framework/tensor_functions_util.h @@ -14,6 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include +#undef _PyGC_FINALIZED #include #include "oneflow/api/python/exception/exception.h" #include "oneflow/api/python/functional/common.h" @@ -71,4 +72,4 @@ PyObject* PyParseArgs(PyObject* args, PyObject* kwargs, const char* func_name, } } // namespace one -} // namespace oneflow \ No newline at end of file +} // namespace oneflow diff --git a/oneflow/api/python/framework/tensortype.cpp b/oneflow/api/python/framework/tensortype.cpp index da2d0a7b8fc..d64c7dec0ba 100644 --- a/oneflow/api/python/framework/tensortype.cpp +++ b/oneflow/api/python/framework/tensortype.cpp @@ -14,6 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include +#undef _PyGC_FINALIZED #include #include "oneflow/api/python/framework/tensor.h" #include "oneflow/api/python/framework/tensortype.h" diff --git a/oneflow/api/python/framework/tensortype.h b/oneflow/api/python/framework/tensortype.h index 487225077cc..0c6ff1874ce 100644 --- a/oneflow/api/python/framework/tensortype.h +++ b/oneflow/api/python/framework/tensortype.h @@ -17,6 +17,7 @@ limitations under the License. #define ONEFLOW_API_PYTHON_FRAMEWORK_TENSORTYPE_H_ #include +#undef _PyGC_FINALIZED #include "oneflow/core/framework/dtype.h" #include "oneflow/core/framework/device.h" diff --git a/oneflow/api/python/framework/typeinfo.h b/oneflow/api/python/framework/typeinfo.h index 85645e1bcfa..cba59d5257d 100644 --- a/oneflow/api/python/framework/typeinfo.h +++ b/oneflow/api/python/framework/typeinfo.h @@ -18,6 +18,7 @@ limitations under the License. #define ONEFLOW_API_PYTHON_FRAMEWORK_TYPEINFO_H_ #include +#undef _PyGC_FINALIZED #include "oneflow/core/common/throw.h" #include "oneflow/core/framework/dtype.h" diff --git a/oneflow/api/python/functional/indexing.h b/oneflow/api/python/functional/indexing.h index 4e157ce5e94..2c3a06ec43d 100644 --- a/oneflow/api/python/functional/indexing.h +++ b/oneflow/api/python/functional/indexing.h @@ -17,6 +17,7 @@ limitations under the License. #define ONEFLOW_API_PYTHON_FUNCTIONAL_INDEXING_H_ #include +#undef _PyGC_FINALIZED #include "oneflow/api/python/functional/common.h" #include "oneflow/core/common/maybe.h" diff --git a/oneflow/api/python/functional/python_arg.h b/oneflow/api/python/functional/python_arg.h index 085e4ee6a48..58569fd4ab5 100644 --- a/oneflow/api/python/functional/python_arg.h +++ b/oneflow/api/python/functional/python_arg.h @@ -18,6 +18,7 @@ limitations under the License. #include #include +#undef _PyGC_FINALIZED #include "oneflow/core/common/throw.h" #include "oneflow/api/python/functional/value_types.h" diff --git a/oneflow/api/python/functional/python_arg_parser.h b/oneflow/api/python/functional/python_arg_parser.h index 5fdc6237c7b..01e2a9e541d 100644 --- a/oneflow/api/python/functional/python_arg_parser.h +++ b/oneflow/api/python/functional/python_arg_parser.h @@ -17,6 +17,7 @@ limitations under the License. #define ONEFLOW_API_PYTHON_FUNCTIONAL_PYTHON_ARG_PARSER_H_ #include +#undef _PyGC_FINALIZED #include "oneflow/api/python/functional/function_def.h" #include "oneflow/api/python/functional/python_arg.h" diff --git a/oneflow/api/python/functional/python_return_types.h b/oneflow/api/python/functional/python_return_types.h index df8de893109..13071c615ac 100644 --- a/oneflow/api/python/functional/python_return_types.h +++ b/oneflow/api/python/functional/python_return_types.h @@ -21,6 +21,7 @@ limitations under the License. #define ONEFLOW_API_PYTHON_FUNCTIONAL_PYTHON_RETURN_TYPES_H_ #include +#undef _PyGC_FINALIZED #include #include #include diff --git a/oneflow/api/python/functional/tensor_api.cpp b/oneflow/api/python/functional/tensor_api.cpp index 32e12312cd9..b34ea6441ec 100644 --- a/oneflow/api/python/functional/tensor_api.cpp +++ b/oneflow/api/python/functional/tensor_api.cpp @@ -14,6 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include +#undef _PyGC_FINALIZED #include #include "oneflow/api/python/utils/tensor_utils.h" diff --git a/oneflow/api/python/functional/value_types.h b/oneflow/api/python/functional/value_types.h index 09897de4229..25f3819309b 100644 --- a/oneflow/api/python/functional/value_types.h +++ b/oneflow/api/python/functional/value_types.h @@ -20,6 +20,7 @@ limitations under the License. #include #include #include +#undef _PyGC_FINALIZED #include "oneflow/core/common/data_type.pb.h" #include "oneflow/core/common/maybe.h" diff --git a/oneflow/api/python/utils/tensor_utils.h b/oneflow/api/python/utils/tensor_utils.h index 9896cc9b38b..ed35353e0f9 100644 --- a/oneflow/api/python/utils/tensor_utils.h +++ b/oneflow/api/python/utils/tensor_utils.h @@ -17,6 +17,7 @@ limitations under the License. #define ONEFLOW_API_PYTHON_UTILS_TENSOR_UTILS_H_ #include +#undef _PyGC_FINALIZED #include #include #include diff --git a/oneflow/extension/python/py_compute.h b/oneflow/extension/python/py_compute.h index 827f8f01eba..116f1b8134b 100644 --- a/oneflow/extension/python/py_compute.h +++ b/oneflow/extension/python/py_compute.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef ONEFLOW_EXTENSION_PYTHON_PY_COMPUTE_H_ #define ONEFLOW_EXTENSION_PYTHON_PY_COMPUTE_H_ #include +#undef _PyGC_FINALIZED #include "oneflow/core/framework/framework.h" namespace oneflow { diff --git a/oneflow/extension/python/py_kernel_registry.h b/oneflow/extension/python/py_kernel_registry.h index 82ad83004da..ff2d3e9306f 100644 --- a/oneflow/extension/python/py_kernel_registry.h +++ b/oneflow/extension/python/py_kernel_registry.h @@ -18,6 +18,7 @@ limitations under the License. #include #include +#undef _PyGC_FINALIZED #include "oneflow/core/common/maybe.h" namespace oneflow { diff --git a/oneflow/extension/stack/python/custom_eval_frame.c b/oneflow/extension/stack/python/custom_eval_frame.c index c20a069e4ec..77dc4502f0c 100644 --- a/oneflow/extension/stack/python/custom_eval_frame.c +++ b/oneflow/extension/stack/python/custom_eval_frame.c @@ -21,6 +21,7 @@ limitations under the License. #define PY_SSIZE_T_CLEAN #include +#undef _PyGC_FINALIZED #include #include // see https://bugs.python.org/issue35886 diff --git a/oneflow/extension/stack/python/custom_eval_frame.h b/oneflow/extension/stack/python/custom_eval_frame.h index 44d2b81054c..642dc3c9d5b 100644 --- a/oneflow/extension/stack/python/custom_eval_frame.h +++ b/oneflow/extension/stack/python/custom_eval_frame.h @@ -21,6 +21,7 @@ extern "C" { #endif #include +#undef _PyGC_FINALIZED #if PY_VERSION_HEX >= 0x03090000 typedef PyObject* (*PyFrameEvalFunc)(struct _ts*, struct _frame*, int); diff --git a/python/oneflow/framework/check_point_v2.py b/python/oneflow/framework/check_point_v2.py index 2b0678dd2e9..84909609e5b 100644 --- a/python/oneflow/framework/check_point_v2.py +++ b/python/oneflow/framework/check_point_v2.py @@ -29,7 +29,6 @@ IO, BinaryIO, ) -from typing_extensions import TypeAlias from pathlib import Path import pickle import json @@ -58,10 +57,10 @@ PROTOCOL_VERSION = 1 ONEFLOW_MAGIC_KEY = "__oneflow__" -MAP_LOCATION: TypeAlias = Optional[ +MAP_LOCATION = Optional[ Union[Callable[[Tensor, str], Tensor], flow.device, str, flow.placement] ] -FILE_LIKE: TypeAlias = Union[os.PathLike, BinaryIO, IO[bytes], Path] +FILE_LIKE = Union[os.PathLike, BinaryIO, IO[bytes], Path] class _opener(object): diff --git a/python/setup.py b/python/setup.py index 9f1bb18eae4..598d3b2d8d0 100644 --- a/python/setup.py +++ b/python/setup.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + from __future__ import absolute_import import argparse @@ -96,11 +97,16 @@ def has_ext_modules(self): def get_oneflow_internal_so_path(): - import imp - - fp, pathname, description = imp.find_module("_oneflow_internal", ["oneflow"]) + import importlib + + suffixes = importlib.machinery.EXTENSION_SUFFIXES + loader = importlib.machinery.ExtensionFileLoader + lazy_loader = importlib.util.LazyLoader.factory(loader) + finder = importlib.machinery.FileFinder("oneflow", (lazy_loader, suffixes)) + spec = finder.find_spec("_oneflow_internal") + pathname = spec.origin assert os.path.isfile(pathname) - return os.path.relpath(pathname, "oneflow") + return os.path.basename(pathname) package_data = {"oneflow": [get_oneflow_internal_so_path()] + include_files} diff --git a/tools/functional/generate_dispatch_stateful_ops.py b/tools/functional/generate_dispatch_stateful_ops.py index d54180f4b69..bac05e8c4f1 100644 --- a/tools/functional/generate_dispatch_stateful_ops.py +++ b/tools/functional/generate_dispatch_stateful_ops.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + import os import re import argparse @@ -52,6 +53,7 @@ #define ONEFLOW_API_PYTHON_FUNCTIONAL_GENERATED_DISPATCH_OP_API_H_ #include +#undef _PyGC_FINALIZED #include "oneflow/core/common/optional.h" #include "oneflow/core/common/scalar.h" @@ -109,6 +111,7 @@ + """ #include +#undef _PyGC_FINALIZED #include "oneflow/api/python/of_api_registry.h" #include "oneflow/api/python/functional/common.h" diff --git a/tools/functional/generate_functional_api.py b/tools/functional/generate_functional_api.py index b454150224a..5d1cd82b34f 100644 --- a/tools/functional/generate_functional_api.py +++ b/tools/functional/generate_functional_api.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + import os import re import argparse @@ -101,6 +102,7 @@ + """ #include +#undef _PyGC_FINALIZED namespace oneflow {{ namespace one {{ @@ -117,6 +119,7 @@ + """ #include +#undef _PyGC_FINALIZED #include "oneflow/api/python/of_api_registry.h" #include "oneflow/api/python/exception/exception.h" diff --git a/tools/functional/generate_tensor_api.py b/tools/functional/generate_tensor_api.py index 787ed856ec7..facd32f239c 100644 --- a/tools/functional/generate_tensor_api.py +++ b/tools/functional/generate_tensor_api.py @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. """ + import os import re import argparse @@ -52,6 +53,7 @@ #define ONEFLOW_API_PYTHON_FUNCTIONAL_GENERATED_TENSOR_API_H_ #include +#undef _PyGC_FINALIZED #include "oneflow/core/common/optional.h" #include "oneflow/core/common/scalar.h" @@ -95,6 +97,7 @@ + """ #include +#undef _PyGC_FINALIZED namespace oneflow {{ namespace one {{ @@ -111,6 +114,7 @@ + """ #include +#undef _PyGC_FINALIZED #include "oneflow/api/python/of_api_registry.h" #include "oneflow/api/python/exception/exception.h" From 3d73c9cd06d48744b9ecae3da4ad23bfcfb4c485 Mon Sep 17 00:00:00 2001 From: Shenghang Tsai Date: Fri, 15 Nov 2024 14:29:28 +0900 Subject: [PATCH 02/10] Fix newer OpenSSL build failure (#10565) --- .github/workflows/canary.yml | 2 +- .github/workflows/on_merge.yml | 2 +- .github/workflows/release.yml | 8 +++---- .github/workflows/simple.yml | 4 ++-- .github/workflows/test.yml | 38 +++++++++++++++++----------------- ci/manylinux/build-gcc9.sh | 2 +- ci/manylinux/build.sh | 2 +- 7 files changed, 29 insertions(+), 29 deletions(-) diff --git a/.github/workflows/canary.yml b/.github/workflows/canary.yml index 28500d0d38b..bdb7a9ef28a 100644 --- a/.github/workflows/canary.yml +++ b/.github/workflows/canary.yml @@ -54,7 +54,7 @@ jobs: - name: Checkout Oneflow-Inc/oneflow if: ${{ github.event.inputs.oneflow-ref == '' }} uses: actions/checkout@v2 - - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl + - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels name: Build manylinux id: build-cuda with: diff --git a/.github/workflows/on_merge.yml b/.github/workflows/on_merge.yml index 27fd400e7b9..ed3a0a9bb11 100644 --- a/.github/workflows/on_merge.yml +++ b/.github/workflows/on_merge.yml @@ -15,6 +15,6 @@ jobs: if: github.event.pull_request.merged == true runs-on: ubuntu-latest steps: - - uses: Oneflow-Inc/get-oneflow/update-benchmark-history@whl-skip-nccl + - uses: Oneflow-Inc/get-oneflow/update-benchmark-history@refactor-versions-wheels name: Update benchmark history timeout-minutes: 10 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index abfb5e199c5..bb100b11e0b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -70,7 +70,7 @@ jobs: ref: ${{ inputs.branch }} repository: ${{ secrets.ONEFLOW_PRIV_ORG }}/oneflow token: ${{ secrets.ONEFLOW_PRIV_GH_TOKEN }} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@whl-skip-nccl + - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@refactor-versions-wheels name: Find build cache id: find-cache timeout-minutes: 5 @@ -149,7 +149,7 @@ jobs: if: ${{ inputs.is_priv }} run: | env - - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl + - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels name: Build ${{ matrix.entry }} if: ${{ matrix.entry =='cu118' || startsWith(matrix.entry, 'cu12') }} with: @@ -175,7 +175,7 @@ jobs: 3.10 3.9 3.8 - - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl + - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels name: Build ${{ matrix.entry }} if: ${{ startsWith(matrix.entry, 'cu') && matrix.entry !='cu118' && !startsWith(matrix.entry, 'cu12') }} with: @@ -201,7 +201,7 @@ jobs: 3.10 3.9 3.8 - - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl + - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels name: Build ${{ matrix.entry }} if: ${{ matrix.entry =='cpu' }} with: diff --git a/.github/workflows/simple.yml b/.github/workflows/simple.yml index 88b94579969..b3ff7b08b25 100644 --- a/.github/workflows/simple.yml +++ b/.github/workflows/simple.yml @@ -244,7 +244,7 @@ jobs: repository: Oneflow-Inc/conda-env ref: 30a7f00eb48ee9009d85a848e720823e5054c66b path: conda-env - - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl + - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels name: Build with gcc7 if: ${{ matrix.build-type == 'gcc7'}} with: @@ -253,7 +253,7 @@ jobs: oneflow-build-env: conda conda-env-file: conda-env/dev/gcc7/environment-v2.yml conda-env-name: oneflow-dev-gcc7-v2 - - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl + - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels name: Build with clang10 if: ${{ matrix.build-type == 'clang10'}} with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c8f9a5edda7..8919c24aed9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -176,7 +176,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@whl-skip-nccl + - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@refactor-versions-wheels name: find cache id: find-cache timeout-minutes: 5 @@ -223,7 +223,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} - - uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl + - uses: Oneflow-Inc/get-oneflow/cache-complete@refactor-versions-wheels name: Save cache if successful id: save-cache timeout-minutes: 5 @@ -237,7 +237,7 @@ jobs: run: | echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" exit 1 - - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl + - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels name: Build manylinux ${{ matrix.entry }} id: build-cpu if: ${{ matrix.entry =='cpu' && !matrix.cache-hit }} @@ -259,7 +259,7 @@ jobs: python-versions: | 3.7 3.8 - - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl + - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels name: Build manylinux ${{ matrix.entry }} id: build-cpu-sanitizers if: ${{ (matrix.entry == 'cpu-asan-ubsan' || matrix.entry == 'cpu-tsan') && !matrix.cache-hit && false }} @@ -280,7 +280,7 @@ jobs: clean-ccache: ${{ contains(github.event.pull_request.labels.*.name, 'need-clean-ccache') }} python-versions: | 3.8 - - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl + - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels name: Build manylinux ${{ matrix.entry }} id: build-cuda if: ${{ matrix.entry =='cu116' && !matrix.cache-hit }} @@ -300,7 +300,7 @@ jobs: clean-ccache: ${{ contains(github.event.pull_request.labels.*.name, 'need-clean-ccache') }} python-versions: | 3.7 - - uses: Oneflow-Inc/get-oneflow@whl-skip-nccl + - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels name: Build ${{ matrix.entry }} if: ${{ matrix.entry == 'llvm15' && !matrix.cache-hit }} with: @@ -339,7 +339,7 @@ jobs: }) - name: Upload packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'llvm15' && matrix.entry != 'cpu-asan-ubsan' && matrix.entry != 'cpu-tsan' }} - uses: Oneflow-Inc/get-oneflow/digest/upload@whl-skip-nccl + uses: Oneflow-Inc/get-oneflow/digest/upload@refactor-versions-wheels timeout-minutes: 10 with: digest: ${{ steps.save-cache.outputs.build-digest }} @@ -350,7 +350,7 @@ jobs: dst-dir: cpack - name: Upload whl if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'llvm15' && matrix.entry != 'cpu-asan-ubsan' && matrix.entry != 'cpu-tsan' }} - uses: Oneflow-Inc/get-oneflow/digest/upload@whl-skip-nccl + uses: Oneflow-Inc/get-oneflow/digest/upload@refactor-versions-wheels timeout-minutes: 10 with: digest: ${{ steps.save-cache.outputs.build-digest }} @@ -375,7 +375,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@whl-skip-nccl + - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@refactor-versions-wheels name: find cache id: find-cache timeout-minutes: 5 @@ -406,7 +406,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@whl-skip-nccl + - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@refactor-versions-wheels name: find cache id: find-cache timeout-minutes: 5 @@ -488,7 +488,7 @@ jobs: if: ${{ contains(matrix.runs-on, 'self-hosted') }} run: | docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true - - uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl + - uses: Oneflow-Inc/get-oneflow/cache-complete@refactor-versions-wheels name: Save cache if successful id: save-cache timeout-minutes: 5 @@ -504,7 +504,7 @@ jobs: exit 1 - name: Download wheel and packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl + uses: Oneflow-Inc/get-oneflow/digest/download@refactor-versions-wheels id: download-digest timeout-minutes: 10 with: @@ -514,7 +514,7 @@ jobs: ssh-tank-path: ${{ env.SSH_TANK_PATH }} - name: Get primary node if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - uses: Oneflow-Inc/get-oneflow/master-address@whl-skip-nccl + uses: Oneflow-Inc/get-oneflow/master-address@refactor-versions-wheels id: get-primary-node with: rank: ${{ matrix.rank }} @@ -718,7 +718,7 @@ jobs: if: ${{ contains(matrix.runs-on, 'self-hosted') }} run: | docker rm -f ${{ env.TEST_MANYLINUX_CONTAINER_NAME }} || true - - uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl + - uses: Oneflow-Inc/get-oneflow/cache-complete@refactor-versions-wheels name: Save cache if successful id: save-cache timeout-minutes: 5 @@ -734,7 +734,7 @@ jobs: exit 1 - name: Download wheel and packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl + uses: Oneflow-Inc/get-oneflow/digest/download@refactor-versions-wheels id: download-digest timeout-minutes: 10 with: @@ -744,7 +744,7 @@ jobs: ssh-tank-path: ${{ env.SSH_TANK_PATH }} - name: Download ASAN and UBSAN wheel and packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && matrix.device == 'cpu' && false }} - uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl + uses: Oneflow-Inc/get-oneflow/digest/download@refactor-versions-wheels id: asan-ubsan-download-digest timeout-minutes: 10 with: @@ -754,7 +754,7 @@ jobs: ssh-tank-path: ${{ env.SSH_TANK_PATH }} - name: Download TSAN wheel and packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && matrix.device == 'cpu' && false }} - uses: Oneflow-Inc/get-oneflow/digest/download@whl-skip-nccl + uses: Oneflow-Inc/get-oneflow/digest/download@refactor-versions-wheels id: tsan-download-digest timeout-minutes: 10 with: @@ -1080,7 +1080,7 @@ jobs: - name: Benchmark Test timeout-minutes: 100 if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'benchmark' && matrix.device == 'cuda' }} - uses: Oneflow-Inc/get-oneflow/pytest-benchmark@whl-skip-nccl + uses: Oneflow-Inc/get-oneflow/pytest-benchmark@refactor-versions-wheels with: collect-path: ${{ env.FLOW_VISION_SRC }}/benchmark container-name: ${{ env.TEST_CONTAINER_NAME }} @@ -1141,7 +1141,7 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} fetch-depth: 0 - - uses: Oneflow-Inc/get-oneflow/cache-complete@whl-skip-nccl + - uses: Oneflow-Inc/get-oneflow/cache-complete@refactor-versions-wheels name: Save cache if successful id: save-cache timeout-minutes: 5 diff --git a/ci/manylinux/build-gcc9.sh b/ci/manylinux/build-gcc9.sh index f2a239a7b2b..f4bb3ab8169 100644 --- a/ci/manylinux/build-gcc9.sh +++ b/ci/manylinux/build-gcc9.sh @@ -6,7 +6,7 @@ ld --version # clean python dir cd ${ONEFLOW_CI_SRC_DIR} ${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user -r ci/fixed-dev-requirements.txt -${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user auditwheel +${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user auditwheel setuptools wheel cd python function clean_artifacts { diff --git a/ci/manylinux/build.sh b/ci/manylinux/build.sh index 24a08986294..554f5fb096c 100644 --- a/ci/manylinux/build.sh +++ b/ci/manylinux/build.sh @@ -5,7 +5,7 @@ ld --version # clean python dir cd ${ONEFLOW_CI_SRC_DIR} ${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user -r ci/fixed-dev-requirements.txt -${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user auditwheel +${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user auditwheel setuptools wheel cd python function clean_artifacts { From a27f657f870262d4157499ee2191b089b0d89e69 Mon Sep 17 00:00:00 2001 From: Shenghang Tsai Date: Mon, 2 Dec 2024 11:06:29 +0800 Subject: [PATCH 03/10] CXX flags to make it less verbose in GitHub Action (#10569) --- .github/workflows/simple.yml | 2 +- .github/workflows/test.yml | 4 ++-- cmake/caches/ci/release/cpu.cmake | 3 +++ cmake/caches/ci/release/cu118.cmake | 3 +++ cmake/caches/ci/release/cuda.cmake | 3 +++ 5 files changed, 12 insertions(+), 3 deletions(-) diff --git a/.github/workflows/simple.yml b/.github/workflows/simple.yml index b3ff7b08b25..034a17fb1d4 100644 --- a/.github/workflows/simple.yml +++ b/.github/workflows/simple.yml @@ -219,7 +219,7 @@ jobs: fi - name: Upload logs if: always() && contains(github.event.pull_request.labels.*.name, 'need-simple-ci-upload-artifact') - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: logs-${{ matrix.test_suite }}-${{ matrix.cmake_generator }}-${{ matrix.cmake_build_type }}-shared-${{ matrix.build_shared_libs }} path: | diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8919c24aed9..6eb827b0d9f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -72,7 +72,7 @@ jobs: then exit 0 fi - python3 -m pip install -U pip setuptools wheel + python3 -m pip install -U pip "setuptools<=68.2.2" wheel python3 -m pip install 'cryptography<2.2' oss2 python3 tools/package_mirror.py -i $PWD @@ -139,7 +139,7 @@ jobs: git push - name: Upload patch if: ${{ failure() && steps.git_push.outcome == 'failure' }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: license_and_format-${{ github.sha }}.patch path: license_and_format.patch diff --git a/cmake/caches/ci/release/cpu.cmake b/cmake/caches/ci/release/cpu.cmake index 0042b244450..d5b069195b3 100644 --- a/cmake/caches/ci/release/cpu.cmake +++ b/cmake/caches/ci/release/cpu.cmake @@ -13,3 +13,6 @@ set(CUDA_NVCC_THREADS_NUMBER 8 CACHE STRING "") set(CMAKE_C_COMPILER_LAUNCHER ccache CACHE STRING "") set(CMAKE_CXX_COMPILER_LAUNCHER ccache CACHE STRING "") set(CMAKE_CUDA_COMPILER_LAUNCHER ccache CACHE STRING "") +set(CMAKE_CXX_FLAGS + "-Wno-unused-but-set-parameter -Wno-unused-variable -Wno-class-memaccess -Wno-cast-function-type -Wno-comment -Wno-reorder" + CACHE STRING "") diff --git a/cmake/caches/ci/release/cu118.cmake b/cmake/caches/ci/release/cu118.cmake index 270afb4409e..cf8eeb72187 100644 --- a/cmake/caches/ci/release/cu118.cmake +++ b/cmake/caches/ci/release/cu118.cmake @@ -15,3 +15,6 @@ set(CUDA_NVCC_THREADS_NUMBER 2 CACHE STRING "") set(CMAKE_C_COMPILER_LAUNCHER ccache CACHE STRING "") set(CMAKE_CXX_COMPILER_LAUNCHER ccache CACHE STRING "") set(CMAKE_CUDA_COMPILER_LAUNCHER ccache CACHE STRING "") +set(CMAKE_CXX_FLAGS + "-Wno-unused-but-set-parameter -Wno-unused-variable -Wno-class-memaccess -Wno-cast-function-type -Wno-comment -Wno-reorder" + CACHE STRING "") diff --git a/cmake/caches/ci/release/cuda.cmake b/cmake/caches/ci/release/cuda.cmake index 07db0e4c12d..778ac9c9391 100644 --- a/cmake/caches/ci/release/cuda.cmake +++ b/cmake/caches/ci/release/cuda.cmake @@ -14,3 +14,6 @@ set(CUDA_NVCC_THREADS_NUMBER 2 CACHE STRING "") set(CMAKE_C_COMPILER_LAUNCHER ccache CACHE STRING "") set(CMAKE_CXX_COMPILER_LAUNCHER ccache CACHE STRING "") set(CMAKE_CUDA_COMPILER_LAUNCHER ccache CACHE STRING "") +set(CMAKE_CXX_FLAGS + "-Wno-unused-but-set-parameter -Wno-unused-variable -Wno-class-memaccess -Wno-cast-function-type -Wno-comment -Wno-reorder" + CACHE STRING "") From a09987ac78cc3689616b3814b587858576c1323a Mon Sep 17 00:00:00 2001 From: Shenghang Tsai Date: Wed, 18 Dec 2024 14:59:32 +0800 Subject: [PATCH 04/10] CI uses cu118 (#10359) Co-authored-by: oneflow-ci-bot --- .github/workflows/canary.yml | 2 +- .github/workflows/on_merge.yml | 2 +- .github/workflows/release.yml | 8 ++-- .github/workflows/simple.yml | 4 +- .github/workflows/test.yml | 48 +++++++++++----------- cmake/caches/ci/cuda.cmake | 3 ++ cmake/third_party/flash_attention.cmake | 9 ++-- python/oneflow/test/modules/test_normal.py | 6 ++- 8 files changed, 45 insertions(+), 37 deletions(-) diff --git a/.github/workflows/canary.yml b/.github/workflows/canary.yml index bdb7a9ef28a..32d05b277c7 100644 --- a/.github/workflows/canary.yml +++ b/.github/workflows/canary.yml @@ -54,7 +54,7 @@ jobs: - name: Checkout Oneflow-Inc/oneflow if: ${{ github.event.inputs.oneflow-ref == '' }} uses: actions/checkout@v2 - - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels + - uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118 name: Build manylinux id: build-cuda with: diff --git a/.github/workflows/on_merge.yml b/.github/workflows/on_merge.yml index ed3a0a9bb11..2b8123bc7a3 100644 --- a/.github/workflows/on_merge.yml +++ b/.github/workflows/on_merge.yml @@ -15,6 +15,6 @@ jobs: if: github.event.pull_request.merged == true runs-on: ubuntu-latest steps: - - uses: Oneflow-Inc/get-oneflow/update-benchmark-history@refactor-versions-wheels + - uses: Oneflow-Inc/get-oneflow/update-benchmark-history@ci-test-with-cu118 name: Update benchmark history timeout-minutes: 10 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index bb100b11e0b..42247540cd9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -70,7 +70,7 @@ jobs: ref: ${{ inputs.branch }} repository: ${{ secrets.ONEFLOW_PRIV_ORG }}/oneflow token: ${{ secrets.ONEFLOW_PRIV_GH_TOKEN }} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@refactor-versions-wheels + - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@ci-test-with-cu118 name: Find build cache id: find-cache timeout-minutes: 5 @@ -149,7 +149,7 @@ jobs: if: ${{ inputs.is_priv }} run: | env - - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels + - uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118 name: Build ${{ matrix.entry }} if: ${{ matrix.entry =='cu118' || startsWith(matrix.entry, 'cu12') }} with: @@ -175,7 +175,7 @@ jobs: 3.10 3.9 3.8 - - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels + - uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118 name: Build ${{ matrix.entry }} if: ${{ startsWith(matrix.entry, 'cu') && matrix.entry !='cu118' && !startsWith(matrix.entry, 'cu12') }} with: @@ -201,7 +201,7 @@ jobs: 3.10 3.9 3.8 - - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels + - uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118 name: Build ${{ matrix.entry }} if: ${{ matrix.entry =='cpu' }} with: diff --git a/.github/workflows/simple.yml b/.github/workflows/simple.yml index 034a17fb1d4..ef672109ce0 100644 --- a/.github/workflows/simple.yml +++ b/.github/workflows/simple.yml @@ -244,7 +244,7 @@ jobs: repository: Oneflow-Inc/conda-env ref: 30a7f00eb48ee9009d85a848e720823e5054c66b path: conda-env - - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels + - uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118 name: Build with gcc7 if: ${{ matrix.build-type == 'gcc7'}} with: @@ -253,7 +253,7 @@ jobs: oneflow-build-env: conda conda-env-file: conda-env/dev/gcc7/environment-v2.yml conda-env-name: oneflow-dev-gcc7-v2 - - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels + - uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118 name: Build with clang10 if: ${{ matrix.build-type == 'clang10'}} with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6eb827b0d9f..0c449ef960a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -176,7 +176,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@refactor-versions-wheels + - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@ci-test-with-cu118 name: find cache id: find-cache timeout-minutes: 5 @@ -188,7 +188,7 @@ jobs: builder oneflow-src: ${{ env.ONEFLOW_SRC }} entries: | - cu116 + cu118 cpu cpu-asan-ubsan cpu-tsan @@ -223,7 +223,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} - - uses: Oneflow-Inc/get-oneflow/cache-complete@refactor-versions-wheels + - uses: Oneflow-Inc/get-oneflow/cache-complete@ci-test-with-cu118 name: Save cache if successful id: save-cache timeout-minutes: 5 @@ -237,7 +237,7 @@ jobs: run: | echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit" exit 1 - - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels + - uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118 name: Build manylinux ${{ matrix.entry }} id: build-cpu if: ${{ matrix.entry =='cpu' && !matrix.cache-hit }} @@ -259,7 +259,7 @@ jobs: python-versions: | 3.7 3.8 - - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels + - uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118 name: Build manylinux ${{ matrix.entry }} id: build-cpu-sanitizers if: ${{ (matrix.entry == 'cpu-asan-ubsan' || matrix.entry == 'cpu-tsan') && !matrix.cache-hit && false }} @@ -280,10 +280,10 @@ jobs: clean-ccache: ${{ contains(github.event.pull_request.labels.*.name, 'need-clean-ccache') }} python-versions: | 3.8 - - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels + - uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118 name: Build manylinux ${{ matrix.entry }} id: build-cuda - if: ${{ matrix.entry =='cu116' && !matrix.cache-hit }} + if: ${{ matrix.entry =='cu118' && !matrix.cache-hit }} with: cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc9.sh @@ -292,7 +292,7 @@ jobs: wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }} clear-wheelhouse-dir: true self-hosted: ${{ contains(matrix.runs-on, 'self-hosted') }} - cuda-version: "11.6" + cuda-version: "11.8" manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }} docker-run-use-system-http-proxy: false docker-run-use-lld: false @@ -300,7 +300,7 @@ jobs: clean-ccache: ${{ contains(github.event.pull_request.labels.*.name, 'need-clean-ccache') }} python-versions: | 3.7 - - uses: Oneflow-Inc/get-oneflow@refactor-versions-wheels + - uses: Oneflow-Inc/get-oneflow@ci-test-with-cu118 name: Build ${{ matrix.entry }} if: ${{ matrix.entry == 'llvm15' && !matrix.cache-hit }} with: @@ -339,7 +339,7 @@ jobs: }) - name: Upload packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'llvm15' && matrix.entry != 'cpu-asan-ubsan' && matrix.entry != 'cpu-tsan' }} - uses: Oneflow-Inc/get-oneflow/digest/upload@refactor-versions-wheels + uses: Oneflow-Inc/get-oneflow/digest/upload@ci-test-with-cu118 timeout-minutes: 10 with: digest: ${{ steps.save-cache.outputs.build-digest }} @@ -350,7 +350,7 @@ jobs: dst-dir: cpack - name: Upload whl if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'llvm15' && matrix.entry != 'cpu-asan-ubsan' && matrix.entry != 'cpu-tsan' }} - uses: Oneflow-Inc/get-oneflow/digest/upload@refactor-versions-wheels + uses: Oneflow-Inc/get-oneflow/digest/upload@ci-test-with-cu118 timeout-minutes: 10 with: digest: ${{ steps.save-cache.outputs.build-digest }} @@ -375,7 +375,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@refactor-versions-wheels + - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@ci-test-with-cu118 name: find cache id: find-cache timeout-minutes: 5 @@ -406,7 +406,7 @@ jobs: with: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} - - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@refactor-versions-wheels + - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@ci-test-with-cu118 name: find cache id: find-cache timeout-minutes: 5 @@ -488,7 +488,7 @@ jobs: if: ${{ contains(matrix.runs-on, 'self-hosted') }} run: | docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true - - uses: Oneflow-Inc/get-oneflow/cache-complete@refactor-versions-wheels + - uses: Oneflow-Inc/get-oneflow/cache-complete@ci-test-with-cu118 name: Save cache if successful id: save-cache timeout-minutes: 5 @@ -504,7 +504,7 @@ jobs: exit 1 - name: Download wheel and packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - uses: Oneflow-Inc/get-oneflow/digest/download@refactor-versions-wheels + uses: Oneflow-Inc/get-oneflow/digest/download@ci-test-with-cu118 id: download-digest timeout-minutes: 10 with: @@ -514,7 +514,7 @@ jobs: ssh-tank-path: ${{ env.SSH_TANK_PATH }} - name: Get primary node if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - uses: Oneflow-Inc/get-oneflow/master-address@refactor-versions-wheels + uses: Oneflow-Inc/get-oneflow/master-address@ci-test-with-cu118 id: get-primary-node with: rank: ${{ matrix.rank }} @@ -650,7 +650,7 @@ jobs: TEST_CONTAINER_NAME: "pr-${{ github.event.pull_request.number }}-run-id-${{ github.run_id }}-${{ matrix.entry }}-test" TEST_MANYLINUX_CONTAINER_NAME: "pr-${{ github.event.pull_request.number }}-run-id-${{ github.run_id }}-${{ matrix.entry }}-test-manylinux" TEST_WITH_TF_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-tf-2.3.0:2f831e9354298a11447578e869d983959feb046f - TEST_MANYLINUX_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/manylinux2014_x86_64_cuda11.6:328e477069c80035adb3cd4db9632997e6284edd + TEST_MANYLINUX_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/manylinux2014_x86_64_cuda11.8:6455f9b8154333333e6285fde3747aaac4a92929 METRICS_DIR: metrics steps: - name: Set proxy @@ -718,7 +718,7 @@ jobs: if: ${{ contains(matrix.runs-on, 'self-hosted') }} run: | docker rm -f ${{ env.TEST_MANYLINUX_CONTAINER_NAME }} || true - - uses: Oneflow-Inc/get-oneflow/cache-complete@refactor-versions-wheels + - uses: Oneflow-Inc/get-oneflow/cache-complete@ci-test-with-cu118 name: Save cache if successful id: save-cache timeout-minutes: 5 @@ -734,7 +734,7 @@ jobs: exit 1 - name: Download wheel and packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} - uses: Oneflow-Inc/get-oneflow/digest/download@refactor-versions-wheels + uses: Oneflow-Inc/get-oneflow/digest/download@ci-test-with-cu118 id: download-digest timeout-minutes: 10 with: @@ -744,7 +744,7 @@ jobs: ssh-tank-path: ${{ env.SSH_TANK_PATH }} - name: Download ASAN and UBSAN wheel and packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && matrix.device == 'cpu' && false }} - uses: Oneflow-Inc/get-oneflow/digest/download@refactor-versions-wheels + uses: Oneflow-Inc/get-oneflow/digest/download@ci-test-with-cu118 id: asan-ubsan-download-digest timeout-minutes: 10 with: @@ -754,7 +754,7 @@ jobs: ssh-tank-path: ${{ env.SSH_TANK_PATH }} - name: Download TSAN wheel and packed liboneflow if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && matrix.device == 'cpu' && false }} - uses: Oneflow-Inc/get-oneflow/digest/download@refactor-versions-wheels + uses: Oneflow-Inc/get-oneflow/digest/download@ci-test-with-cu118 id: tsan-download-digest timeout-minutes: 10 with: @@ -902,7 +902,7 @@ jobs: run: | ls ${ONEFLOW_WHEEL_PATH} docker exec ${TEST_CONTAINER_NAME} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple - docker exec ${TEST_CONTAINER_NAME} python3 -m pip install --find-links=${ONEFLOW_WHEEL_PATH} oneflow + docker exec ${TEST_CONTAINER_NAME} python3 -m pip install -U --find-links=${ONEFLOW_WHEEL_PATH} oneflow - name: Install downstream libs if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }} run: | @@ -1080,7 +1080,7 @@ jobs: - name: Benchmark Test timeout-minutes: 100 if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'benchmark' && matrix.device == 'cuda' }} - uses: Oneflow-Inc/get-oneflow/pytest-benchmark@refactor-versions-wheels + uses: Oneflow-Inc/get-oneflow/pytest-benchmark@ci-test-with-cu118 with: collect-path: ${{ env.FLOW_VISION_SRC }}/benchmark container-name: ${{ env.TEST_CONTAINER_NAME }} @@ -1141,7 +1141,7 @@ jobs: ref: ${{ github.event.pull_request.head.sha }} repository: ${{github.event.pull_request.head.repo.full_name}} fetch-depth: 0 - - uses: Oneflow-Inc/get-oneflow/cache-complete@refactor-versions-wheels + - uses: Oneflow-Inc/get-oneflow/cache-complete@ci-test-with-cu118 name: Save cache if successful id: save-cache timeout-minutes: 5 diff --git a/cmake/caches/ci/cuda.cmake b/cmake/caches/ci/cuda.cmake index 759854afe86..0ee1e5dd8aa 100644 --- a/cmake/caches/ci/cuda.cmake +++ b/cmake/caches/ci/cuda.cmake @@ -16,3 +16,6 @@ set(WITH_MLIR ON CACHE BOOL "") set(BUILD_CPP_API ON CACHE BOOL "") set(CUDA_NVCC_THREADS_NUMBER 8 CACHE STRING "") set(BUILD_FOR_CI ON CACHE BOOL "") +set(CMAKE_CXX_FLAGS + "-Wno-unused-but-set-parameter -Wno-unused-variable -Wno-class-memaccess -Wno-cast-function-type -Wno-comment -Wno-reorder" + CACHE STRING "") diff --git a/cmake/third_party/flash_attention.cmake b/cmake/third_party/flash_attention.cmake index 6958afadaef..c8678fa3b19 100644 --- a/cmake/third_party/flash_attention.cmake +++ b/cmake/third_party/flash_attention.cmake @@ -7,8 +7,9 @@ find_package(Git QUIET REQUIRED) set(FLASH_ATTENTION_PROJECT flash_attention) -set(FLASH_ATTENTION_URL https://github.com/Oneflow-Inc/flash-attention-v2.git) -set(FLASH_ATTENTION_TAG eed2e82b880e06237af3e50ceac4cf6728b15645) +set(FLASH_ATTENTION_URL + https://oneflow-static.oss-cn-beijing.aliyuncs.com/third_party_mirror/flash-attention-v2-eed2e82b880e06237af3e50ceac4cf6728b15645.zip +) set(FLASH_ATTENTION_INSTALL_DIR ${THIRD_PARTY_DIR}/flash_attention) set(FLASH_ATTENTION_INCLUDE_DIR ${FLASH_ATTENTION_INSTALL_DIR}/include CACHE PATH "" FORCE) @@ -19,8 +20,8 @@ if(THIRD_PARTY) ExternalProject_Add( ${FLASH_ATTENTION_PROJECT} PREFIX flash_attention - GIT_REPOSITORY ${FLASH_ATTENTION_URL} - GIT_TAG ${FLASH_ATTENTION_TAG} + URL ${FLASH_ATTENTION_URL} + URL_HASH MD5=63192a05973f614aff594a8bd11813ce UPDATE_COMMAND "" BUILD_BYPRODUCTS ${FLASH_ATTENTION_LIBRARIES} CMAKE_ARGS -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE} diff --git a/python/oneflow/test/modules/test_normal.py b/python/oneflow/test/modules/test_normal.py index 9213d55d0ad..adbae22b804 100644 --- a/python/oneflow/test/modules/test_normal.py +++ b/python/oneflow/test/modules/test_normal.py @@ -62,7 +62,11 @@ def helper(self, device, dtype, ptype, t_transform, std_transform): t_transform(q[99:100]).std().item(), std_transform(1), atol=0.3, rtol=0 ) ) - self.assertTrue(flow.allclose(t_transform(q[0:1]).clone(), t_transform(q_row1))) + self.assertTrue( + flow.allclose( + t_transform(q[0:1]).clone(), t_transform(q_row1), atol=0.3, rtol=0.3, + ) + ) mean = flow.empty(100, 100, dtype=dtype, device=device) mean[:50].fill_(ptype(0)) From 15d2682c564e823dcb9b0a0b97acf3d913718c5c Mon Sep 17 00:00:00 2001 From: Shenghang Tsai Date: Wed, 18 Dec 2024 15:19:33 +0800 Subject: [PATCH 05/10] Use cu118 in canary (#10606) --- .github/workflows/canary.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/canary.yml b/.github/workflows/canary.yml index 32d05b277c7..16b7f45af99 100644 --- a/.github/workflows/canary.yml +++ b/.github/workflows/canary.yml @@ -34,7 +34,7 @@ jobs: ONEFLOW_SRC: . MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/canary-cu112 WHEELHOUSE_DIR: manylinux-wheelhouse - COMPUTE_PLATFORM: cu117 + COMPUTE_PLATFORM: cu118 OSS_BUCKET: oneflow-staging OSS_WHEEL_HOUSE_DIR: ${{ matrix.entry }}/commit/${{ github.sha }} OSS_GITHUB_REF_DIR: ${{ matrix.entry }}/${{ github.ref }} From dd72bc9b7efa6ffec8f56e73e82057bc16c24d57 Mon Sep 17 00:00:00 2001 From: XIE Xuan Date: Wed, 18 Dec 2024 19:36:16 +0800 Subject: [PATCH 06/10] update error tol (#10608) --- python/oneflow/test/modules/test_normal.py | 266 ------------------ .../test_scaled_dot_product_attention.py | 132 --------- 2 files changed, 398 deletions(-) delete mode 100644 python/oneflow/test/modules/test_normal.py delete mode 100644 python/oneflow/test/modules/test_scaled_dot_product_attention.py diff --git a/python/oneflow/test/modules/test_normal.py b/python/oneflow/test/modules/test_normal.py deleted file mode 100644 index adbae22b804..00000000000 --- a/python/oneflow/test/modules/test_normal.py +++ /dev/null @@ -1,266 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import unittest -import numpy as np -from collections import OrderedDict - -from oneflow.test_utils.test_util import GenArgList, type_name_to_flow_type -from oneflow.test_utils.automated_test_util import * -import oneflow as flow - - -def _test_normal(test_case, mean, std, shape, device, dtype): - dtype = type_name_to_flow_type[dtype] - device = flow.device(device) - y1 = flow.normal(mean, std, shape, dtype=dtype, device=device) - y2 = flow.normal(mean, std, size=shape, dtype=dtype, device=device) - test_case.assertFalse(np.array_equal(y1.numpy(), y2.numpy())) - test_case.assertEqual(shape, y1.shape) - test_case.assertEqual(dtype, y1.dtype) - test_case.assertEqual(shape, y2.shape) - test_case.assertEqual(dtype, y2.dtype) - - # NOTE(Feng Wen): The test code helper is modified from def test_normal(self, device, dtype): - # https://github.com/pytorch/pytorch/blob/e63c502baa4a6f2109749984be701e722b3b7232/test/test_tensor_creation_ops.py#L3073-L3219 - def helper(self, device, dtype, ptype, t_transform, std_transform): - q = flow.empty(100, 100, dtype=dtype, device=device) - - q.normal_() - self.assertTrue(np.allclose(t_transform(q).mean().item(), 0, atol=0.2, rtol=0)) - self.assertTrue( - np.allclose(t_transform(q).std().item(), std_transform(1), atol=0.2, rtol=0) - ) - - q.normal_(2, 3) - self.assertTrue(np.allclose(t_transform(q).mean().item(), 2, atol=0.3, rtol=0)) - self.assertTrue( - np.allclose(t_transform(q).std().item(), std_transform(3), atol=0.3, rtol=0) - ) - - q = flow.empty(100, 100, dtype=dtype, device=device) - q_row1 = q[0:1].clone() - q[99:100].normal_() - self.assertTrue( - np.allclose(t_transform(q[99:100]).mean().item(), 0, atol=0.4, rtol=0) - ) - self.assertTrue( - np.allclose( - t_transform(q[99:100]).std().item(), std_transform(1), atol=0.3, rtol=0 - ) - ) - self.assertTrue( - flow.allclose( - t_transform(q[0:1]).clone(), t_transform(q_row1), atol=0.3, rtol=0.3, - ) - ) - - mean = flow.empty(100, 100, dtype=dtype, device=device) - mean[:50].fill_(ptype(0)) - mean[50:].fill_(ptype(1)) - - std = flow.empty(100, 100, dtype=flow.float, device=device) - std[:, :50] = 4 - std[:, 50:] = 1 - - r = flow.normal(mean) - self.assertEqual(r.dtype, dtype) - self.assertEqual(str(r.device), str(device)) - self.assertTrue( - np.allclose(t_transform(r[:50]).mean().item(), 0, atol=0.2, rtol=0) - ) - self.assertTrue(np.allclose(t_transform(r[50:]).mean(), 1, atol=0.2, rtol=0)) - self.assertTrue( - np.allclose(t_transform(r).std(), std_transform(1), atol=0.2, rtol=0) - ) - - r.fill_(42) - r = flow.normal(mean, 3) - self.assertEqual(r.dtype, dtype) - self.assertEqual(str(r.device), str(device)) - self.assertTrue(np.allclose(t_transform(r[:50]).mean(), 0, atol=0.2, rtol=0)) - self.assertTrue(np.allclose(t_transform(r[50:]).mean(), 1, atol=0.2, rtol=0)) - self.assertTrue( - np.allclose(t_transform(r).std(), std_transform(3), atol=0.2, rtol=0) - ) - - r.fill_(42) - flow.normal(mean, 3, out=r) - self.assertEqual(r.dtype, dtype) - self.assertEqual(str(r.device), str(device)) - self.assertTrue(np.allclose(t_transform(r[:50]).mean(), 0, atol=0.2, rtol=0)) - self.assertTrue(np.allclose(t_transform(r[50:]).mean(), 1, atol=0.2, rtol=0)) - self.assertTrue( - np.allclose(t_transform(r).std(), std_transform(3), atol=0.2, rtol=0) - ) - - r.fill_(42) - r = flow.normal(2, std) - self.assertEqual(str(r.device), str(device)) - self.assertTrue(np.allclose(r.mean().numpy(), 2, atol=0.2, rtol=0)) - self.assertTrue(np.allclose(r[:, :50].std().numpy(), 4, atol=0.3, rtol=0)) - self.assertTrue(np.allclose(r[:, 50:].std().numpy(), 1, atol=0.2, rtol=0)) - - r.fill_(42) - flow.normal(2, std, out=r) - self.assertFalse(r.dtype.is_complex) - self.assertEqual(str(r.device), str(device)) - self.assertTrue(np.allclose(r.mean().numpy(), 2, atol=0.2, rtol=0)) - self.assertTrue(np.allclose(r[:, :50].std().numpy(), 4, atol=0.3, rtol=0)) - self.assertTrue(np.allclose(r[:, 50:].std().numpy(), 1, atol=0.2, rtol=0)) - - r.fill_(42) - r = flow.normal(mean, std) - self.assertEqual(r.dtype, dtype) - self.assertEqual(str(r.device), str(device)) - self.assertTrue( - np.allclose(t_transform(r[:50]).mean().numpy(), 0, atol=0.2, rtol=0) - ) - self.assertTrue( - np.allclose(t_transform(r[50:]).mean().numpy(), 1, atol=0.2, rtol=0) - ) - self.assertTrue( - np.allclose( - t_transform(r[:, :50]).std().numpy(), std_transform(4), atol=0.3, rtol=0 - ) - ) - self.assertTrue( - np.allclose( - t_transform(r[:, 50:]).std().numpy(), std_transform(1), atol=0.2, rtol=0 - ) - ) - - r.fill_(42) - flow.normal(mean, std, out=r) - self.assertEqual(r.dtype, dtype) - self.assertEqual(str(r.device), str(device)) - self.assertTrue( - np.allclose(t_transform(r[:50]).mean().numpy(), 0, atol=0.2, rtol=0) - ) - self.assertTrue( - np.allclose(t_transform(r[50:]).mean().numpy(), 1, atol=0.2, rtol=0) - ) - self.assertTrue( - np.allclose( - t_transform(r[:, :50]).std().numpy(), std_transform(4), atol=0.3, rtol=0 - ) - ) - self.assertTrue( - np.allclose( - t_transform(r[:, 50:]).std().numpy(), std_transform(1), atol=0.2, rtol=0 - ) - ) - - # test empty mean/std - out = flow.normal(mean=flow.empty((0, 2)), std=flow.empty((0, 1))) - self.assertEqual(out.size(), flow.Size([0, 2])) - - r.fill_(42) - r = flow.normal(2, 3, (100, 100), dtype=dtype, device=device) - self.assertEqual(r.dtype, dtype) - self.assertEqual(str(r.device), str(device)) - self.assertTrue(np.allclose(t_transform(r).mean().numpy(), 2, atol=0.3, rtol=0)) - self.assertTrue( - np.allclose( - t_transform(r).std().numpy(), std_transform(3), atol=0.3, rtol=0 - ) - ) - - r.fill_(42) - flow.normal(2, 3, (100, 100), dtype=dtype, device=device, out=r) - self.assertEqual(r.dtype, dtype) - self.assertEqual(str(r.device), str(device)) - self.assertTrue(np.allclose(t_transform(r).mean().numpy(), 2, atol=0.3, rtol=0)) - self.assertTrue( - np.allclose( - t_transform(r).std().numpy(), std_transform(3), atol=0.3, rtol=0 - ) - ) - - # float std 0 with float mean - r.fill_(42) - r = flow.normal(2, 0, (10, 10), dtype=dtype, device=device) - self.assertEqual(r.dtype, dtype) - self.assertEqual(str(r.device), str(device)) - self.assertTrue(np.allclose(r.numpy(), 2)) - - # float std 0 with tensor mean - r.fill_(42) - mean_rand = flow.randn(10, 10, dtype=dtype, device=device) - flow.normal(mean_rand, 0, out=r) - self.assertEqual(r.dtype, dtype) - self.assertEqual(str(r.device), str(device)) - self.assertTrue(np.allclose(mean_rand.numpy(), r.numpy(), atol=0, rtol=0)) - - # tensor std 0 with float mean - r.fill_(42) - std_zeros = flow.zeros(10, 10, dtype=dtype, device=device) - flow.normal(2, std_zeros, out=r) - self.assertEqual(r.dtype, dtype) - self.assertEqual(str(r.device), str(device)) - self.assertTrue(np.allclose(r.numpy(), 2)) - - # tensor std 0 with tensor mean - r.fill_(42) - flow.normal(mean_rand, std_zeros, out=r) - self.assertEqual(r.dtype, dtype) - self.assertEqual(str(r.device), str(device)) - self.assertTrue(np.allclose(mean_rand.numpy(), r.numpy(), atol=0, rtol=0)) - - helper(test_case, device, dtype, lambda x: x, lambda t: t, lambda mean: mean) - - -def _test_with_generator(test_case, mean, std, shape, device, dtype): - dtype = type_name_to_flow_type[dtype] - gen = flow.Generator() - gen.manual_seed(0) - y1 = flow.normal( - mean, std, shape, generator=gen, dtype=dtype, device=flow.device(device) - ) - gen.manual_seed(0) - y2 = flow.normal( - mean, std, shape, generator=gen, dtype=dtype, device=flow.device(device) - ) - test_case.assertTrue(np.array_equal(y1.numpy(), y2.numpy())) - - -def _test_backward(test_case, mean, std, shape, device, dtype): - dtype = type_name_to_flow_type[dtype] - x = flow.normal( - mean, std, shape, dtype=dtype, device=flow.device(device), requires_grad=True - ) - y = x.sum() - y.backward() - test_case.assertTrue(np.array_equal(np.ones(shape), x.grad.numpy())) - - -@flow.unittest.skip_unless_1n1d() -class TestNormModule(flow.unittest.TestCase): - def test_norm(test_case): - arg_dict = OrderedDict() - arg_dict["fun"] = [_test_normal, _test_with_generator, _test_backward] - arg_dict["mean"] = [-1, 0, 1] - arg_dict["std"] = [1, 2, 8] - arg_dict["shape"] = [(2, 3), (2, 3, 4), (2, 3, 4, 5)] - arg_dict["device"] = ["cpu", "cuda"] - arg_dict["dtype"] = ["float32", "double"] - - for arg in GenArgList(arg_dict): - arg[0](test_case, *arg[1:]) - - -if __name__ == "__main__": - unittest.main() diff --git a/python/oneflow/test/modules/test_scaled_dot_product_attention.py b/python/oneflow/test/modules/test_scaled_dot_product_attention.py deleted file mode 100644 index 08bb9e45a2b..00000000000 --- a/python/oneflow/test/modules/test_scaled_dot_product_attention.py +++ /dev/null @@ -1,132 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import unittest -from collections import OrderedDict -import numpy as np -from oneflow.test_utils.test_util import GenArgList -import math -import os - -import oneflow as flow - - -def _scaled_dot_product_attention( - query, key, value, -): - # input dims will equal 3 or 4. - if key.ndim == 4: - key = key.permute(0, 1, 3, 2) - elif key.ndim == 3: - key = key.permute(0, 2, 1) - scores = flow.matmul(query, key) / math.sqrt(query.shape[-1]) - attn = flow.softmax(scores, dim=-1) - out = flow.matmul(attn, value) - return out - - -def _test_scaled_dot_product_attention( - test_case, batch_size, num_head_pair, seq_len_pair, head_size, dtype, -): - num_heads = num_head_pair[0] - num_heads_k = num_head_pair[1] - seq_len_q = seq_len_pair[0] - seq_len_kv = seq_len_pair[1] - query_raw = np.random.uniform( - low=-1, high=1, size=(batch_size, num_heads, seq_len_q, head_size) - ) - key_raw = np.random.uniform( - low=-1, high=1, size=(batch_size, num_heads_k, seq_len_kv, head_size) - ) - value_raw = np.random.uniform( - low=-1, high=1, size=(batch_size, num_heads_k, seq_len_kv, head_size) - ) - query_fused = flow.tensor(query_raw, dtype=dtype, device="cuda", requires_grad=True) - query_ref = flow.tensor(query_raw, dtype=dtype, device="cuda", requires_grad=True) - key_fused = flow.tensor(key_raw, dtype=dtype, device="cuda", requires_grad=True) - key_ref = flow.tensor(key_raw, dtype=dtype, device="cuda", requires_grad=True) - value_fused = flow.tensor(value_raw, dtype=dtype, device="cuda", requires_grad=True) - value_ref = flow.tensor(value_raw, dtype=dtype, device="cuda", requires_grad=True) - - fused_out = flow._C.scaled_dot_product_attention( - query=query_fused, key=key_fused, value=value_fused, - ) - if num_heads == num_heads_k: - ref_out = _scaled_dot_product_attention(query_ref, key_ref, value_ref,) - else: # For GQA - ref_out = flow.empty(query_fused.shape, device="cuda", dtype=dtype) - stride = num_heads / num_heads_k - for i in range(0, num_heads): - j = int(i / stride) - ref_out[:, i, :, :] = _scaled_dot_product_attention( - query_ref[:, i, :, :], key_ref[:, j, :, :], value_ref[:, j, :, :] - ) - - total_out = ref_out.sum() + fused_out.sum() - total_out.backward() - if dtype == flow.float16: - error_tol = 1e-2 - elif dtype == flow.bfloat16: - error_tol = 1e-1 - else: - error_tol = 1e-3 - - test_case.assertTrue( - np.allclose(ref_out.numpy(), fused_out.numpy(), atol=error_tol, rtol=error_tol) - ) - test_case.assertTrue( - np.allclose( - query_fused.grad.numpy(), - query_ref.grad.numpy(), - atol=error_tol, - rtol=error_tol, - ) - ) - test_case.assertTrue( - np.allclose( - key_fused.grad.numpy(), key_ref.grad.numpy(), atol=error_tol, rtol=error_tol - ) - ) - test_case.assertTrue( - np.allclose( - value_fused.grad.numpy(), - value_ref.grad.numpy(), - atol=error_tol, - rtol=error_tol, - ) - ) - - -@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases") -@flow.unittest.skip_unless_1n1d() -class TestScaledDotProductAttention(flow.unittest.TestCase): - def test_scaled_dot_product_attention(test_case): - args_dict = OrderedDict() - args_dict["test_fun"] = [_test_scaled_dot_product_attention] - args_dict["batchsize"] = [1, 2, 4] - args_dict["num_head_pair"] = [[16, 16], [16, 8]] - args_dict["seqlen_pair"] = [[4096, 4096], [4096, 77], [1024, 1024], [1024, 77]] - args_dict["head_size"] = [40, 80, 160, 41] - args_dict["dtype"] = [flow.float16, flow.bfloat16] - - if flow._oneflow_internal.flags.with_cuda(): - if flow._oneflow_internal.flags.cuda_version() >= 11070: - if flow.cuda.get_device_capability()[0] >= 8: - for arg in GenArgList(args_dict): - arg[0](test_case, *arg[1:]) - - -if __name__ == "__main__": - unittest.main() From 4bdf038d050dfd3bc7ae7cbf4ae07e8cc4982357 Mon Sep 17 00:00:00 2001 From: Peihong Liu Date: Thu, 19 Dec 2024 15:21:47 +0800 Subject: [PATCH 07/10] for compiling with cuda 12.x (#10568) --- cmake/cuda.cmake | 10 ++++++++++ cmake/third_party/FindCUDNN.cmake | 9 ++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 578786351a0..63cdb68fb2a 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -89,6 +89,16 @@ if(BUILD_CUDA) # GeForce RTX 30xx list(APPEND CMAKE_CUDA_ARCHITECTURES 86-real) endif() + + if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8") + # GeForce RTX 40xx + list(APPEND CMAKE_CUDA_ARCHITECTURES 89-real) + endif() + + if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0") + # H100, H20 + list(APPEND CMAKE_CUDA_ARCHITECTURES 90-real) + endif() endif() foreach(CUDA_ARCH ${CMAKE_CUDA_ARCHITECTURES}) diff --git a/cmake/third_party/FindCUDNN.cmake b/cmake/third_party/FindCUDNN.cmake index 3e076419a3a..716898e6dd7 100644 --- a/cmake/third_party/FindCUDNN.cmake +++ b/cmake/third_party/FindCUDNN.cmake @@ -66,7 +66,14 @@ if(CUDNN_FOUND) set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR}) - if(NOT CUDNN_STATIC AND CUDNN_VERSION_MAJOR GREATER_EQUAL 8) + if(NOT CUDNN_STATIC AND CUDNN_VERSION_MAJOR GREATER_EQUAL 9) + # skipping: libcudnn_adv_infer.so libcudnn_adv_train.so + set(CUDNN_DYNAMIC_NAMES libcudnn_cnn.so libcudnn_ops.so) + get_filename_component(CUDNN_LIBRARY_DIRECTORY ${CUDNN_LIBRARY} DIRECTORY) + foreach(CUDNN_DYNAMIC_NAME ${CUDNN_DYNAMIC_NAMES}) + list(APPEND CUDNN_LIBRARIES ${CUDNN_LIBRARY_DIRECTORY}/${CUDNN_DYNAMIC_NAME}) + endforeach() + elseif(NOT CUDNN_STATIC AND CUDNN_VERSION_MAJOR GREATER_EQUAL 8) # skipping: libcudnn_adv_infer.so libcudnn_adv_train.so set(CUDNN_DYNAMIC_NAMES libcudnn_cnn_infer.so libcudnn_cnn_train.so libcudnn_ops_infer.so libcudnn_ops_train.so) From f7fa76fda283a7bc9c4c5846caf398206ef78abd Mon Sep 17 00:00:00 2001 From: Frank J <53087374+crazy-JiangDongHua@users.noreply.github.com> Date: Thu, 26 Dec 2024 11:01:36 +0800 Subject: [PATCH 08/10] fix bug of group norm and layer norm for npu (#10609) Co-authored-by: oneflow-ci-bot --- oneflow/user/ops/group_norm_op.cpp | 3 +++ oneflow/user/ops/layer_norm_op.cpp | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/oneflow/user/ops/group_norm_op.cpp b/oneflow/user/ops/group_norm_op.cpp index db97f13adf3..cd748943622 100644 --- a/oneflow/user/ops/group_norm_op.cpp +++ b/oneflow/user/ops/group_norm_op.cpp @@ -19,9 +19,12 @@ limitations under the License. namespace oneflow { +DEFINE_ENV_BOOL(ONEFLOW_GROUP_NORM_USE_FP16_DIRECTLY, false); + namespace { oneflow::DataType InferGnParamDataType(const DataType x_data_type) { + if (EnvBool()) { return x_data_type; } return (x_data_type == DataType::kFloat16 || x_data_type == DataType::kBFloat16) ? DataType::kFloat : x_data_type; diff --git a/oneflow/user/ops/layer_norm_op.cpp b/oneflow/user/ops/layer_norm_op.cpp index 55eec5bb2a9..6a6d9f85e70 100644 --- a/oneflow/user/ops/layer_norm_op.cpp +++ b/oneflow/user/ops/layer_norm_op.cpp @@ -18,6 +18,8 @@ limitations under the License. namespace oneflow { +DEFINE_ENV_BOOL(ONEFLOW_LAYER_NORM_PARAM_KEEP_DIM, false); + namespace { int64_t ShiftNegativeAxisIfNeed(const Shape& shape, int64_t axis) { @@ -31,6 +33,11 @@ Shape InferBnParamShape(const Shape& x_shape, const int64_t begin_norm_axis) { DimVector bn_param_shape_dim_vec; bn_param_shape_dim_vec.insert(bn_param_shape_dim_vec.end(), x_shape.dim_vec().cbegin(), x_shape.dim_vec().cbegin() + begin_norm_axis); + if (EnvBool()) { + while (bn_param_shape_dim_vec.size() < x_shape.dim_vec().size()) { + bn_param_shape_dim_vec.push_back(1); + } + } const Shape bn_param_shape(bn_param_shape_dim_vec); return bn_param_shape; } From 998dc17aa6ac62a18dbe894337ee288c2434fa86 Mon Sep 17 00:00:00 2001 From: Luyang Date: Thu, 26 Dec 2024 15:11:39 +0800 Subject: [PATCH 09/10] fix device_type2sub_tsk_gph_builder_ (#10563) --- oneflow/core/graph/task_graph.cpp | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/oneflow/core/graph/task_graph.cpp b/oneflow/core/graph/task_graph.cpp index 10280a8dfe5..082b45ba845 100644 --- a/oneflow/core/graph/task_graph.cpp +++ b/oneflow/core/graph/task_graph.cpp @@ -880,14 +880,16 @@ DEFINE_BLD_SUB_TASK_GRAPH_METHOD(BldSubTskGphByBoxing) { if (device_type != DeviceType::kCPU && device_type2sub_tsk_gph_builder_.find(device_type) != device_type2sub_tsk_gph_builder_.end()) { - status = CHECK_JUST( // NOLINT + auto maybe_status = // NOLINT device_type2sub_tsk_gph_builder_ // NOLINT .at(device_type) // NOLINT ->Build(sub_tsk_gph_builder_ctx_.get(), in_nodes, &out_nodes, // NOLINT &sorted_ctrl_tasks, src_parallel_desc, dst_parallel_desc, lbi, // NOLINT blob_desc, src_nd_sbp, dst_nd_sbp, // NOLINT - *(CHECK_JUST(src_op_node->op().GetOpTimeShape()).get()))); // NOLINT - } else { + *(CHECK_JUST(src_op_node->op().GetOpTimeShape()).get())); // NOLINT + if (maybe_status.IsOk()) { status = CHECK_JUST(maybe_status); } + } + if (!status) { status = CHECK_JUST(hierarchical_sub_tsk_gph_builder_->Build( sub_tsk_gph_builder_ctx_.get(), in_nodes, &out_nodes, &sorted_ctrl_tasks, src_parallel_desc, dst_parallel_desc, lbi, blob_desc, src_nd_sbp, dst_nd_sbp, @@ -1052,6 +1054,12 @@ Maybe GlobalTaskGraph::Init() { OpGraph* op_graph = Singleton::Get(); sub_tsk_gph_builder_ctx_.reset(new SubTskGphBuilderCtx(this)); boxing_logger_ = CreateBoxingLogger(); + // Register the corresponding task graph builder based on the device type and store them to map + const auto* global_device_type_create_sub_tsk_gph_builder_fn = + GlobalDeviceType2CreateSubTskGphBuilderFn(); + for (const auto& pair : *global_device_type_create_sub_tsk_gph_builder_fn) { + device_type2sub_tsk_gph_builder_.emplace(pair.first, pair.second()); + } hierarchical_sub_tsk_gph_builder_.reset(new DispatchHierarchicalSubTskGphBuilder()); HashMap> op_node2sorted_comp_tasks; @@ -1088,6 +1096,13 @@ Maybe BoxingTaskGraph::Init( OpGraph* op_graph = Singleton::Get(); sub_tsk_gph_builder_ctx_.reset(new SubTskGphBuilderCtx(this)); boxing_logger_ = CreateBoxingLogger(); + // Register the corresponding task graph builder based on the device type and store them to map + const auto* global_device_type_create_sub_tsk_gph_builder_fn = + GlobalDeviceType2CreateSubTskGphBuilderFn(); + for (const auto& pair : *global_device_type_create_sub_tsk_gph_builder_fn) { + device_type2sub_tsk_gph_builder_.emplace(pair.first, pair.second()); + } + hierarchical_sub_tsk_gph_builder_.reset(new DispatchHierarchicalSubTskGphBuilder()); const auto& TryCreateSortedCompTaskNodes = [&](const OpNode* op_node) { From 9358ac78eb029c70a4227ebd59f8547433182fc2 Mon Sep 17 00:00:00 2001 From: Shenghang Tsai Date: Fri, 27 Dec 2024 11:30:46 +0800 Subject: [PATCH 10/10] Only release custom OneFlow once a week (#10611) --- .github/workflows/community_release.yml | 2 +- .github/workflows/priv_release.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/community_release.yml b/.github/workflows/community_release.yml index af2fa6ef516..ab588d08463 100644 --- a/.github/workflows/community_release.yml +++ b/.github/workflows/community_release.yml @@ -7,7 +7,7 @@ on: schedule: # beijing: 6 pm. # utc: 10 am. - - cron: "0 10 * * *" + - cron: "0 10 * * sat" workflow_dispatch: inputs: priv_branch: diff --git a/.github/workflows/priv_release.yml b/.github/workflows/priv_release.yml index 5bdf4afea36..1e65b383a28 100644 --- a/.github/workflows/priv_release.yml +++ b/.github/workflows/priv_release.yml @@ -7,7 +7,7 @@ on: schedule: # beijing: 12 pm. # utc: 4 am. - - cron: "0 4 * * *" + - cron: "0 4 * * sun" workflow_dispatch: inputs: priv_branch: