diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh index a2eecb8693..2b9983fd13 100755 --- a/.github/scripts/validate_binaries.sh +++ b/.github/scripts/validate_binaries.sh @@ -21,15 +21,18 @@ else # Conda pinned see issue: https://github.com/ContinuumIO/anaconda-issues/issues/13350 conda install -y conda=23.11.0 fi - # Please note ffmpeg is required for torchaudio, see https://github.com/pytorch/pytorch/issues/96159 - conda create -y -n ${ENV_NAME} python=${MATRIX_PYTHON_VERSION} numpy ffmpeg - conda activate ${ENV_NAME} - # Remove when https://github.com/pytorch/builder/issues/1985 is fixed - if [[ ${MATRIX_GPU_ARCH_TYPE} == 'cuda-aarch64' ]]; then - pip3 install numpy --force-reinstall + if [[ ${MATRIX_PYTHON_VERSION} == "3.13t" ]]; then + conda create -y -n ${ENV_NAME} python=3.13 python-freethreading -c conda-forge + conda activate ${ENV_NAME} + TORCH_ONLY='true' + else + # Please note ffmpeg is required for torchaudio, see https://github.com/pytorch/pytorch/issues/96159 + conda create -y -n ${ENV_NAME} python=${MATRIX_PYTHON_VERSION} ffmpeg + conda activate ${ENV_NAME} fi + pip3 install numpy --force-reinstall INSTALLATION=${MATRIX_INSTALLATION/"conda install"/"conda install -y"} TEST_SUFFIX="" @@ -79,8 +82,8 @@ else pushd ${PWD}/.ci/pytorch/ - # TODO: enable torch-compile on ROCM - if [[ ${MATRIX_GPU_ARCH_TYPE} == "rocm" ]]; then + # TODO: enable torch-compile on ROCM and on 3.13t + if [[ ${MATRIX_GPU_ARCH_TYPE} == "rocm" || ${MATRIX_PYTHON_VERSION} == "3.13t" ]]; then TEST_SUFFIX=${TEST_SUFFIX}" --torch-compile-check disabled" fi diff --git a/.github/workflows/validate-linux-binaries.yml b/.github/workflows/validate-linux-binaries.yml index 036c51d425..3e1d698c4d 100644 --- a/.github/workflows/validate-linux-binaries.yml +++ b/.github/workflows/validate-linux-binaries.yml @@ -158,9 +158,10 @@ jobs: eval "$(conda shell.bash hook)" printf '%s\n' ${{ toJson(inputs.release-matrix) }} > release_matrix.json + CUDA_VERSION_STABLE=$(python3 ../../test-infra/tools/scripts/get_stable_cuda_version.py --channel ${MATRIX_CHANNEL}) # Special case PyPi installation package. And Install of PyPi package via poetry if [[ ${MATRIX_PACKAGE_TYPE} == "manywheel" && \ - ${MATRIX_GPU_ARCH_VERSION} == "12.4" && \ + ${MATRIX_GPU_ARCH_VERSION} == "${CUDA_VERSION_STABLE}" && \ ${MATRIX_CHANNEL} == "release" && \ ${USE_ONLY_DL_PYTORCH_ORG} == "false" ]]; then source ../../test-infra/.github/scripts/validate_pipy.sh @@ -183,15 +184,16 @@ jobs: timeout: 180 script: | set -ex - cd .ci/pytorch/ python3 -m ensurepip --upgrade + CUDA_VERSION=$(python3 ../../test-infra/tools/scripts/get_stable_cuda_version.py --channel ${{ inputs.channel }}) + CUDA_VERSION_NODOT=$(echo $CUDA_VERSION | tr -d '.') - DWN_PYTORCH_ORG="https://download.pytorch.org/whl/nightly/cu124" + DWN_PYTORCH_ORG="https://download.pytorch.org/whl/nightly/cu${CUDA_VERSION_NODOT}" if [[ ${{ inputs.channel }} == 'test' ]]; then - DWN_PYTORCH_ORG="https://download.pytorch.org/whl/test/cu124" + DWN_PYTORCH_ORG="https://download.pytorch.org/whl/test/cu${CUDA_VERSION_NODOT}" elif [[ ${{ inputs.channel }} == 'release' ]]; then - DWN_PYTORCH_ORG="https://download.pytorch.org/whl/cu124" + DWN_PYTORCH_ORG="https://download.pytorch.org/whl/cu${CUDA_VERSION_NODOT}" fi python3 -m pip install torch --index-url ${DWN_PYTORCH_ORG} - python3 -c "import torch" + python3 .ci/pytorch/smoke_test/smoke_test.py --package torchonly diff --git a/.github/workflows/validate-pypi-wheel-binary-size.yml b/.github/workflows/validate-pypi-wheel-binary-size.yml index c2b3bbab90..348982bd34 100644 --- a/.github/workflows/validate-pypi-wheel-binary-size.yml +++ b/.github/workflows/validate-pypi-wheel-binary-size.yml @@ -32,7 +32,12 @@ jobs: pip3 install -r tools/binary_size_validation/requirements.txt - name: Run validation run: | + # shellcheck disable=SC2086 + CUDA_VERSION=$(python3 tools/scripts/get_stable_cuda_version.py --channel ${CHANNEL}) + # shellcheck disable=SC2086 + CUDA_VERSION_NODOT=$(echo ${CUDA_VERSION} | tr -d '.') + # shellcheck disable=SC2086 python tools/binary_size_validation/binary_size_validation.py \ - --url https://download.pytorch.org/whl/${CHANNEL}/cu124/torch/ \ - --include "linux" --only-latest-version --threshold 750 + --url https://download.pytorch.org/whl/${CHANNEL}/cu${CUDA_VERSION_NODOT}/torch/ \ + --include "manylinux" --only-latest-version --threshold 750 diff --git a/tools/scripts/generate_binary_build_matrix.py b/tools/scripts/generate_binary_build_matrix.py index 2919647e8b..fd48eec6db 100755 --- a/tools/scripts/generate_binary_build_matrix.py +++ b/tools/scripts/generate_binary_build_matrix.py @@ -44,6 +44,12 @@ "12.8": {"cuda": "12.8.0", "cudnn": "9"}, } +STABLE_CUDA_VERSIONS = { + "nightly": "12.6", + "test": "12.4", + "release": "12.4", +} + CUDA_AARCH64_ARCHES = ["12.6-aarch64", "12.8-aarch64"] PACKAGE_TYPES = ["wheel", "conda", "libtorch"] diff --git a/tools/scripts/get_stable_cuda_version.py b/tools/scripts/get_stable_cuda_version.py new file mode 100644 index 0000000000..f1c85c0f2b --- /dev/null +++ b/tools/scripts/get_stable_cuda_version.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 + +"""Return stable CUDA version for the current channel""" + +import argparse +import json +import os +import sys +from typing import List + + +def main(args: List[str]) -> None: + import generate_binary_build_matrix + + parser = argparse.ArgumentParser() + parser.add_argument( + "--channel", + help="Channel to use, default nightly", + type=str, + choices=["nightly", "test", "release"], + default=os.getenv("CHANNEL", "nightly"), + ) + options = parser.parse_args(args) + print(generate_binary_build_matrix.STABLE_CUDA_VERSIONS[options.channel]) + + +if __name__ == "__main__": + main(sys.argv[1:])