Skip to content

Commit

Permalink
Validate framework changes, introduce get_stable_cuda.py add py3.13t …
Browse files Browse the repository at this point in the history
…required changes (#6339)

1. Introduce get_stable_cuda, this way we don't need to hardcode current
stable cuda version in validations
2. Add py3.13t versions
  • Loading branch information
atalman authored Feb 26, 2025
1 parent 2a18211 commit 607e1b0
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 16 deletions.
19 changes: 11 additions & 8 deletions .github/scripts/validate_binaries.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,18 @@ else
# Conda pinned see issue: https://github.com/ContinuumIO/anaconda-issues/issues/13350
conda install -y conda=23.11.0
fi
# Please note ffmpeg is required for torchaudio, see https://github.com/pytorch/pytorch/issues/96159
conda create -y -n ${ENV_NAME} python=${MATRIX_PYTHON_VERSION} numpy ffmpeg
conda activate ${ENV_NAME}

# Remove when https://github.com/pytorch/builder/issues/1985 is fixed
if [[ ${MATRIX_GPU_ARCH_TYPE} == 'cuda-aarch64' ]]; then
pip3 install numpy --force-reinstall
if [[ ${MATRIX_PYTHON_VERSION} == "3.13t" ]]; then
conda create -y -n ${ENV_NAME} python=3.13 python-freethreading -c conda-forge
conda activate ${ENV_NAME}
TORCH_ONLY='true'
else
# Please note ffmpeg is required for torchaudio, see https://github.com/pytorch/pytorch/issues/96159
conda create -y -n ${ENV_NAME} python=${MATRIX_PYTHON_VERSION} ffmpeg
conda activate ${ENV_NAME}
fi

pip3 install numpy --force-reinstall
INSTALLATION=${MATRIX_INSTALLATION/"conda install"/"conda install -y"}
TEST_SUFFIX=""

Expand Down Expand Up @@ -79,8 +82,8 @@ else

pushd ${PWD}/.ci/pytorch/

# TODO: enable torch-compile on ROCM
if [[ ${MATRIX_GPU_ARCH_TYPE} == "rocm" ]]; then
# TODO: enable torch-compile on ROCM and on 3.13t
if [[ ${MATRIX_GPU_ARCH_TYPE} == "rocm" || ${MATRIX_PYTHON_VERSION} == "3.13t" ]]; then
TEST_SUFFIX=${TEST_SUFFIX}" --torch-compile-check disabled"
fi

Expand Down
14 changes: 8 additions & 6 deletions .github/workflows/validate-linux-binaries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,10 @@ jobs:
eval "$(conda shell.bash hook)"
printf '%s\n' ${{ toJson(inputs.release-matrix) }} > release_matrix.json
CUDA_VERSION_STABLE=$(python3 ../../test-infra/tools/scripts/get_stable_cuda_version.py --channel ${MATRIX_CHANNEL})
# Special case PyPi installation package. And Install of PyPi package via poetry
if [[ ${MATRIX_PACKAGE_TYPE} == "manywheel" && \
${MATRIX_GPU_ARCH_VERSION} == "12.4" && \
${MATRIX_GPU_ARCH_VERSION} == "${CUDA_VERSION_STABLE}" && \
${MATRIX_CHANNEL} == "release" && \
${USE_ONLY_DL_PYTORCH_ORG} == "false" ]]; then
source ../../test-infra/.github/scripts/validate_pipy.sh
Expand All @@ -183,15 +184,16 @@ jobs:
timeout: 180
script: |
set -ex
cd .ci/pytorch/
python3 -m ensurepip --upgrade
CUDA_VERSION=$(python3 ../../test-infra/tools/scripts/get_stable_cuda_version.py --channel ${{ inputs.channel }})
CUDA_VERSION_NODOT=$(echo $CUDA_VERSION | tr -d '.')
DWN_PYTORCH_ORG="https://download.pytorch.org/whl/nightly/cu124"
DWN_PYTORCH_ORG="https://download.pytorch.org/whl/nightly/cu${CUDA_VERSION_NODOT}"
if [[ ${{ inputs.channel }} == 'test' ]]; then
DWN_PYTORCH_ORG="https://download.pytorch.org/whl/test/cu124"
DWN_PYTORCH_ORG="https://download.pytorch.org/whl/test/cu${CUDA_VERSION_NODOT}"
elif [[ ${{ inputs.channel }} == 'release' ]]; then
DWN_PYTORCH_ORG="https://download.pytorch.org/whl/cu124"
DWN_PYTORCH_ORG="https://download.pytorch.org/whl/cu${CUDA_VERSION_NODOT}"
fi
python3 -m pip install torch --index-url ${DWN_PYTORCH_ORG}
python3 -c "import torch"
python3 .ci/pytorch/smoke_test/smoke_test.py --package torchonly
9 changes: 7 additions & 2 deletions .github/workflows/validate-pypi-wheel-binary-size.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,12 @@ jobs:
pip3 install -r tools/binary_size_validation/requirements.txt
- name: Run validation
run: |
# shellcheck disable=SC2086
CUDA_VERSION=$(python3 tools/scripts/get_stable_cuda_version.py --channel ${CHANNEL})
# shellcheck disable=SC2086
CUDA_VERSION_NODOT=$(echo ${CUDA_VERSION} | tr -d '.')
# shellcheck disable=SC2086
python tools/binary_size_validation/binary_size_validation.py \
--url https://download.pytorch.org/whl/${CHANNEL}/cu124/torch/ \
--include "linux" --only-latest-version --threshold 750
--url https://download.pytorch.org/whl/${CHANNEL}/cu${CUDA_VERSION_NODOT}/torch/ \
--include "manylinux" --only-latest-version --threshold 750
6 changes: 6 additions & 0 deletions tools/scripts/generate_binary_build_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@
"12.8": {"cuda": "12.8.0", "cudnn": "9"},
}

# Stable CUDA version for each release channel ("nightly", "test", "release").
# Looked up by tools/scripts/get_stable_cuda_version.py so validation
# workflows do not have to hardcode the current stable CUDA version.
STABLE_CUDA_VERSIONS = {
    "nightly": "12.6",
    "test": "12.4",
    "release": "12.4",
}

CUDA_AARCH64_ARCHES = ["12.6-aarch64", "12.8-aarch64"]

PACKAGE_TYPES = ["wheel", "conda", "libtorch"]
Expand Down
28 changes: 28 additions & 0 deletions tools/scripts/get_stable_cuda_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env python3

"""Return stable CUDA version for the current channel"""

import argparse
import json
import os
import sys
from typing import List


def main(args: List[str]) -> None:
    """Print the stable CUDA version for the selected release channel.

    The channel is taken from ``--channel``. When the flag is omitted it
    falls back to the ``CHANNEL`` environment variable, and then to
    ``"nightly"``. The version is looked up in
    ``generate_binary_build_matrix.STABLE_CUDA_VERSIONS`` and written to
    stdout.

    Args:
        args: Command-line arguments, excluding the program name.

    Raises:
        SystemExit: Raised by argparse (exit status 2) when ``--channel`` is
            not an allowed choice, or when the channel resolved from the
            ``CHANNEL`` environment variable has no stable CUDA version.
    """
    # Kept as a function-local import so that importing this module does not
    # itself require generate_binary_build_matrix to be importable.
    import generate_binary_build_matrix

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--channel",
        help="Channel to use, default nightly",
        type=str,
        choices=["nightly", "test", "release"],
        default=os.getenv("CHANNEL", "nightly"),
    )
    options = parser.parse_args(args)

    stable_versions = generate_binary_build_matrix.STABLE_CUDA_VERSIONS
    # argparse validates `choices` only for values given on the command line,
    # not for the default. Without this check an unexpected CHANNEL env var
    # would surface as a bare KeyError traceback.
    if options.channel not in stable_versions:
        parser.error(
            f"unknown channel {options.channel!r}; "
            f"expected one of: {', '.join(stable_versions)}"
        )
    print(stable_versions[options.channel])


if __name__ == "__main__":
    main(sys.argv[1:])

0 comments on commit 607e1b0

Please sign in to comment.