Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate framework changes, introduce get_stable_cuda.py add py3.13t required changes #6339

Merged
merged 10 commits into from
Feb 26, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions .github/scripts/validate_binaries.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,18 @@ else
# Conda pinned see issue: https://github.com/ContinuumIO/anaconda-issues/issues/13350
conda install -y conda=23.11.0
fi
# Please note ffmpeg is required for torchaudio, see https://github.com/pytorch/pytorch/issues/96159
conda create -y -n ${ENV_NAME} python=${MATRIX_PYTHON_VERSION} numpy ffmpeg
conda activate ${ENV_NAME}

# Remove when https://github.com/pytorch/builder/issues/1985 is fixed
if [[ ${MATRIX_GPU_ARCH_TYPE} == 'cuda-aarch64' ]]; then
pip3 install numpy --force-reinstall
# Create and activate the conda environment used for validation.
# "3.13t" is not passed to conda as a version: that branch installs python=3.13
# together with the python-freethreading package from conda-forge, and sets
# TORCH_ONLY='true'.
# NOTE(review): presumably TORCH_ONLY limits validation to torch alone, which
# would explain why ffmpeg is not installed in that branch - confirm.
if [[ ${MATRIX_PYTHON_VERSION} == "3.13t" ]]; then
conda create -y -n ${ENV_NAME} python=3.13 python-freethreading -c conda-forge
conda activate ${ENV_NAME}
TORCH_ONLY='true'
else
# Please note ffmpeg is required for torchaudio, see https://github.com/pytorch/pytorch/issues/96159
conda create -y -n ${ENV_NAME} python=${MATRIX_PYTHON_VERSION} ffmpeg
conda activate ${ENV_NAME}
fi

pip3 install numpy --force-reinstall
INSTALLATION=${MATRIX_INSTALLATION/"conda install"/"conda install -y"}
TEST_SUFFIX=""

Expand Down Expand Up @@ -79,8 +82,8 @@ else

pushd ${PWD}/.ci/pytorch/

# TODO: enable torch-compile on ROCM
if [[ ${MATRIX_GPU_ARCH_TYPE} == "rocm" ]]; then
# TODO: enable torch-compile on ROCM and on 3.13t
# Until then, append the flag that disables the torch-compile check to
# TEST_SUFFIX for either configuration.
# The space before the closing "]]" is required: without it bash reads
# "3.13t"]] as a single word and the conditional is never terminated.
if [[ ${MATRIX_GPU_ARCH_TYPE} == "rocm" || ${MATRIX_PYTHON_VERSION} == "3.13t" ]]; then
    TEST_SUFFIX=${TEST_SUFFIX}" --torch-compile-check disabled"
fi

Expand Down
9 changes: 6 additions & 3 deletions .github/workflows/validate-linux-binaries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -185,12 +185,15 @@ jobs:
set -ex
cd .ci/pytorch/
python3 -m ensurepip --upgrade
# Ask the helper script for the stable CUDA version of the selected channel.
# The path is relative to .ci/pytorch/ (the directory entered above), so
# "../../" climbs two levels; a leading "..." would name a literal directory.
# NOTE(review): assumes test-infra is checked out two levels up - confirm.
export MATRIX_GPU_ARCH_VERSION=$(python3 ../../test-infra/tools/scripts/get_stable_cuda_version.py --channel ${{ inputs.channel }})
export MATRIX_GPU_ARCH_TYPE="cuda"
# Drop the dot ("12.6" -> "126") to build the cuXYZ suffix used in index URLs.
export CUDA_VERSION_NODOT=$(echo $MATRIX_GPU_ARCH_VERSION | tr -d '.')

DWN_PYTORCH_ORG="https://download.pytorch.org/whl/nightly/cu124"
DWN_PYTORCH_ORG="https://download.pytorch.org/whl/nightly/cu${CUDA_VERSION_NODOT}"
if [[ ${{ inputs.channel }} == 'test' ]]; then
DWN_PYTORCH_ORG="https://download.pytorch.org/whl/test/cu124"
DWN_PYTORCH_ORG="https://download.pytorch.org/whl/test/cu${CUDA_VERSION_NODOT}"
elif [[ ${{ inputs.channel }} == 'release' ]]; then
DWN_PYTORCH_ORG="https://download.pytorch.org/whl/cu124"
DWN_PYTORCH_ORG="https://download.pytorch.org/whl/cu${CUDA_VERSION_NODOT}"
fi

python3 -m pip install torch --index-url ${DWN_PYTORCH_ORG}
Expand Down
5 changes: 4 additions & 1 deletion .github/workflows/validate-pypi-wheel-binary-size.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ jobs:
- name: Run validation
run: |
# shellcheck disable=SC2086
CUDA_VERSION=$(python3 tools/scripts/get_stable_cuda_version.py --channel ${{ inputs.channel }})
CUDA_VERSION_NODOT=$(echo $CUDA_VERSION | tr -d '.')

python tools/binary_size_validation/binary_size_validation.py \
--url https://download.pytorch.org/whl/${CHANNEL}/cu124/torch/ \
--url https://download.pytorch.org/whl/${CHANNEL}/cu${CUDA_VERSION_NODOT}/torch/ \
--include "linux" --only-latest-version --threshold 750
8 changes: 8 additions & 0 deletions tools/scripts/generate_binary_build_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@
"12.8": {"cuda": "12.8.0", "cudnn": "9"},
}

# Stable CUDA version for each release channel, keyed by channel name.
# tools/scripts/get_stable_cuda_version.py indexes this table with the value
# of its --channel argument and prints the result.
STABLE_CUDA_VERSIONS = {
"nightly": "12.6",
"test": "12.4",
"release": "12.4",
}

CUDA_AARCH64_ARCHES = ["12.6-aarch64", "12.8-aarch64"]

PACKAGE_TYPES = ["wheel", "conda", "libtorch"]
Expand Down Expand Up @@ -98,6 +104,8 @@
ENABLE = "enable"
DISABLE = "disable"

def get_stable_cuda_version() -> str:
    """Return the stable CUDA version for the nightly channel.

    Returns:
        The ``"nightly"`` entry of the module-level ``STABLE_CUDA_VERSIONS``
        table.
    """
    # The table is named STABLE_CUDA_VERSIONS (plural); looking up any other
    # spelling raises NameError at call time.
    return STABLE_CUDA_VERSIONS["nightly"]

def arch_type(arch_version: str) -> str:
if arch_version in CUDA_ARCHES:
Expand Down
28 changes: 28 additions & 0 deletions tools/scripts/get_stable_cuda_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env python3

"""Return stable CUDA version for the current channel
"""

import argparse
import json
import os
import sys
from typing import List

def main(args: List[str]) -> None:
    """Print the stable CUDA version for the requested release channel.

    The channel is taken from ``--channel``; when the flag is absent it falls
    back to the ``CHANNEL`` environment variable and then to ``nightly``.

    Args:
        args: Command-line arguments, excluding the program name.

    Raises:
        SystemExit: Raised by argparse (exit status 2) when the channel has no
            entry in ``STABLE_CUDA_VERSIONS``.
    """
    # Imported inside the function, as before; generate_binary_build_matrix is
    # a sibling script in tools/scripts/.
    import generate_binary_build_matrix

    stable_versions = generate_binary_build_matrix.STABLE_CUDA_VERSIONS

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--channel",
        help="Channel to use, default nightly",
        type=str,
        # Offer only channels that actually have a stable version; a choice
        # without a table entry could only ever end in a KeyError.
        choices=list(stable_versions),
        default=os.getenv("CHANNEL", "nightly"),
    )
    options = parser.parse_args(args)

    # argparse does not check a default against `choices`, so a value coming
    # from the CHANNEL environment variable has to be validated here.
    if options.channel not in stable_versions:
        parser.error(
            f"unsupported channel {options.channel!r}; "
            f"expected one of: {', '.join(stable_versions)}"
        )

    print(stable_versions[options.channel])


if __name__ == "__main__":
    main(sys.argv[1:])
Loading