Skip to content

Commit

Permalink
Migrate update-s3-html job to test-infra (#4879)
Browse files Browse the repository at this point in the history
  • Loading branch information
atalman authored Jan 15, 2024
1 parent 8acbaa9 commit ca6be2a
Show file tree
Hide file tree
Showing 5 changed files with 631 additions and 0 deletions.
45 changes: 45 additions & 0 deletions .github/workflows/update-s3-html.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Workflow that runs s3_management/manage.py --generate-pep503 once per index
# prefix, both on a cron schedule and on manual dispatch.
name: Update S3 HTML indices for download.pytorch.org

on:
schedule:
# Update the indices every 30 minutes
- cron: "*/30 * * * *"
workflow_dispatch:

# id-token: write lets the job request an OIDC token, which the
# configure-aws-credentials step needs in order to assume the IAM role below.
permissions:
id-token: write
contents: read

jobs:
update:
runs-on: ubuntu-22.04
environment: pytorchbot-env
# One job per index prefix; fail-fast is disabled so a failure for one prefix
# does not cancel the jobs for the other prefixes.
strategy:
matrix:
prefix: ["whl", "whl/test", "whl/nightly", "whl/lts/1.8"]
fail-fast: False
# All steps run inside the miniconda image, so `conda` is available to the script.
container:
image: continuumio/miniconda3:4.12.0
steps:
- name: configure aws credentials
id: aws_creds
uses: aws-actions/configure-aws-credentials@v3
with:
role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_s3_update
aws-region: us-east-1
# Checks out pytorch/test-infra at the ref that triggered this run.
- name: Checkout repository test-infra
uses: actions/checkout@v3
with:
repository: pytorch/test-infra
ref: ${{ github.ref }}
# Creates a throwaway Python 3.8 conda env, installs the script requirements
# and regenerates the index for this job's matrix prefix.
# NOTE(review): `conda activate` usually requires a shell initialised via
# `conda init` (or sourcing conda.sh) -- confirm it works in this container's
# default shell.
- name: Update s3 html index
run: |
set -ex
# Create Conda Environment
conda create --quiet -y --prefix run_env python="3.8"
conda activate ./run_env
# Install requirements
pip install -r s3_management/requirements.txt
python s3_management/manage.py --generate-pep503 ${{ matrix.prefix }}
3 changes: 3 additions & 0 deletions s3_management/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# s3_management

This directory houses scripts to maintain the s3 HTML indices for https://download.pytorch.org/whl
73 changes: 73 additions & 0 deletions s3_management/backup_conda.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#!/usr/bin/env python3
# Downloads pytorch and its domain library packages from the conda channel
# and backs them up to S3
# Do not use unless you know what you are doing
# Usage: python backup_conda.py --version 1.6.0

import argparse
import hashlib
import os
import shutil
import urllib
import urllib.request
from typing import List, Optional

import boto3
import conda.api

# S3 resource handle and the bucket that receives the backed-up packages.
S3 = boto3.resource("s3")
BUCKET = S3.Bucket("pytorch-backup")

# Conda platform subdirs that are searched when querying a channel.
_known_subdirs = [
    "linux-64",
    "osx-64",
    "osx-arm64",
    "win-64",
]


def compute_md5(path: str) -> str:
    """Return the hexadecimal MD5 digest of the file at ``path``.

    The file is hashed in fixed-size chunks so that a large package archive
    is never held in memory in its entirety.

    Args:
        path: Location of the file to hash.

    Returns:
        The MD5 digest as a lowercase hex string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(path, "rb") as f:
        # iter() with a sentinel keeps calling read() until it returns b"" (EOF).
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest()


def download_conda_package(package: str, version: Optional[str] = None,
                           depends: Optional[str] = None,
                           channel: Optional[str] = None) -> List[str]:
    """Download matching builds of ``package`` and return the verified files.

    Every record returned by the channel query for the known subdirs is
    filtered by ``version`` and ``depends`` (when given), downloaded into
    ``<subdir>/<filename>`` relative to the current directory unless that
    file already exists, and then checked against the md5 in the record.

    Args:
        package: Name of the conda package to query.
        version: If given, only records with exactly this version are kept.
        depends: If given, only records whose dependency list contains
            exactly this string are kept.
        channel: Channel to query; ``None`` passes ``channels=None`` through
            to the query.

    Returns:
        Local paths of the files whose md5 matched the record. Files with a
        mismatching md5 are reported on stdout and left out of the result.
    """
    packages = conda.api.SubdirData.query_all(
        package,
        channels=[channel] if channel is not None else None,
        subdirs=_known_subdirs,
    )
    rc = []

    for pkg in packages:
        if version is not None and pkg.version != version:
            continue
        if depends is not None and depends not in pkg.depends:
            continue

        print(f"Downloading {pkg.url}...")
        os.makedirs(pkg.subdir, exist_ok=True)
        fname = f"{pkg.subdir}/{pkg.fn}"
        if not os.path.exists(fname):
            # Download to a side file and rename only once it is complete, so
            # an interrupted transfer is never mistaken for a cached package
            # on the next run.
            partial = f"{fname}.part"
            try:
                with urllib.request.urlopen(pkg.url) as url, open(partial, "wb") as f:
                    # Stream to disk instead of buffering the whole archive.
                    shutil.copyfileobj(url, f)
                os.replace(partial, fname)
            finally:
                # Only still present when the download or rename failed.
                if os.path.exists(partial):
                    os.remove(partial)
        actual_md5 = compute_md5(fname)
        if actual_md5 != pkg.md5:
            print(f"md5 of {fname} is {actual_md5} does not match {pkg.md5}")
            continue
        rc.append(fname)

    return rc

def upload_to_s3(prefix: str, fnames: List[str]) -> None:
    """Upload each file in ``fnames`` to the backup bucket under ``prefix``.

    The local path is reused as the tail of the object key, and each path is
    printed after its upload call returns.
    """
    for local_path in fnames:
        object_key = f"{prefix}/{local_path}"
        BUCKET.upload_file(local_path, object_key)
        print(local_path)



if __name__ == "__main__":
    # The only input is the PyTorch release whose packages get backed up.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--version", type=str, required=True,
                            help="PyTorch Version to backup")
    options = arg_parser.parse_args()

    # Back up the pytorch package itself, selected by exact version.
    core_files = download_conda_package(
        "pytorch", channel="pytorch", version=options.version
    )
    upload_to_s3(f"v{options.version}/conda", core_files)

    # Back up the domain libraries, selected by their dependency on that
    # pytorch version rather than by their own version numbers.
    for libname in ("torchvision", "torchaudio", "torchtext"):
        print(f"processing {libname}")
        lib_files = download_conda_package(
            libname, channel="pytorch", depends=f"pytorch {options.version}"
        )
        upload_to_s3(f"v{options.version}/conda", lib_files)
Loading

0 comments on commit ca6be2a

Please sign in to comment.