diff --git a/.circleci/config.yml b/.circleci/config.yml index 56202b7..acd6ffa 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,5 +1,18 @@ version: 2.1 +executors: + windows-cpu: + machine: + resource_class: windows.xlarge + image: windows-server-2019-vs2019:stable + shell: bash.exe + + windows-gpu: + machine: + resource_class: windows.gpu.nvidia.medium + image: windows-server-2019-nvidia:stable + shell: bash.exe + commands: checkout_merge: @@ -29,48 +42,6 @@ commands: fi echo "export UPLOAD_CHANNEL=${our_upload_channel}" >> ${BASH_ENV} - install-conda: - steps: - - run: - name: Installing Conda - command: | - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh - bash Miniconda3-latest-Linux-x86_64.sh -b -p $HOME/conda - export PATH=$HOME/conda/bin:$PATH - echo 'export PATH=$HOME/conda/bin:$PATH' >> $BASH_ENV - conda install -y python=3 - - install-pytorch: - steps: - - run: - name: Installing PyTorch - command: | - conda install pytorch -yc pytorch-nightly - conda install -y numpy scipy - - install-cuda: - steps: - - run: - name: Installing CUDA - command: | - wget http://developer.download.nvidia.com/compute/cuda/10.2/Prod/local_installers/cuda-repo-ubuntu1604-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb - sudo dpkg -i cuda-repo-ubuntu1604-10-2-local-10.2.89-440.33.01_1.0-1_amd64.deb - sudo apt-key add /var/cuda-repo-10-2-local-10.2.89-440.33.01/7fa2af80.pub - sudo apt-get update - sudo apt-get -y install cuda - - build-csprng: - steps: - - run: - name: Building CSPRNG - command: python setup.py install - - run-tests: - steps: - - run: - name: Running tests - command: python test/test_csprng.py - binary_common: &binary_common parameters: # Edit these defaults to do a release` @@ -105,48 +76,6 @@ binary_common: &binary_common jobs: - build-nvcc: - machine: - image: ubuntu-1604:201903-01 - resource_class: gpu.small - steps: - - checkout - - install-conda - - install-pytorch - - install-cuda - - build-csprng - - run-tests - 
- build-cc: - machine: - image: ubuntu-1604:201903-01 - resource_class: large - steps: - - checkout - - install-conda - - install-pytorch - - build-csprng - - run-tests - - build-cc-pip: - machine: - image: ubuntu-1604:201903-01 - resource_class: large - steps: - - checkout - - run: - name: Setting Python 3 and upgrade pip - command: | - pyenv local 3.7.0 - pip install -U pip - - run: - name: Installing PyTorch - command: | - pip install --pre torch -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html - pip install numpy scipy - - build-csprng - - run-tests - binary_linux_wheel: <<: *binary_common docker: @@ -169,7 +98,9 @@ jobs: resource_class: 2xlarge+ steps: - checkout_merge - - run: packaging/build_conda.sh + - run: + no_output_timeout: 20m + command: packaging/build_conda.sh - store_artifacts: path: /opt/conda/conda-bld/linux-64 - persist_to_workspace: @@ -179,58 +110,63 @@ jobs: - store_test_results: path: build_results/ -# binary_win_conda: -# <<: *binary_common -# executor: windows-cpu -# steps: -# - checkout_merge -# - run: -# name: Build conda packages -# command: | -# set -ex -# source packaging/windows/internal/vc_install_helper.sh -# packaging/windows/internal/cuda_install.bat -# eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')" -# conda activate base -# conda install -yq conda-build "conda-package-handling!=1.5.0" -# packaging/build_conda.sh -# rm /C/tools/miniconda3/conda-bld/win-64/vs${VC_YEAR}*.tar.bz2 -# - store_artifacts: -# path: C:/tools/miniconda3/conda-bld/win-64 -# - persist_to_workspace: -# root: C:/tools/miniconda3/conda-bld/win-64 -# paths: -# - "*" -# - store_test_results: -# path: build_results/ -# -# binary_win_wheel: -# <<: *binary_common -# executor: windows-cpu -# steps: -# - checkout_merge -# - run: -# name: Build wheel packages -# command: | -# set -ex -# source packaging/windows/internal/vc_install_helper.sh -# packaging/windows/internal/cuda_install.bat -# packaging/build_wheel.sh -# - 
store_artifacts: -# path: dist -# - persist_to_workspace: -# root: dist -# paths: -# - "*" -# - store_test_results: -# path: build_results/ + binary_win_conda: + <<: *binary_common + executor: windows-cpu + steps: + - checkout_merge + - run: + name: Build conda packages + no_output_timeout: 20m + command: | + set -ex + source packaging/windows/internal/vc_install_helper.sh + packaging/windows/internal/cuda_install.bat + eval "$('/C/tools/miniconda3/Scripts/conda.exe' 'shell.bash' 'hook')" + conda activate base + conda install -yq conda-build "conda-package-handling!=1.5.0" + packaging/build_conda.sh + rm /C/tools/miniconda3/conda-bld/win-64/vs${VC_YEAR}*.tar.bz2 + - store_artifacts: + path: C:/tools/miniconda3/conda-bld/win-64 + - persist_to_workspace: + root: C:/tools/miniconda3/conda-bld/win-64 + paths: + - "*" + - store_test_results: + path: build_results/ + + binary_win_wheel: + <<: *binary_common + executor: windows-cpu + steps: + - checkout_merge + - run: + name: Build wheel packages + command: | + set -ex + source packaging/windows/internal/vc_install_helper.sh + packaging/windows/internal/cuda_install.bat + packaging/build_wheel.sh + - store_artifacts: + path: dist + - persist_to_workspace: + root: dist + paths: + - "*" + - store_test_results: + path: build_results/ binary_macos_wheel: <<: *binary_common macos: - xcode: "9.0" + xcode: "12.0" steps: - checkout_merge +# - run: +# name: Install libomp +# command: HOMEBREW_NO_AUTO_UPDATE=1 brew install libomp +# # Disable brew auto update which is very slow - run: # Cannot easily deduplicate this as source'ing activate # will set environment variables which we need to propagate @@ -250,9 +186,13 @@ jobs: binary_macos_conda: <<: *binary_common macos: - xcode: "9.0" + xcode: "12.0" steps: - checkout_merge +# - run: +# name: Install libomp +# command: HOMEBREW_NO_AUTO_UPDATE=1 brew install libomp +# # Disable brew auto update which is very slow - run: command: | curl -o conda.sh 
https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh @@ -308,6 +248,7 @@ jobs: for pkg in ~/workspace/*.whl; do aws s3 cp "$pkg" "s3://pytorch/whl/${UPLOAD_CHANNEL}/<< parameters.subfolder >>" --acl public-read done + unittest_linux_cpu: <<: *binary_common docker: @@ -315,101 +256,201 @@ jobs: resource_class: 2xlarge+ steps: - checkout - - install-conda - - install-pytorch - - build-csprng - - run-tests + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. + command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + + keys: + - env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + - run: + name: Setup + command: .circleci/unittest/linux/scripts/setup_env.sh + - save_cache: + + key: env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + paths: + - conda + - env + - run: + name: Install torchcsprng + command: .circleci/unittest/linux/scripts/install.sh + - run: + name: Run tests + command: .circleci/unittest/linux/scripts/run_test.sh + - run: + name: Post process + command: .circleci/unittest/linux/scripts/post_process.sh + - store_test_results: + path: test-results unittest_linux_gpu: <<: *binary_common machine: image: ubuntu-1604-cuda-10.1:201909-23 - resource_class: gpu.small + resource_class: gpu.nvidia.small.multi environment: image_name: "pytorch/manylinux-cuda101" + PYTHON_VERSION: << parameters.python_version >> steps: - checkout - - install-conda - - install-pytorch - - install-cuda - - build-csprng - - run-tests + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. 
+ command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: -# unittest_windows_cpu: -# <<: *binary_common -# executor: -# name: windows-cpu -# steps: -# - checkout -# - run: -# name: Generate cache key -# # This will refresh cache on Sundays, nightly build should generate new cache. -# command: echo "$(date +"%Y-%U")" > .circleci-weekly -# - restore_cache: -# -# keys: -# - env-v2-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} -# -# - run: -# name: Setup -# command: .circleci/unittest/windows/scripts/setup_env.sh -# - save_cache: -# -# key: env-v2-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} -# -# paths: -# - conda -# - env -# - run: -# name: Install torchvision -# command: .circleci/unittest/windows/scripts/install.sh -# - run: -# name: Run tests -# command: .circleci/unittest/windows/scripts/run_test.sh -# - run: -# name: Post process -# command: .circleci/unittest/windows/scripts/post_process.sh -# - store_test_results: -# path: test-results -# -# unittest_windows_gpu: -# <<: *binary_common -# executor: -# name: windows-gpu -# environment: -# CUDA_VERSION: "10.1" -# steps: -# - checkout -# - run: -# name: Generate cache key -# # This will refresh cache on Sundays, nightly build should generate new cache. 
-# command: echo "$(date +"%Y-%U")" > .circleci-weekly -# - restore_cache: -# -# keys: -# - env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} -# -# - run: -# name: Setup -# command: .circleci/unittest/windows/scripts/setup_env.sh -# - save_cache: -# -# key: env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} -# -# paths: -# - conda -# - env -# - run: -# name: Install torchvision -# command: .circleci/unittest/windows/scripts/install.sh -# - run: -# name: Run tests -# command: .circleci/unittest/windows/scripts/run_test.sh + keys: + - env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + - run: + name: Setup + command: docker run -e PYTHON_VERSION -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/setup_env.sh + - save_cache: + + key: env-v2-linux-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + paths: + - conda + - env + - run: + name: Install torchcsprng + command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/install.sh + - run: + name: Run tests + command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/run_test.sh + - run: + name: Post Process + command: docker run -t --gpus all -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/post_process.sh + - store_test_results: + path: test-results + + unittest_windows_cpu: + <<: *binary_common + executor: + name: windows-cpu + steps: + - checkout + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate 
new cache. + command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + + keys: + - env-v2-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + - run: + name: Setup + command: .circleci/unittest/windows/scripts/setup_env.sh + - save_cache: + + key: env-v2-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + paths: + - conda + - env + - run: + name: Install torchcsprng + command: .circleci/unittest/windows/scripts/install.sh + - run: + name: Run tests + command: .circleci/unittest/windows/scripts/run_test.sh + - run: + name: Post process + command: .circleci/unittest/windows/scripts/post_process.sh + - store_test_results: + path: test-results + + unittest_windows_gpu: + <<: *binary_common + executor: + name: windows-gpu + environment: + CUDA_VERSION: "10.1" + PYTHON_VERSION: << parameters.python_version >> + steps: + - checkout + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. 
+ command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + + keys: + - env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + - run: + name: Setup + command: .circleci/unittest/windows/scripts/setup_env.sh + - save_cache: + + key: env-v1-windows-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/windows/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + paths: + - conda + - env + - run: + name: Install torchcsprng + command: .circleci/unittest/windows/scripts/install.sh + - run: + name: Run tests + command: .circleci/unittest/windows/scripts/run_test.sh + - run: + name: Post process + command: .circleci/unittest/windows/scripts/post_process.sh + - store_test_results: + path: test-results + + unittest_macos_cpu: + <<: *binary_common + macos: + xcode: "12.0" + resource_class: large + steps: + - checkout + - designate_upload_channel + - run: + name: Install wget + command: HOMEBREW_NO_AUTO_UPDATE=1 brew install wget + # Disable brew auto update which is very slow # - run: -# name: Post process -# command: .circleci/unittest/windows/scripts/post_process.sh -# - store_test_results: -# path: test-results +# name: Install libomp +# command: HOMEBREW_NO_AUTO_UPDATE=1 brew install libomp +# # Disable brew auto update which is very slow + - run: + name: Generate cache key + # This will refresh cache on Sundays, nightly build should generate new cache. 
+ command: echo "$(date +"%Y-%U")" > .circleci-weekly + - restore_cache: + + keys: + - env-v3-macos-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + - run: + name: Setup + command: .circleci/unittest/linux/scripts/setup_env.sh + - save_cache: + + key: env-v3-macos-{{ arch }}-py<< parameters.python_version >>-{{ checksum ".circleci/unittest/linux/scripts/environment.yml" }}-{{ checksum ".circleci-weekly" }} + + paths: + - conda + - env + - run: + name: Install torchcsprng + command: .circleci/unittest/linux/scripts/install.sh + - run: + name: Run tests + command: .circleci/unittest/linux/scripts/run_test.sh + - run: + name: Post process + command: .circleci/unittest/linux/scripts/post_process.sh + - store_test_results: + path: test-results workflows: build: @@ -420,11 +461,6 @@ workflows: name: binary_linux_wheel_py3.6_cpu python_version: '3.6' wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_wheel: - cu_version: cu92 - name: binary_linux_wheel_py3.6_cu92 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda92 - binary_linux_wheel: cu_version: cu101 name: binary_linux_wheel_py3.6_cu101 @@ -435,16 +471,16 @@ workflows: name: binary_linux_wheel_py3.6_cu102 python_version: '3.6' wheel_docker_image: pytorch/manylinux-cuda102 + - binary_linux_wheel: + cu_version: cu111 + name: binary_linux_wheel_py3.6_cu111 + python_version: '3.6' + wheel_docker_image: pytorch/manylinux-cuda111 - binary_linux_wheel: cu_version: cpu name: binary_linux_wheel_py3.7_cpu python_version: '3.7' wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_wheel: - cu_version: cu92 - name: binary_linux_wheel_py3.7_cu92 - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda92 - binary_linux_wheel: cu_version: cu101 name: binary_linux_wheel_py3.7_cu101 @@ -455,16 +491,16 @@ workflows: name: binary_linux_wheel_py3.7_cu102 python_version: '3.7' 
wheel_docker_image: pytorch/manylinux-cuda102 + - binary_linux_wheel: + cu_version: cu111 + name: binary_linux_wheel_py3.7_cu111 + python_version: '3.7' + wheel_docker_image: pytorch/manylinux-cuda111 - binary_linux_wheel: cu_version: cpu name: binary_linux_wheel_py3.8_cpu python_version: '3.8' wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_wheel: - cu_version: cu92 - name: binary_linux_wheel_py3.8_cu92 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda92 - binary_linux_wheel: cu_version: cu101 name: binary_linux_wheel_py3.8_cu101 @@ -475,6 +511,31 @@ workflows: name: binary_linux_wheel_py3.8_cu102 python_version: '3.8' wheel_docker_image: pytorch/manylinux-cuda102 + - binary_linux_wheel: + cu_version: cu111 + name: binary_linux_wheel_py3.8_cu111 + python_version: '3.8' + wheel_docker_image: pytorch/manylinux-cuda111 + - binary_linux_wheel: + cu_version: cpu + name: binary_linux_wheel_py3.9_cpu + python_version: '3.9' + wheel_docker_image: pytorch/manylinux-cuda102 + - binary_linux_wheel: + cu_version: cu101 + name: binary_linux_wheel_py3.9_cu101 + python_version: '3.9' + wheel_docker_image: pytorch/manylinux-cuda101 + - binary_linux_wheel: + cu_version: cu102 + name: binary_linux_wheel_py3.9_cu102 + python_version: '3.9' + wheel_docker_image: pytorch/manylinux-cuda102 + - binary_linux_wheel: + cu_version: cu111 + name: binary_linux_wheel_py3.9_cu111 + python_version: '3.9' + wheel_docker_image: pytorch/manylinux-cuda111 - binary_macos_wheel: cu_version: cpu name: binary_macos_wheel_py3.6_cpu @@ -490,134 +551,160 @@ workflows: name: binary_macos_wheel_py3.8_cpu python_version: '3.8' wheel_docker_image: pytorch/manylinux-cuda102 -# - binary_win_wheel: -# cu_version: cpu -# filters: -# branches: -# only: master -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: binary_win_wheel_py3.6_cpu -# python_version: '3.6' -# - binary_win_wheel: -# cu_version: cu92 -# filters: -# branches: -# only: master -# tags: -# only: 
/v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: binary_win_wheel_py3.6_cu92 -# python_version: '3.6' -# - binary_win_wheel: -# cu_version: cu101 -# filters: -# branches: -# only: master -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: binary_win_wheel_py3.6_cu101 -# python_version: '3.6' -# - binary_win_wheel: -# cu_version: cu102 -# filters: -# branches: -# only: master -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: binary_win_wheel_py3.6_cu102 -# python_version: '3.6' -# - binary_win_wheel: -# cu_version: cpu -# filters: -# branches: -# only: master -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: binary_win_wheel_py3.7_cpu -# python_version: '3.7' -# - binary_win_wheel: -# cu_version: cu92 -# filters: -# branches: -# only: master -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: binary_win_wheel_py3.7_cu92 -# python_version: '3.7' -# - binary_win_wheel: -# cu_version: cu101 -# filters: -# branches: -# only: master -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: binary_win_wheel_py3.7_cu101 -# python_version: '3.7' -# - binary_win_wheel: -# cu_version: cu102 -# filters: -# branches: -# only: master -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: binary_win_wheel_py3.7_cu102 -# python_version: '3.7' -# - binary_win_wheel: -# cu_version: cpu -# name: binary_win_wheel_py3.8_cpu -# python_version: '3.8' -# - binary_win_wheel: -# cu_version: cu92 -# filters: -# branches: -# only: master -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: binary_win_wheel_py3.8_cu92 -# python_version: '3.8' -# - binary_win_wheel: -# cu_version: cu101 -# filters: -# branches: -# only: master -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: binary_win_wheel_py3.8_cu101 -# python_version: '3.8' -# - binary_win_wheel: -# cu_version: cu102 -# name: binary_win_wheel_py3.8_cu102 -# python_version: '3.8' - - binary_linux_conda: + - binary_macos_wheel: cu_version: cpu - name: binary_linux_conda_py3.6_cpu - python_version: '3.6' + name: 
binary_macos_wheel_py3.9_cpu + python_version: '3.9' wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_conda: - cu_version: cu92 - name: binary_linux_conda_py3.6_cu92 + - binary_win_wheel: + cu_version: cpu + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_wheel_py3.6_cpu python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda92 - - binary_linux_conda: + - binary_win_wheel: cu_version: cu101 - name: binary_linux_conda_py3.6_cu101 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_wheel_py3.6_cu101 python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda101 - - binary_linux_conda: + - binary_win_wheel: cu_version: cu102 - name: binary_linux_conda_py3.6_cu102 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_wheel_py3.6_cu102 python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_conda: + - binary_win_wheel: + cu_version: cu111 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_wheel_py3.6_cu111 + python_version: '3.6' + - binary_win_wheel: cu_version: cpu - name: binary_linux_conda_py3.7_cpu - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_conda: - cu_version: cu92 - name: binary_linux_conda_py3.7_cu92 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_wheel_py3.7_cpu python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda92 + - binary_win_wheel: + cu_version: cu101 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_wheel_py3.7_cu101 + python_version: '3.7' + - binary_win_wheel: + cu_version: cu102 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_wheel_py3.7_cu102 + python_version: '3.7' + 
- binary_win_wheel: + cu_version: cu111 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_wheel_py3.7_cu111 + python_version: '3.7' + - binary_win_wheel: + cu_version: cpu + name: binary_win_wheel_py3.8_cpu + python_version: '3.8' + - binary_win_wheel: + cu_version: cu101 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_wheel_py3.8_cu101 + python_version: '3.8' + - binary_win_wheel: + cu_version: cu102 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_wheel_py3.8_cu102 + python_version: '3.8' + - binary_win_wheel: + cu_version: cu111 + name: binary_win_wheel_py3.8_cu111 + python_version: '3.8' + - binary_win_wheel: + cu_version: cpu + name: binary_win_wheel_py3.9_cpu + python_version: '3.9' + - binary_win_wheel: + cu_version: cu101 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_wheel_py3.9_cu101 + python_version: '3.9' + - binary_win_wheel: + cu_version: cu102 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_wheel_py3.9_cu102 + python_version: '3.9' + - binary_win_wheel: + cu_version: cu111 + name: binary_win_wheel_py3.9_cu111 + python_version: '3.9' + - binary_linux_conda: + cu_version: cpu + name: binary_linux_conda_py3.6_cpu + python_version: '3.6' + wheel_docker_image: pytorch/manylinux-cuda102 + - binary_linux_conda: + cu_version: cu101 + name: binary_linux_conda_py3.6_cu101 + python_version: '3.6' + wheel_docker_image: pytorch/manylinux-cuda101 + - binary_linux_conda: + cu_version: cu102 + name: binary_linux_conda_py3.6_cu102 + python_version: '3.6' + wheel_docker_image: pytorch/manylinux-cuda102 + - binary_linux_conda: + cu_version: cu111 + name: binary_linux_conda_py3.6_cu111 + python_version: '3.6' + wheel_docker_image: pytorch/manylinux-cuda111 + - binary_linux_conda: + cu_version: cpu + 
name: binary_linux_conda_py3.7_cpu + python_version: '3.7' + wheel_docker_image: pytorch/manylinux-cuda102 - binary_linux_conda: cu_version: cu101 name: binary_linux_conda_py3.7_cu101 @@ -628,16 +715,16 @@ workflows: name: binary_linux_conda_py3.7_cu102 python_version: '3.7' wheel_docker_image: pytorch/manylinux-cuda102 + - binary_linux_conda: + cu_version: cu111 + name: binary_linux_conda_py3.7_cu111 + python_version: '3.7' + wheel_docker_image: pytorch/manylinux-cuda111 - binary_linux_conda: cu_version: cpu name: binary_linux_conda_py3.8_cpu python_version: '3.8' wheel_docker_image: pytorch/manylinux-cuda102 - - binary_linux_conda: - cu_version: cu92 - name: binary_linux_conda_py3.8_cu92 - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda92 - binary_linux_conda: cu_version: cu101 name: binary_linux_conda_py3.8_cu101 @@ -648,6 +735,31 @@ workflows: name: binary_linux_conda_py3.8_cu102 python_version: '3.8' wheel_docker_image: pytorch/manylinux-cuda102 + - binary_linux_conda: + cu_version: cu111 + name: binary_linux_conda_py3.8_cu111 + python_version: '3.8' + wheel_docker_image: pytorch/manylinux-cuda111 + - binary_linux_conda: + cu_version: cpu + name: binary_linux_conda_py3.9_cpu + python_version: '3.9' + wheel_docker_image: pytorch/manylinux-cuda102 + - binary_linux_conda: + cu_version: cu101 + name: binary_linux_conda_py3.9_cu101 + python_version: '3.9' + wheel_docker_image: pytorch/manylinux-cuda101 + - binary_linux_conda: + cu_version: cu102 + name: binary_linux_conda_py3.9_cu102 + python_version: '3.9' + wheel_docker_image: pytorch/manylinux-cuda102 + - binary_linux_conda: + cu_version: cu111 + name: binary_linux_conda_py3.9_cu111 + python_version: '3.9' + wheel_docker_image: pytorch/manylinux-cuda111 - binary_macos_conda: cu_version: cpu name: binary_macos_conda_py3.6_cpu @@ -663,104 +775,135 @@ workflows: name: binary_macos_conda_py3.8_cpu python_version: '3.8' wheel_docker_image: pytorch/manylinux-cuda102 -# - binary_win_conda: -# 
cu_version: cpu -# filters: -# branches: -# only: master -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: binary_win_conda_py3.6_cpu -# python_version: '3.6' -# - binary_win_conda: -# cu_version: cu92 -# filters: -# branches: -# only: master -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: binary_win_conda_py3.6_cu92 -# python_version: '3.6' -# - binary_win_conda: -# cu_version: cu101 -# filters: -# branches: -# only: master -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: binary_win_conda_py3.6_cu101 -# python_version: '3.6' -# - binary_win_conda: -# cu_version: cu102 -# filters: -# branches: -# only: master -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: binary_win_conda_py3.6_cu102 -# python_version: '3.6' -# - binary_win_conda: -# cu_version: cpu -# filters: -# branches: -# only: master -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: binary_win_conda_py3.7_cpu -# python_version: '3.7' -# - binary_win_conda: -# cu_version: cu92 -# filters: -# branches: -# only: master -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: binary_win_conda_py3.7_cu92 -# python_version: '3.7' -# - binary_win_conda: -# cu_version: cu101 -# filters: -# branches: -# only: master -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: binary_win_conda_py3.7_cu101 -# python_version: '3.7' -# - binary_win_conda: -# cu_version: cu102 -# filters: -# branches: -# only: master -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: binary_win_conda_py3.7_cu102 -# python_version: '3.7' -# - binary_win_conda: +# - binary_macos_conda: # cu_version: cpu -# name: binary_win_conda_py3.8_cpu -# python_version: '3.8' -# - binary_win_conda: -# cu_version: cu92 -# filters: -# branches: -# only: master -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: binary_win_conda_py3.8_cu92 -# python_version: '3.8' -# - binary_win_conda: -# cu_version: cu101 -# filters: -# branches: -# only: master -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: 
binary_win_conda_py3.8_cu101 -# python_version: '3.8' -# - binary_win_conda: -# cu_version: cu102 -# name: binary_win_conda_py3.8_cu102 -# python_version: '3.8' +# name: binary_macos_conda_py3.9_cpu +# python_version: '3.9' +# wheel_docker_image: pytorch/manylinux-cuda102 + - binary_win_conda: + cu_version: cpu + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_conda_py3.6_cpu + python_version: '3.6' + - binary_win_conda: + cu_version: cu101 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_conda_py3.6_cu101 + python_version: '3.6' + - binary_win_conda: + cu_version: cu102 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_conda_py3.6_cu102 + python_version: '3.6' + - binary_win_conda: + cu_version: cu111 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_conda_py3.6_cu111 + python_version: '3.6' + - binary_win_conda: + cu_version: cpu + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_conda_py3.7_cpu + python_version: '3.7' + - binary_win_conda: + cu_version: cu101 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_conda_py3.7_cu101 + python_version: '3.7' + - binary_win_conda: + cu_version: cu102 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_conda_py3.7_cu102 + python_version: '3.7' + - binary_win_conda: + cu_version: cu111 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_conda_py3.7_cu111 + python_version: '3.7' + - binary_win_conda: + cu_version: cpu + name: binary_win_conda_py3.8_cpu + python_version: '3.8' + - binary_win_conda: + cu_version: cu101 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: 
binary_win_conda_py3.8_cu101 + python_version: '3.8' + - binary_win_conda: + cu_version: cu102 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_conda_py3.8_cu102 + python_version: '3.8' + - binary_win_conda: + cu_version: cu111 + name: binary_win_conda_py3.8_cu111 + python_version: '3.8' + - binary_win_conda: + cu_version: cpu + name: binary_win_conda_py3.9_cpu + python_version: '3.9' + - binary_win_conda: + cu_version: cu101 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_conda_py3.9_cu101 + python_version: '3.9' + - binary_win_conda: + cu_version: cu102 + filters: + branches: + only: master + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: binary_win_conda_py3.9_cu102 + python_version: '3.9' + - binary_win_conda: + cu_version: cu111 + name: binary_win_conda_py3.9_cu111 + python_version: '3.9' # - python_lint # - python_type_check # - clang_format @@ -801,40 +944,68 @@ workflows: cu_version: cu101 name: unittest_linux_gpu_py3.8 python_version: '3.8' -# - unittest_windows_cpu: -# cu_version: cpu -# name: unittest_windows_cpu_py3.6 -# python_version: '3.6' -# - unittest_windows_cpu: -# cu_version: cpu -# name: unittest_windows_cpu_py3.7 -# python_version: '3.7' -# - unittest_windows_cpu: + - unittest_linux_gpu: + cu_version: cu101 + name: unittest_linux_gpu_py3.9 + python_version: '3.9' + - unittest_windows_cpu: + cu_version: cpu + name: unittest_windows_cpu_py3.6 + python_version: '3.6' + - unittest_windows_cpu: + cu_version: cpu + name: unittest_windows_cpu_py3.7 + python_version: '3.7' + - unittest_windows_cpu: + cu_version: cpu + name: unittest_windows_cpu_py3.8 + python_version: '3.8' + - unittest_windows_cpu: + cu_version: cpu + name: unittest_windows_cpu_py3.9 + python_version: '3.9' + - unittest_windows_gpu: + cu_version: cu101 + filters: + branches: + only: + - master + - nightly + name: unittest_windows_gpu_py3.6 + python_version: '3.6' + - 
unittest_windows_gpu: + cu_version: cu101 + filters: + branches: + only: + - master + - nightly + name: unittest_windows_gpu_py3.7 + python_version: '3.7' + - unittest_windows_gpu: + cu_version: cu101 + name: unittest_windows_gpu_py3.8 + python_version: '3.8' + - unittest_windows_gpu: + cu_version: cu101 + name: unittest_windows_gpu_py3.9 + python_version: '3.9' + - unittest_macos_cpu: + cu_version: cpu + name: unittest_macos_cpu_py3.6 + python_version: '3.6' + - unittest_macos_cpu: + cu_version: cpu + name: unittest_macos_cpu_py3.7 + python_version: '3.7' + - unittest_macos_cpu: + cu_version: cpu + name: unittest_macos_cpu_py3.8 + python_version: '3.8' +# - unittest_macos_cpu: # cu_version: cpu -# name: unittest_windows_cpu_py3.8 -# python_version: '3.8' -# - unittest_windows_gpu: -# cu_version: cu101 -# filters: -# branches: -# only: -# - master -# - nightly -# name: unittest_windows_gpu_py3.6 -# python_version: '3.6' -# - unittest_windows_gpu: -# cu_version: cu101 -# filters: -# branches: -# only: -# - master -# - nightly -# name: unittest_windows_gpu_py3.7 -# python_version: '3.7' -# - unittest_windows_gpu: -# cu_version: cu101 -# name: unittest_windows_gpu_py3.8 -# python_version: '3.8' +# name: unittest_macos_cpu_py3.9 +# python_version: '3.9' nightly: jobs: # - circleci_consistency @@ -862,27 +1033,6 @@ workflows: requires: - nightly_binary_linux_wheel_py3.6_cpu subfolder: cpu/ - - binary_linux_wheel: - cu_version: cu92 - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.6_cu92 - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda92 - - binary_wheel_upload: - context: org-member - filters: - branches: - only: nightly - tags: - only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.6_cu92_upload - requires: - - nightly_binary_linux_wheel_py3.6_cu92 - subfolder: cu92/ - binary_linux_wheel: cu_version: cu101 filters: @@ -926,15 +1076,15 @@ workflows: - 
nightly_binary_linux_wheel_py3.6_cu102 subfolder: cu102/ - binary_linux_wheel: - cu_version: cpu + cu_version: cu111 filters: branches: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cpu - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 + name: nightly_binary_linux_wheel_py3.6_cu111 + python_version: '3.6' + wheel_docker_image: pytorch/manylinux-cuda111 - binary_wheel_upload: context: org-member filters: @@ -942,20 +1092,20 @@ workflows: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cpu_upload + name: nightly_binary_linux_wheel_py3.6_cu111_upload requires: - - nightly_binary_linux_wheel_py3.7_cpu - subfolder: cpu/ + - nightly_binary_linux_wheel_py3.6_cu111 + subfolder: cu111/ - binary_linux_wheel: - cu_version: cu92 + cu_version: cpu filters: branches: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cu92 + name: nightly_binary_linux_wheel_py3.7_cpu python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda92 + wheel_docker_image: pytorch/manylinux-cuda102 - binary_wheel_upload: context: org-member filters: @@ -963,10 +1113,10 @@ workflows: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.7_cu92_upload + name: nightly_binary_linux_wheel_py3.7_cpu_upload requires: - - nightly_binary_linux_wheel_py3.7_cu92 - subfolder: cu92/ + - nightly_binary_linux_wheel_py3.7_cpu + subfolder: cpu/ - binary_linux_wheel: cu_version: cu101 filters: @@ -1010,15 +1160,15 @@ workflows: - nightly_binary_linux_wheel_py3.7_cu102 subfolder: cu102/ - binary_linux_wheel: - cu_version: cpu + cu_version: cu111 filters: branches: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cpu - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 + name: nightly_binary_linux_wheel_py3.7_cu111 + python_version: '3.7' + 
wheel_docker_image: pytorch/manylinux-cuda111 - binary_wheel_upload: context: org-member filters: @@ -1026,20 +1176,20 @@ workflows: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cpu_upload + name: nightly_binary_linux_wheel_py3.7_cu111_upload requires: - - nightly_binary_linux_wheel_py3.8_cpu - subfolder: cpu/ + - nightly_binary_linux_wheel_py3.7_cu111 + subfolder: cu111/ - binary_linux_wheel: - cu_version: cu92 + cu_version: cpu filters: branches: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cu92 + name: nightly_binary_linux_wheel_py3.8_cpu python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda92 + wheel_docker_image: pytorch/manylinux-cuda102 - binary_wheel_upload: context: org-member filters: @@ -1047,10 +1197,10 @@ workflows: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_wheel_py3.8_cu92_upload + name: nightly_binary_linux_wheel_py3.8_cpu_upload requires: - - nightly_binary_linux_wheel_py3.8_cu92 - subfolder: cu92/ + - nightly_binary_linux_wheel_py3.8_cpu + subfolder: cpu/ - binary_linux_wheel: cu_version: cu101 filters: @@ -1093,6 +1243,111 @@ workflows: requires: - nightly_binary_linux_wheel_py3.8_cu102 subfolder: cu102/ + - binary_linux_wheel: + cu_version: cu111 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_wheel_py3.8_cu111 + python_version: '3.8' + wheel_docker_image: pytorch/manylinux-cuda111 + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_wheel_py3.8_cu111_upload + requires: + - nightly_binary_linux_wheel_py3.8_cu111 + subfolder: cu111/ + - binary_linux_wheel: + cu_version: cpu + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_wheel_py3.9_cpu + python_version: 
'3.9' + wheel_docker_image: pytorch/manylinux-cuda102 + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_wheel_py39_cpu_upload + requires: + - nightly_binary_linux_wheel_py3.9_cpu + subfolder: cpu/ + - binary_linux_wheel: + cu_version: cu101 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_wheel_py3.9_cu101 + python_version: '3.9' + wheel_docker_image: pytorch/manylinux-cuda101 + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_wheel_py3.9_cu101_upload + requires: + - nightly_binary_linux_wheel_py3.9_cu101 + subfolder: cu101/ + - binary_linux_wheel: + cu_version: cu102 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_wheel_py3.9_cu102 + python_version: '3.9' + wheel_docker_image: pytorch/manylinux-cuda102 + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_wheel_py3.9_cu102_upload + requires: + - nightly_binary_linux_wheel_py3.9_cu102 + subfolder: cu102/ + - binary_linux_wheel: + cu_version: cu111 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_wheel_py3.9_cu111 + python_version: '3.9' + wheel_docker_image: pytorch/manylinux-cuda111 + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_wheel_py3.9_cu111_upload + requires: + - nightly_binary_linux_wheel_py3.9_cu111 + subfolder: cu111/ - binary_macos_wheel: cu_version: cpu filters: @@ -1156,256 +1411,756 @@ workflows: requires: - nightly_binary_macos_wheel_py3.8_cpu subfolder: '' -# - 
binary_win_wheel: -# cu_version: cpu -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.6_cpu -# python_version: '3.6' -# - binary_wheel_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.6_cpu_upload -# requires: -# - nightly_binary_win_wheel_py3.6_cpu -# subfolder: cpu/ -# - binary_win_wheel: -# cu_version: cu92 -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.6_cu92 -# python_version: '3.6' -# - binary_wheel_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.6_cu92_upload -# requires: -# - nightly_binary_win_wheel_py3.6_cu92 -# subfolder: cu92/ -# - binary_win_wheel: -# cu_version: cu101 -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.6_cu101 -# python_version: '3.6' -# - binary_wheel_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.6_cu101_upload -# requires: -# - nightly_binary_win_wheel_py3.6_cu101 -# subfolder: cu101/ -# - binary_win_wheel: -# cu_version: cu102 -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.6_cu102 -# python_version: '3.6' -# - binary_wheel_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.6_cu102_upload -# requires: -# - nightly_binary_win_wheel_py3.6_cu102 -# subfolder: cu102/ -# - binary_win_wheel: -# cu_version: cpu -# filters: -# branches: -# only: nightly -# tags: -# only: 
/v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.7_cpu -# python_version: '3.7' -# - binary_wheel_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.7_cpu_upload -# requires: -# - nightly_binary_win_wheel_py3.7_cpu -# subfolder: cpu/ -# - binary_win_wheel: -# cu_version: cu92 -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.7_cu92 -# python_version: '3.7' -# - binary_wheel_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.7_cu92_upload -# requires: -# - nightly_binary_win_wheel_py3.7_cu92 -# subfolder: cu92/ -# - binary_win_wheel: -# cu_version: cu101 -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.7_cu101 -# python_version: '3.7' -# - binary_wheel_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.7_cu101_upload -# requires: -# - nightly_binary_win_wheel_py3.7_cu101 -# subfolder: cu101/ -# - binary_win_wheel: -# cu_version: cu102 -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.7_cu102 -# python_version: '3.7' -# - binary_wheel_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.7_cu102_upload -# requires: -# - nightly_binary_win_wheel_py3.7_cu102 -# subfolder: cu102/ -# - binary_win_wheel: -# cu_version: cpu -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.8_cpu -# python_version: '3.8' -# - 
binary_wheel_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.8_cpu_upload -# requires: -# - nightly_binary_win_wheel_py3.8_cpu -# subfolder: cpu/ -# - binary_win_wheel: -# cu_version: cu92 -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.8_cu92 -# python_version: '3.8' -# - binary_wheel_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.8_cu92_upload -# requires: -# - nightly_binary_win_wheel_py3.8_cu92 -# subfolder: cu92/ -# - binary_win_wheel: -# cu_version: cu101 -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.8_cu101 -# python_version: '3.8' -# - binary_wheel_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.8_cu101_upload -# requires: -# - nightly_binary_win_wheel_py3.8_cu101 -# subfolder: cu101/ -# - binary_win_wheel: -# cu_version: cu102 + - binary_macos_wheel: + cu_version: cpu + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_macos_wheel_py3.9_cpu + python_version: '3.9' + wheel_docker_image: pytorch/manylinux-cuda102 + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_macos_wheel_py3.9_cpu_upload + requires: + - nightly_binary_macos_wheel_py3.9_cpu + subfolder: '' + - binary_win_wheel: + cu_version: cpu + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.6_cpu + python_version: '3.6' + - binary_wheel_upload: + context: org-member + filters: + 
branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.6_cpu_upload + requires: + - nightly_binary_win_wheel_py3.6_cpu + subfolder: cpu/ + - binary_win_wheel: + cu_version: cu101 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.6_cu101 + python_version: '3.6' + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.6_cu101_upload + requires: + - nightly_binary_win_wheel_py3.6_cu101 + subfolder: cu101/ + - binary_win_wheel: + cu_version: cu102 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.6_cu102 + python_version: '3.6' + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.6_cu102_upload + requires: + - nightly_binary_win_wheel_py3.6_cu102 + subfolder: cu102/ + - binary_win_wheel: + cu_version: cu111 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.6_cu111 + python_version: '3.6' + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.6_cu111_upload + requires: + - nightly_binary_win_wheel_py3.6_cu111 + subfolder: cu111/ + - binary_win_wheel: + cu_version: cpu + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.7_cpu + python_version: '3.7' + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.7_cpu_upload + requires: + - nightly_binary_win_wheel_py3.7_cpu + subfolder: cpu/ + - 
binary_win_wheel: + cu_version: cu101 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.7_cu101 + python_version: '3.7' + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.7_cu101_upload + requires: + - nightly_binary_win_wheel_py3.7_cu101 + subfolder: cu101/ + - binary_win_wheel: + cu_version: cu102 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.7_cu102 + python_version: '3.7' + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.7_cu102_upload + requires: + - nightly_binary_win_wheel_py3.7_cu102 + subfolder: cu102/ + - binary_win_wheel: + cu_version: cu111 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.7_cu111 + python_version: '3.7' + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.7_cu111_upload + requires: + - nightly_binary_win_wheel_py3.7_cu111 + subfolder: cu111/ + - binary_win_wheel: + cu_version: cpu + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.8_cpu + python_version: '3.8' + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.8_cpu_upload + requires: + - nightly_binary_win_wheel_py3.8_cpu + subfolder: cpu/ + - binary_win_wheel: + cu_version: cu101 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.8_cu101 + python_version: '3.8' + - 
binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.8_cu101_upload + requires: + - nightly_binary_win_wheel_py3.8_cu101 + subfolder: cu101/ + - binary_win_wheel: + cu_version: cu102 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.8_cu102 + python_version: '3.8' + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.8_cu102_upload + requires: + - nightly_binary_win_wheel_py3.8_cu102 + subfolder: cu102/ + - binary_win_wheel: + cu_version: cu111 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.8_cu111 + python_version: '3.8' + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.8_cu111_upload + requires: + - nightly_binary_win_wheel_py3.8_cu111 + subfolder: cu111/ + - binary_win_wheel: + cu_version: cpu + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.9_cpu + python_version: '3.9' + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.9_cpu_upload + requires: + - nightly_binary_win_wheel_py3.9_cpu + subfolder: cpu/ + - binary_win_wheel: + cu_version: cu101 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.9_cu101 + python_version: '3.9' + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.9_cu101_upload + requires: + - 
nightly_binary_win_wheel_py3.9_cu101 + subfolder: cu101/ + - binary_win_wheel: + cu_version: cu102 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.9_cu102 + python_version: '3.9' + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.9_cu102_upload + requires: + - nightly_binary_win_wheel_py3.9_cu102 + subfolder: cu102/ + - binary_win_wheel: + cu_version: cu111 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.9_cu111 + python_version: '3.9' + - binary_wheel_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_wheel_py3.9_cu111_upload + requires: + - nightly_binary_win_wheel_py3.9_cu111 + subfolder: cu111/ + - binary_linux_conda: + cu_version: cpu + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.6_cpu + python_version: '3.6' + wheel_docker_image: pytorch/manylinux-cuda102 + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.6_cpu_upload + requires: + - nightly_binary_linux_conda_py3.6_cpu + - binary_linux_conda: + cu_version: cu101 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.6_cu101 + python_version: '3.6' + wheel_docker_image: pytorch/manylinux-cuda101 + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.6_cu101_upload + requires: + - nightly_binary_linux_conda_py3.6_cu101 + - binary_linux_conda: + cu_version: cu102 + filters: + branches: 
+ only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.6_cu102 + python_version: '3.6' + wheel_docker_image: pytorch/manylinux-cuda102 + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.6_cu102_upload + requires: + - nightly_binary_linux_conda_py3.6_cu102 + - binary_linux_conda: + cu_version: cu111 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.6_cu111 + python_version: '3.6' + wheel_docker_image: pytorch/manylinux-cuda111 + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.6_cu111_upload + requires: + - nightly_binary_linux_conda_py3.6_cu111 + - binary_linux_conda: + cu_version: cpu + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.7_cpu + python_version: '3.7' + wheel_docker_image: pytorch/manylinux-cuda102 + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.7_cpu_upload + requires: + - nightly_binary_linux_conda_py3.7_cpu + - binary_linux_conda: + cu_version: cu101 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.7_cu101 + python_version: '3.7' + wheel_docker_image: pytorch/manylinux-cuda101 + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.7_cu101_upload + requires: + - nightly_binary_linux_conda_py3.7_cu101 + - binary_linux_conda: + cu_version: cu102 + filters: + branches: + only: nightly + tags: + only: 
/v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.7_cu102 + python_version: '3.7' + wheel_docker_image: pytorch/manylinux-cuda102 + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.7_cu102_upload + requires: + - nightly_binary_linux_conda_py3.7_cu102 + - binary_linux_conda: + cu_version: cu111 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.7_cu111 + python_version: '3.7' + wheel_docker_image: pytorch/manylinux-cuda111 + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.7_cu111_upload + requires: + - nightly_binary_linux_conda_py3.7_cu111 + - binary_linux_conda: + cu_version: cpu + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.8_cpu + python_version: '3.8' + wheel_docker_image: pytorch/manylinux-cuda102 + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.8_cpu_upload + requires: + - nightly_binary_linux_conda_py3.8_cpu + - binary_linux_conda: + cu_version: cu101 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.8_cu101 + python_version: '3.8' + wheel_docker_image: pytorch/manylinux-cuda101 + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.8_cu101_upload + requires: + - nightly_binary_linux_conda_py3.8_cu101 + - binary_linux_conda: + cu_version: cu102 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: 
nightly_binary_linux_conda_py3.8_cu102 + python_version: '3.8' + wheel_docker_image: pytorch/manylinux-cuda102 + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.8_cu102_upload + requires: + - nightly_binary_linux_conda_py3.8_cu102 + - binary_linux_conda: + cu_version: cu111 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.8_cu111 + python_version: '3.8' + wheel_docker_image: pytorch/manylinux-cuda111 + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.8_cu111_upload + requires: + - nightly_binary_linux_conda_py3.8_cu111 + - binary_linux_conda: + cu_version: cpu + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.9_cpu + python_version: '3.9' + wheel_docker_image: pytorch/manylinux-cuda102 + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.9_cpu_upload + requires: + - nightly_binary_linux_conda_py3.9_cpu + - binary_linux_conda: + cu_version: cu101 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.9_cu101 + python_version: '3.9' + wheel_docker_image: pytorch/manylinux-cuda101 + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.9_cu101_upload + requires: + - nightly_binary_linux_conda_py3.9_cu101 + - binary_linux_conda: + cu_version: cu102 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.9_cu102 + 
python_version: '3.9' + wheel_docker_image: pytorch/manylinux-cuda102 + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.9_cu102_upload + requires: + - nightly_binary_linux_conda_py3.9_cu102 + - binary_linux_conda: + cu_version: cu111 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.9_cu111 + python_version: '3.9' + wheel_docker_image: pytorch/manylinux-cuda111 + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_linux_conda_py3.9_cu111_upload + requires: + - nightly_binary_linux_conda_py3.9_cu111 + - binary_macos_conda: + cu_version: cpu + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_macos_conda_py3.6_cpu + python_version: '3.6' + wheel_docker_image: pytorch/manylinux-cuda102 + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_macos_conda_py3.6_cpu_upload + requires: + - nightly_binary_macos_conda_py3.6_cpu + - binary_macos_conda: + cu_version: cpu + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_macos_conda_py3.7_cpu + python_version: '3.7' + wheel_docker_image: pytorch/manylinux-cuda102 + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_macos_conda_py3.7_cpu_upload + requires: + - nightly_binary_macos_conda_py3.7_cpu + - binary_macos_conda: + cu_version: cpu + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_macos_conda_py3.8_cpu + python_version: '3.8' + wheel_docker_image: 
pytorch/manylinux-cuda102 + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_macos_conda_py3.8_cpu_upload + requires: + - nightly_binary_macos_conda_py3.8_cpu +# - binary_macos_conda: +# cu_version: cpu # filters: # branches: # only: nightly # tags: # only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.8_cu102 -# python_version: '3.8' -# - binary_wheel_upload: +# name: nightly_binary_macos_conda_py3.9_cpu +# python_version: '3.9' +# wheel_docker_image: pytorch/manylinux-cuda102 +# - binary_conda_upload: # context: org-member # filters: # branches: # only: nightly # tags: # only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_wheel_py3.8_cu102_upload +# name: nightly_binary_macos_conda_py3.9_cpu_upload # requires: -# - nightly_binary_win_wheel_py3.8_cu102 -# subfolder: cu102/ - - binary_linux_conda: +# - nightly_binary_macos_conda_py3.9_cpu + - binary_win_conda: cu_version: cpu filters: branches: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cpu + name: nightly_binary_win_conda_py3.6_cpu python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - binary_conda_upload: context: org-member filters: @@ -1413,19 +2168,18 @@ workflows: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cpu_upload + name: nightly_binary_win_conda_py3.6_cpu_upload requires: - - nightly_binary_linux_conda_py3.6_cpu - - binary_linux_conda: - cu_version: cu92 + - nightly_binary_win_conda_py3.6_cpu + - binary_win_conda: + cu_version: cu101 filters: branches: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cu92 + name: nightly_binary_win_conda_py3.6_cu101 python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda92 - binary_conda_upload: context: org-member filters: @@ -1433,19 +2187,18 @@ 
workflows: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cu92_upload + name: nightly_binary_win_conda_py3.6_cu101_upload requires: - - nightly_binary_linux_conda_py3.6_cu92 - - binary_linux_conda: - cu_version: cu101 + - nightly_binary_win_conda_py3.6_cu101 + - binary_win_conda: + cu_version: cu102 filters: branches: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cu101 + name: nightly_binary_win_conda_py3.6_cu102 python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda101 - binary_conda_upload: context: org-member filters: @@ -1453,19 +2206,18 @@ workflows: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cu101_upload + name: nightly_binary_win_conda_py3.6_cu102_upload requires: - - nightly_binary_linux_conda_py3.6_cu101 - - binary_linux_conda: - cu_version: cu102 + - nightly_binary_win_conda_py3.6_cu102 + - binary_win_conda: + cu_version: cu111 filters: branches: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cu102 + name: nightly_binary_win_conda_py3.6_cu111 python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 - binary_conda_upload: context: org-member filters: @@ -1473,19 +2225,18 @@ workflows: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.6_cu102_upload + name: nightly_binary_win_conda_py3.6_cu111_upload requires: - - nightly_binary_linux_conda_py3.6_cu102 - - binary_linux_conda: + - nightly_binary_win_conda_py3.6_cu111 + - binary_win_conda: cu_version: cpu filters: branches: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cpu + name: nightly_binary_win_conda_py3.7_cpu python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - binary_conda_upload: context: org-member filters: @@ -1493,19 +2244,18 @@ workflows: only: nightly tags: 
only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cpu_upload + name: nightly_binary_win_conda_py3.7_cpu_upload requires: - - nightly_binary_linux_conda_py3.7_cpu - - binary_linux_conda: - cu_version: cu92 + - nightly_binary_win_conda_py3.7_cpu + - binary_win_conda: + cu_version: cu101 filters: branches: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cu92 + name: nightly_binary_win_conda_py3.7_cu101 python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda92 - binary_conda_upload: context: org-member filters: @@ -1513,19 +2263,18 @@ workflows: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cu92_upload + name: nightly_binary_win_conda_py3.7_cu101_upload requires: - - nightly_binary_linux_conda_py3.7_cu92 - - binary_linux_conda: - cu_version: cu101 + - nightly_binary_win_conda_py3.7_cu101 + - binary_win_conda: + cu_version: cu102 filters: branches: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cu101 + name: nightly_binary_win_conda_py3.7_cu102 python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda101 - binary_conda_upload: context: org-member filters: @@ -1533,19 +2282,18 @@ workflows: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cu101_upload + name: nightly_binary_win_conda_py3.7_cu102_upload requires: - - nightly_binary_linux_conda_py3.7_cu101 - - binary_linux_conda: - cu_version: cu102 + - nightly_binary_win_conda_py3.7_cu102 + - binary_win_conda: + cu_version: cu111 filters: branches: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cu102 + name: nightly_binary_win_conda_py3.7_cu111 python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 - binary_conda_upload: context: org-member filters: @@ -1553,19 +2301,18 @@ workflows: only: nightly tags: only: 
/v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.7_cu102_upload + name: nightly_binary_win_conda_py3.7_cu111_upload requires: - - nightly_binary_linux_conda_py3.7_cu102 - - binary_linux_conda: + - nightly_binary_win_conda_py3.7_cu111 + - binary_win_conda: cu_version: cpu filters: branches: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cpu + name: nightly_binary_win_conda_py3.8_cpu python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - binary_conda_upload: context: org-member filters: @@ -1573,19 +2320,18 @@ workflows: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cpu_upload + name: nightly_binary_win_conda_py3.8_cpu_upload requires: - - nightly_binary_linux_conda_py3.8_cpu - - binary_linux_conda: - cu_version: cu92 + - nightly_binary_win_conda_py3.8_cpu + - binary_win_conda: + cu_version: cu101 filters: branches: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cu92 + name: nightly_binary_win_conda_py3.8_cu101 python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda92 - binary_conda_upload: context: org-member filters: @@ -1593,19 +2339,18 @@ workflows: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cu92_upload + name: nightly_binary_win_conda_py3.8_cu101_upload requires: - - nightly_binary_linux_conda_py3.8_cu92 - - binary_linux_conda: - cu_version: cu101 + - nightly_binary_win_conda_py3.8_cu101 + - binary_win_conda: + cu_version: cu102 filters: branches: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cu101 + name: nightly_binary_win_conda_py3.8_cu102 python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda101 - binary_conda_upload: context: org-member filters: @@ -1613,19 +2358,18 @@ workflows: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: 
nightly_binary_linux_conda_py3.8_cu101_upload + name: nightly_binary_win_conda_py3.8_cu102_upload requires: - - nightly_binary_linux_conda_py3.8_cu101 - - binary_linux_conda: - cu_version: cu102 + - nightly_binary_win_conda_py3.8_cu102 + - binary_win_conda: + cu_version: cu111 filters: branches: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cu102 + name: nightly_binary_win_conda_py3.8_cu111 python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 - binary_conda_upload: context: org-member filters: @@ -1633,19 +2377,18 @@ workflows: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_linux_conda_py3.8_cu102_upload + name: nightly_binary_win_conda_py3.8_cu111_upload requires: - - nightly_binary_linux_conda_py3.8_cu102 - - binary_macos_conda: + - nightly_binary_win_conda_py3.8_cu111 + - binary_win_conda: cu_version: cpu filters: branches: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_conda_py3.6_cpu - python_version: '3.6' - wheel_docker_image: pytorch/manylinux-cuda102 + name: nightly_binary_win_conda_py3.9_cpu + python_version: '3.9' - binary_conda_upload: context: org-member filters: @@ -1653,19 +2396,18 @@ workflows: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_conda_py3.6_cpu_upload + name: nightly_binary_win_conda_py3.9_cpu_upload requires: - - nightly_binary_macos_conda_py3.6_cpu - - binary_macos_conda: - cu_version: cpu + - nightly_binary_win_conda_py3.9_cpu + - binary_win_conda: + cu_version: cu101 filters: branches: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_conda_py3.7_cpu - python_version: '3.7' - wheel_docker_image: pytorch/manylinux-cuda102 + name: nightly_binary_win_conda_py3.9_cu101 + python_version: '3.9' - binary_conda_upload: context: org-member filters: @@ -1673,19 +2415,18 @@ workflows: only: nightly tags: only: 
/v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_conda_py3.7_cpu_upload + name: nightly_binary_win_conda_py3.9_cu101_upload requires: - - nightly_binary_macos_conda_py3.7_cpu - - binary_macos_conda: - cu_version: cpu + - nightly_binary_win_conda_py3.9_cu101 + - binary_win_conda: + cu_version: cu102 filters: branches: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_conda_py3.8_cpu - python_version: '3.8' - wheel_docker_image: pytorch/manylinux-cuda102 + name: nightly_binary_win_conda_py3.9_cu102 + python_version: '3.9' - binary_conda_upload: context: org-member filters: @@ -1693,234 +2434,25 @@ workflows: only: nightly tags: only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ - name: nightly_binary_macos_conda_py3.8_cpu_upload + name: nightly_binary_win_conda_py3.9_cu102_upload requires: - - nightly_binary_macos_conda_py3.8_cpu -# - binary_win_conda: -# cu_version: cpu -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.6_cpu -# python_version: '3.6' -# - binary_conda_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.6_cpu_upload -# requires: -# - nightly_binary_win_conda_py3.6_cpu -# - binary_win_conda: -# cu_version: cu92 -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.6_cu92 -# python_version: '3.6' -# - binary_conda_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.6_cu92_upload -# requires: -# - nightly_binary_win_conda_py3.6_cu92 -# - binary_win_conda: -# cu_version: cu101 -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.6_cu101 -# python_version: '3.6' -# - binary_conda_upload: -# 
context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.6_cu101_upload -# requires: -# - nightly_binary_win_conda_py3.6_cu101 -# - binary_win_conda: -# cu_version: cu102 -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.6_cu102 -# python_version: '3.6' -# - binary_conda_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.6_cu102_upload -# requires: -# - nightly_binary_win_conda_py3.6_cu102 -# - binary_win_conda: -# cu_version: cpu -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.7_cpu -# python_version: '3.7' -# - binary_conda_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.7_cpu_upload -# requires: -# - nightly_binary_win_conda_py3.7_cpu -# - binary_win_conda: -# cu_version: cu92 -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.7_cu92 -# python_version: '3.7' -# - binary_conda_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.7_cu92_upload -# requires: -# - nightly_binary_win_conda_py3.7_cu92 -# - binary_win_conda: -# cu_version: cu101 -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.7_cu101 -# python_version: '3.7' -# - binary_conda_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.7_cu101_upload -# requires: -# - 
nightly_binary_win_conda_py3.7_cu101 -# - binary_win_conda: -# cu_version: cu102 -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.7_cu102 -# python_version: '3.7' -# - binary_conda_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.7_cu102_upload -# requires: -# - nightly_binary_win_conda_py3.7_cu102 -# - binary_win_conda: -# cu_version: cpu -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.8_cpu -# python_version: '3.8' -# - binary_conda_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.8_cpu_upload -# requires: -# - nightly_binary_win_conda_py3.8_cpu -# - binary_win_conda: -# cu_version: cu92 -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.8_cu92 -# python_version: '3.8' -# - binary_conda_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.8_cu92_upload -# requires: -# - nightly_binary_win_conda_py3.8_cu92 -# - binary_win_conda: -# cu_version: cu101 -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.8_cu101 -# python_version: '3.8' -# - binary_conda_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.8_cu101_upload -# requires: -# - nightly_binary_win_conda_py3.8_cu101 -# - binary_win_conda: -# cu_version: cu102 -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: 
nightly_binary_win_conda_py3.8_cu102 -# python_version: '3.8' -# - binary_conda_upload: -# context: org-member -# filters: -# branches: -# only: nightly -# tags: -# only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ -# name: nightly_binary_win_conda_py3.8_cu102_upload -# requires: -# - nightly_binary_win_conda_py3.8_cu102 + - nightly_binary_win_conda_py3.9_cu102 + - binary_win_conda: + cu_version: cu111 + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_conda_py3.9_cu111 + python_version: '3.9' + - binary_conda_upload: + context: org-member + filters: + branches: + only: nightly + tags: + only: /v[0-9]+(\.[0-9]+)*-rc[0-9]+/ + name: nightly_binary_win_conda_py3.9_cu111_upload + requires: + - nightly_binary_win_conda_py3.9_cu111 diff --git a/.circleci/unittest/linux/scripts/environment.yml b/.circleci/unittest/linux/scripts/environment.yml new file mode 100644 index 0000000..ca96279 --- /dev/null +++ b/.circleci/unittest/linux/scripts/environment.yml @@ -0,0 +1,15 @@ +channels: + - pytorch + - conda-forge + - defaults +dependencies: + - numpy + - pytest + - pytest-cov + - codecov + - pip + - ca-certificates + - pycrypto + - pip: + - future + - scipy diff --git a/.circleci/unittest/linux/scripts/install.sh b/.circleci/unittest/linux/scripts/install.sh new file mode 100755 index 0000000..6334cb9 --- /dev/null +++ b/.circleci/unittest/linux/scripts/install.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +unset PYTORCH_VERSION +# For unittest, nightly PyTorch is used as the following section, +# so no need to set PYTORCH_VERSION. +# In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config. 
+ +set -e + +eval "$(./conda/bin/conda shell.bash hook)" +conda activate ./env + +if [ "${CU_VERSION:-}" == cpu ] ; then + cudatoolkit="cpuonly" +else + if [[ ${#CU_VERSION} -eq 4 ]]; then + CUDA_VERSION="${CU_VERSION:2:1}.${CU_VERSION:3:1}" + elif [[ ${#CU_VERSION} -eq 5 ]]; then + CUDA_VERSION="${CU_VERSION:2:2}.${CU_VERSION:4:1}" + fi + echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION" + version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")" + cudatoolkit="cudatoolkit=${version}" +fi +printf "Installing PyTorch with %s\n" "${cudatoolkit}" +conda install -y -c pytorch-nightly pytorch "${cudatoolkit}" + +printf "* Installing torchcsprng\n" +python setup.py develop \ No newline at end of file diff --git a/.circleci/unittest/linux/scripts/post_process.sh b/.circleci/unittest/linux/scripts/post_process.sh new file mode 100755 index 0000000..b05be6d --- /dev/null +++ b/.circleci/unittest/linux/scripts/post_process.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +set -e + +eval "$(./conda/bin/conda shell.bash hook)" +conda activate ./env + +codecov \ No newline at end of file diff --git a/.circleci/unittest/linux/scripts/run_test.sh b/.circleci/unittest/linux/scripts/run_test.sh new file mode 100755 index 0000000..61f6e3e --- /dev/null +++ b/.circleci/unittest/linux/scripts/run_test.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -e + +eval "$(./conda/bin/conda shell.bash hook)" +conda activate ./env + +python -m torch.utils.collect_env +pytest --cov=torchcsprng --junitxml=test-results/junit.xml -v --durations 20 test \ No newline at end of file diff --git a/.circleci/unittest/linux/scripts/setup_env.sh b/.circleci/unittest/linux/scripts/setup_env.sh new file mode 100755 index 0000000..054ebf2 --- /dev/null +++ b/.circleci/unittest/linux/scripts/setup_env.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +# This script is for setting up environment in which unit test is ran. +# To speed up the CI time, the resulting environment is cached. 
+# +# Do not install PyTorch and torchcsprng here, otherwise they also get cached. + +set -e + +this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +root_dir="$(git rev-parse --show-toplevel)" +conda_dir="${root_dir}/conda" +env_dir="${root_dir}/env" + +cd "${root_dir}" + +case "$(uname -s)" in + Darwin*) os=MacOSX;; + *) os=Linux +esac + +# 1. Install conda at ./conda +if [ ! -d "${conda_dir}" ]; then + printf "* Installing conda\n" + wget -O miniconda.sh "http://repo.continuum.io/miniconda/Miniconda3-latest-${os}-x86_64.sh" + bash ./miniconda.sh -b -f -p "${conda_dir}" +fi +eval "$(${conda_dir}/bin/conda shell.bash hook)" + +# 2. Create test environment at ./env +if [ ! -d "${env_dir}" ]; then + printf "* Creating a test environment\n" + conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION" +fi +conda activate "${env_dir}" + +# 3. Install Conda dependencies +printf "* Installing dependencies (except PyTorch)\n" +conda env update --file "${this_dir}/environment.yml" --prune diff --git a/.circleci/unittest/windows/scripts/environment.yml b/.circleci/unittest/windows/scripts/environment.yml new file mode 100644 index 0000000..ca96279 --- /dev/null +++ b/.circleci/unittest/windows/scripts/environment.yml @@ -0,0 +1,15 @@ +channels: + - pytorch + - conda-forge + - defaults +dependencies: + - numpy + - pytest + - pytest-cov + - codecov + - pip + - ca-certificates + - pycrypto + - pip: + - future + - scipy diff --git a/.circleci/unittest/windows/scripts/install.sh b/.circleci/unittest/windows/scripts/install.sh new file mode 100644 index 0000000..deba8f6 --- /dev/null +++ b/.circleci/unittest/windows/scripts/install.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +unset PYTORCH_VERSION +# For unittest, nightly PyTorch is used as the following section, +# so no need to set PYTORCH_VERSION. +# In fact, keeping PYTORCH_VERSION forces us to hardcode PyTorch version in config. 
+ +set -e + +this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +eval "$(./conda/Scripts/conda.exe 'shell.bash' 'hook')" +conda activate ./env + +if [ "${CU_VERSION:-}" == cpu ] ; then + cudatoolkit="cpuonly" +else + if [[ ${#CU_VERSION} -eq 4 ]]; then + CUDA_VERSION="${CU_VERSION:2:1}.${CU_VERSION:3:1}" + elif [[ ${#CU_VERSION} -eq 5 ]]; then + CUDA_VERSION="${CU_VERSION:2:2}.${CU_VERSION:4:1}" + fi + echo "Using CUDA $CUDA_VERSION as determined by CU_VERSION" + version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")" + cudatoolkit="cudatoolkit=${version}" +fi +printf "Installing PyTorch with %s\n" "${cudatoolkit}" +conda install -y -c pytorch-nightly pytorch "${cudatoolkit}" + +printf "* Installing torchcsprng\n" +"$this_dir/vc_env_helper.bat" python setup.py develop \ No newline at end of file diff --git a/.circleci/unittest/windows/scripts/install_conda.bat b/.circleci/unittest/windows/scripts/install_conda.bat new file mode 100644 index 0000000..6612fba --- /dev/null +++ b/.circleci/unittest/windows/scripts/install_conda.bat @@ -0,0 +1 @@ +start /wait "" "%miniconda_exe%" /S /InstallationType=JustMe /RegisterPython=0 /AddToPath=0 /D=%tmp_conda% \ No newline at end of file diff --git a/.circleci/unittest/windows/scripts/post_process.sh b/.circleci/unittest/windows/scripts/post_process.sh new file mode 100644 index 0000000..2a1ac63 --- /dev/null +++ b/.circleci/unittest/windows/scripts/post_process.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +set -e + +eval "$(./conda/Scripts/conda.exe 'shell.bash' 'hook')" +conda activate ./env + +#codecov diff --git a/.circleci/unittest/windows/scripts/run_test.sh b/.circleci/unittest/windows/scripts/run_test.sh new file mode 100644 index 0000000..02c6327 --- /dev/null +++ b/.circleci/unittest/windows/scripts/run_test.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -e + +eval "$(./conda/Scripts/conda.exe 'shell.bash' 'hook')" +conda activate ./env + +python -m 
torch.utils.collect_env +pytest --cov=torchcsprng --junitxml=test-results/junit.xml -v --durations 20 test \ No newline at end of file diff --git a/.circleci/unittest/windows/scripts/setup_env.sh b/.circleci/unittest/windows/scripts/setup_env.sh new file mode 100644 index 0000000..6a73927 --- /dev/null +++ b/.circleci/unittest/windows/scripts/setup_env.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +# This script is for setting up environment in which unit test is ran. +# To speed up the CI time, the resulting environment is cached. +# +# Do not install PyTorch and torchcsprng here, otherwise they also get cached. + +set -e + +this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +root_dir="$(git rev-parse --show-toplevel)" +conda_dir="${root_dir}/conda" +env_dir="${root_dir}/env" + +cd "${root_dir}" + +# 1. Install conda at ./conda +if [ ! -d "${conda_dir}" ]; then + printf "* Installing conda\n" + export tmp_conda="$(echo $conda_dir | tr '/' '\\')" + export miniconda_exe="$(echo $root_dir | tr '/' '\\')\\miniconda.exe" + curl --output miniconda.exe https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe -O + "$this_dir/install_conda.bat" + unset tmp_conda + unset miniconda_exe +fi + +eval "$(${conda_dir}/Scripts/conda.exe 'shell.bash' 'hook')" + +# 2. Create test environment at ./env +if [ ! -d "${env_dir}" ]; then + printf "* Creating a test environment\n" + conda create --prefix "${env_dir}" -y python="$PYTHON_VERSION" +fi +conda activate "${env_dir}" + +# 3. 
Install Conda dependencies +printf "* Installing dependencies (except PyTorch)\n" +conda env update --file "${this_dir}/environment.yml" --prune \ No newline at end of file diff --git a/.circleci/unittest/windows/scripts/vc_env_helper.bat b/.circleci/unittest/windows/scripts/vc_env_helper.bat new file mode 100644 index 0000000..9410135 --- /dev/null +++ b/.circleci/unittest/windows/scripts/vc_env_helper.bat @@ -0,0 +1,39 @@ +@echo on + +set VC_VERSION_LOWER=16 +set VC_VERSION_UPPER=17 + +for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do ( + if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( + set "VS15INSTALLDIR=%%i" + set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" + goto vswhere + ) +) + +:vswhere +if "%VSDEVCMD_ARGS%" == "" ( + call "%VS15VCVARSALL%" x64 || exit /b 1 +) else ( + call "%VS15VCVARSALL%" x64 %VSDEVCMD_ARGS% || exit /b 1 +) + +@echo on + +set DISTUTILS_USE_SDK=1 + +set args=%1 +shift +:start +if [%1] == [] goto done +set args=%args% %1 +shift +goto start + +:done +if "%args%" == "" ( + echo Usage: vc_env_helper.bat [command] [args] + echo e.g. 
vc_env_helper.bat cl /c test.cpp +) + +%args% || exit /b 1 diff --git a/.gitignore b/.gitignore index 4404a2b..ee0c254 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,10 @@ +.idea +.vscode +# do not change or delete this comment - `python setup.py clean` deletes everything after this line dist/ build/ *.egg-info/ -.idea -.vscode -torch_csprng/version.py +torchcsprng/version.py */__pycache__ +.pytest_cache *.so diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..b91e23b --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,76 @@ +# Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to make participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, sex characteristics, gender identity and expression, +level of experience, education, socio-economic status, nationality, personal +appearance, race, religion, or sexual identity and orientation. 
+ +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or +advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic +address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a +professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies within all project spaces, and it also applies when +an individual is representing the project or its community in public spaces. +Examples of representing a project or community include using an official +project e-mail address, posting via an official social media account, or acting +as an appointed representative at an online or offline event. Representation of +a project may be further defined and clarified by project maintainers. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at . All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see +https://www.contributor-covenant.org/faq diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..fdc1528 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,31 @@ +# Contributing to csprng +We want to make contributing to this project as easy and transparent as +possible. + +## Pull Requests +We actively welcome your pull requests. + +1. Fork the repo and create your branch from `master`. +2. If you've added code that should be tested, add tests. +3. If you've changed APIs, update the documentation. +4. Ensure the test suite passes. +5. Make sure your code lints. +6. If you haven't already, complete the Contributor License Agreement ("CLA"). + +## Contributor License Agreement ("CLA") +In order to accept your pull request, we need you to submit a CLA. You only need +to do this once to work on any of Facebook's open source projects. + +Complete your CLA here: + +## Issues +We use GitHub issues to track public bugs. 
Please ensure your description is +clear and has sufficient instructions to be able to reproduce the issue. + +Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe +disclosure of security bugs. In those cases, please go through the process +outlined on that page and do not file a public issue. + +## License +By contributing to csprng, you agree that your contributions will be licensed +under the LICENSE file in the root directory of this source tree. \ No newline at end of file diff --git a/README.md b/README.md index 792c546..128e327 100644 --- a/README.md +++ b/README.md @@ -1,33 +1,54 @@ # PyTorch/CSPRNG -CSPRNG is a [PyTorch C++/CUDA extension](https://pytorch.org/tutorials/advanced/cpp_extension.html) that provides [cryptographically secure pseudorandom number generators](https://en.wikipedia.org/wiki/Cryptographically_secure_pseudorandom_number_generator) for PyTorch. - [![CircleCI](https://circleci.com/gh/pytorch/csprng.svg?style=shield&circle-token=64701692dd7f13f31019612289f0200fdb661dc2)](https://circleci.com/gh/pytorch/csprng) +torchcsprng is a [PyTorch C++/CUDA extension](https://pytorch.org/tutorials/advanced/cpp_extension.html) that provides: + +- [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) 128-bit encryption/decryption in two modes: [ECB](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Electronic_codebook_(ECB)) and [CTR](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/csprng/blob/master/examples/encrypt_decrypt.ipynb) +- [cryptographically secure pseudorandom number generators](https://en.wikipedia.org/wiki/Cryptographically_secure_pseudorandom_number_generator) for PyTorch. 
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/csprng/blob/master/examples/csprng.ipynb) + ## Design -CSPRNG generates a random 128-bits key on CPU using one of its generators and runs +torchcsprng generates a random 128-bit key on CPU using one of its generators and runs [AES128](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) in [CTR mode](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)) -mode either on CPU or on GPU using CUDA to generate random 128 bits state and apply transformation function to map it to target tensor values. + either on CPU or on GPU using CUDA to generate a random 128 bit state and apply a transformation function to map it to target tensor values. This approach is based on [Parallel Random Numbers: As Easy as 1, 2, 3(John K. Salmon, Mark A. Moraes, Ron O. Dror, and David E. Shaw, D. E. Shaw Research)](http://www.thesalmons.org/john/random123/papers/random123sc11.pdf). -It makes CSPRNG both crypto-secure and parallel on CUDA and CPU. +It makes torchcsprng both crypto-secure and parallel on CUDA and CPU. ![CSPRNG architecture](.github/csprng_architecture.png) Advantages: -- A user can choose either seed-based(for testing) or random device based(fully crypto-secure) generators +- The user can choose either seed-based(for testing) or random device based(fully crypto-secure) generators - One generator instance for both CPU and CUDA tensors(because the encryption key is always generated on CPU) -- CPU random number generation is also parallel(unlike default PyTorch CPU generator) +- CPU random number generation is also parallel(unlike the default PyTorch CPU generator) ## Features -CSPRNG exposes two methods to create crypto-secure and non-crypto-secure PRNGs: +torchcsprng 0.2.0 exposes new API for tensor encryption/decryption. 
Tensor encryption/decryption API is dtype agnostic, so a tensor of any dtype can be encrypted and the result can be stored to a tensor of any dtype. An encryption key also can be a tensor of any dtype. Currently torchcsprng supports [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) cipher with 128-bit key in two modes: [ECB](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Electronic_codebook_(ECB)) and [CTR](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)). + +* `torchcsprng.encrypt(input: Tensor, output: Tensor, key: Tensor, cipher: string, mode: string)` + +> - `input` tensor can be any CPU or CUDA tensor of any dtype and size in bytes(zero-padding is used to make its size in bytes divisible by block size in bytes) +> - `output` tensor can have any dtype and the same device as `input` tensor and the size in bytes rounded up to the block size in bytes(16 bytes for AES 128) +> - `key` tensor can have any dtype and the same device as `input` tensor and size in bytes equal to 16 for AES 128 +> - `cipher` currently can be only one supported value `"aes128"` +> - `mode` currently can be either [`"ecb"`](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Electronic_codebook_(ECB)) or [`"ctr"`](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)) + +* `torchcsprng.decrypt(input: Tensor, output: Tensor, key: Tensor, cipher: string, mode: string)` + +> - `input` tensor can be any CPU or CUDA tensor of any dtype with size in bytes divisible by the block size in bytes(16 bytes for AES 128) +> - `output` tensor can have any dtype but the same device as `input` tensor and the same size in bytes as `input` tensor +> - `key` tensor can have any dtype and the same device as `input` tensor and size in bytes equal to 16 for AES 128 +> - `cipher` currently can be only one supported value `"aes128"` +> - `mode` currently can be either 
[`"ecb"`](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Electronic_codebook_(ECB)) or [`"ctr"`](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)) + +torchcsprng exposes two methods to create crypto-secure and non-crypto-secure PRNGs: -| Method to create PRNG | Is crypto-secure? | Has seed? | Underlying implementation | -|----------------------------------------------------|-------------------|-----------|------------------------------------------------------------------------------------------------------------------------------------| -| create_random_device_generator(token: string=None) | yes | no | See [std::random_device](https://en.cppreference.com/w/cpp/numeric/random/random_device) and [it's constructor](https://en.cppreference.com/w/cpp/numeric/random/random_device/random_device). The implementation in libstdc++ expects token to name the source of random bytes. Possible token values include "default", "rand_s", "rdseed", "rdrand", "rdrnd", "/dev/urandom", "/dev/random", "mt19937", and integer string specifying the seed of the mt19937 engine. (Token values other than "default" are only valid for certain targets.) If token=None then constructs a new std::random_device object with an implementation-defined token. | -| create_mt19937_generator(seed: int=None) | no | yes | See [std::mt19937](https://en.cppreference.com/w/cpp/numeric/random/mersenne_twister_engine) and [it's constructor](https://en.cppreference.com/w/cpp/numeric/random/mersenne_twister_engine/mersenne_twister_engine). Constructs a mersenne_twister_engine object, and initializes its internal state sequence to pseudo-random values. If seed=None then seeds the engine with default_seed.| +| Method to create PRNG | Is crypto-secure? | Has seed? 
| Underlying implementation | +|----------------------------------------------------|-------------------|-----------|---------------------------| +| create_random_device_generator(token: string=None) | yes | no | See [std::random_device](https://en.cppreference.com/w/cpp/numeric/random/random_device) and [its constructor](https://en.cppreference.com/w/cpp/numeric/random/random_device/random_device). The implementation in libstdc++ expects token to name the source of random bytes. Possible token values include "default", "rand_s", "rdseed", "rdrand", "rdrnd", "/dev/urandom", "/dev/random", "mt19937", and integer string specifying the seed of the mt19937 engine. (Token values other than "default" are only valid for certain targets.) If token=None then constructs a new std::random_device object with an implementation-defined token. | +| create_mt19937_generator(seed: int=None) | no | yes | See [std::mt19937](https://en.cppreference.com/w/cpp/numeric/random/mersenne_twister_engine) and [its constructor](https://en.cppreference.com/w/cpp/numeric/random/mersenne_twister_engine/mersenne_twister_engine). Constructs a mersenne_twister_engine object, and initializes its internal state sequence to pseudo-random values. 
If seed=None then seeds the engine with default_seed.| The following list of methods supports all forementioned PRNGs: @@ -42,68 +63,151 @@ The following list of methods supports all forementioned PRNGs: | log_normal_(mean, std) | yes | yes | | geometric_(p) | yes | yes | | exponential_(lambda) | yes | yes | +| randperm(n) | yes* | yes | + +* the calculations are done on CPU and the result is copied to CUDA + +## Installation + +CSPRNG works with Python 3.6-3.9 on the following operating systems and can be used with PyTorch tensors on the following devices: + +| Tensor Device Type | Linux | macOS | MS Window | +|--------------------|-----------|---------------|----------------| +| CPU | Supported | Supported | Supported | +| CUDA | Supported | Not Supported | Supported since 0.2.0 | + +The following is the corresponding CSPRNG versions and supported Python versions. + +| PyTorch | CSPRNG | Python | CUDA | +|---------|--------|----------|------------------| +| 1.8.0 | 0.2.0 | 3.7-3.9 | 10.1, 10.2, 11.1 | +| 1.7.1 | 0.1.4 | 3.6-3.8 | 9.2, 10.1, 10.2 | +| 1.7.0 | 0.1.3 | 3.6-3.8 | 9.2, 10.1, 10.2 | +| 1.6.0 | 0.1.2 | 3.6-3.8 | 9.2, 10.1, 10.2 | + + +### Binary Installation -## How to build +Anaconda: -Since CSPRNG is C++/CUDA extension it uses setuptools, just run `python setup.py install` to build and install it. +| OS | CUDA | | +|---------------|------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Linux/Windows | 10.1

10.2

11.1

None | conda install torchcsprng cudatoolkit=10.1 -c pytorch -c conda-forge

conda install torchcsprng cudatoolkit=10.2 -c pytorch -c conda-forge

conda install torchcsprng cudatoolkit=11.1 -c pytorch -c conda-forge

conda install torchcsprng cpuonly -c pytorch -c conda-forge | +| macOS | None | conda install torchcsprng -c pytorch | -## How to use +pip: +| OS | CUDA | | +|---------------|------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Linux/Windows | 10.1

10.2

11.1

None | pip install torchcsprng==0.2.0+cu101 torch==1.8.0+cu101 -f https://download.pytorch.org/whl/cu101/torch_stable.html

pip install torchcsprng==0.2.0 torch==1.8.0 -f https://download.pytorch.org/whl/cu102/torch_stable.html

pip install torchcsprng==0.2.0+cu111 torch==1.8.0+cu111 -f https://download.pytorch.org/whl/cu111/torch_stable.html

pip install torchcsprng==0.2.0+cpu torch==1.8.0+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html | +| macOS | None | pip install torchcsprng torch | + +### Nightly builds: + +Anaconda: + +| OS | CUDA | | +|---------------|------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Linux/Windows | 10.1

10.2

11.1

None | conda install torchcsprng cudatoolkit=10.1 -c pytorch-nightly -c conda-forge

conda install torchcsprng cudatoolkit=10.2 -c pytorch-nightly -c conda-forge

conda install torchcsprng cudatoolkit=11.1 -c pytorch-nightly -c conda-forge

conda install torchcsprng cpuonly -c pytorch-nightly -c conda-forge | +| macOS | None | conda install torchcsprng -c pytorch-nightly | + +pip: + +| OS | CUDA | | +|---------------|------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Linux/Windows | 10.1

10.2

11.1

None | pip install --pre torchcsprng -f https://download.pytorch.org/whl/nightly/cu101/torch_nightly.html

pip install --pre torchcsprng -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html

pip install --pre torchcsprng -f https://download.pytorch.org/whl/nightly/cu111/torch_nightly.html

pip install --pre torchcsprng -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html | +| macOS | None | pip install --pre torchcsprng -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html | + +### From Source + +torchcsprng is a Python C++/CUDA extension that depends on PyTorch. In order to build CSPRNG from source it is required to have Python(>=3.7) with PyTorch(>=1.8.0) installed and C++ compiler(gcc/clang for Linux, XCode for macOS, Visual Studio for MS Windows). +To build torchcsprng you can run the following: +```console +python setup.py install +``` +By default, GPU support is built if CUDA is found and torch.cuda.is_available() is True. Additionally, it is possible to force building GPU support by setting the FORCE_CUDA=1 environment variable, which is useful when building a docker image. + +## Getting Started + +The torchcsprng API is available in `torchcsprng` module: ```python import torch -import torch_csprng as csprng - -# Create crypto-secure PRNG from /dev/urandom: +import torchcsprng as csprng +``` +Create crypto-secure PRNG from /dev/urandom: +```python urandom_gen = csprng.create_random_device_generator('/dev/urandom') +``` -# Create empty boolean tensor on CUDA and initialize it with random values from urandom_gen: -print(torch.empty(10, dtype=torch.bool, device='cuda').random_(generator=urandom_gen)) +Create empty boolean tensor on CUDA and initialize it with random values from urandom_gen: +```python +torch.empty(10, dtype=torch.bool, device='cuda').random_(generator=urandom_gen) +``` +``` tensor([ True, False, False, True, False, False, False, True, False, False], device='cuda:0') +``` -# Create empty int16 tensor on CUDA and initialize it with random values in range [0, 100) from urandom_gen: -print(torch.empty(10, dtype=torch.int16, device='cuda').random_(100, generator=urandom_gen)) +Create empty int16 tensor on CUDA and initialize it with random values in range [0, 100) from urandom_gen: +```python 
+torch.empty(10, dtype=torch.int16, device='cuda').random_(100, generator=urandom_gen) +``` +``` tensor([59, 20, 68, 51, 18, 37, 7, 54, 74, 85], device='cuda:0', dtype=torch.int16) +``` -# Create non-crypto-secure MT19937 PRNG: +Create non-crypto-secure MT19937 PRNG: +```python mt19937_gen = csprng.create_mt19937_generator() - -print(torch.empty(10, dtype=torch.int64, device='cuda').random_(torch.iinfo(torch.int64).min, to=None, generator=mt19937_gen)) +torch.empty(10, dtype=torch.int64, device='cuda').random_(torch.iinfo(torch.int64).min, to=None, generator=mt19937_gen) +``` +``` tensor([-7584783661268263470, 2477984957619728163, -3472586837228887516, -5174704429717287072, 4125764479102447192, -4763846282056057972, -182922600982469112, -498242863868415842, 728545841957750221, 7740902737283645074], device='cuda:0') +``` -# Create crypto-secure PRNG from default random device: +Create crypto-secure PRNG from default random device: +```python default_device_gen = csprng.create_random_device_generator() - -print(torch.randn(10, device='cuda', generator=default_device_gen)) +torch.randn(10, device='cuda', generator=default_device_gen) +``` +``` tensor([ 1.2885, 0.3240, -1.1813, 0.8629, 0.5714, 2.3720, -0.5627, -0.5551, -0.6304, 0.1090], device='cuda:0') +``` -# Create non-crypto-secure MT19937 PRNG with seed +Create non-crypto-secure MT19937 PRNG with seed: +```python mt19937_gen = csprng.create_mt19937_generator(42) +torch.empty(10, device='cuda').geometric_(p=0.2, generator=mt19937_gen) +``` +``` +tensor([ 7., 1., 8., 1., 11., 3., 1., 1., 5., 10.], device='cuda:0') +``` -print(torch.empty(10, device='cuda').geometric_(p=0.2, generator=mt19937_gen)) +Recreate MT19937 PRNG with the same seed: +```python +mt19937_gen = csprng.create_mt19937_generator(42) +torch.empty(10, device='cuda').geometric_(p=0.2, generator=mt19937_gen) +``` +``` tensor([ 7., 1., 8., 1., 11., 3., 1., 1., 5., 10.], device='cuda:0') +``` -print(torch.empty(10, device='cuda').geometric_(p=0.2, 
generator=mt19937_gen)) -tensor([ 1., 1., 1., 6., 1., 13., 5., 1., 3., 4.], device='cuda:0') +## Contributing +We appreciate all contributions. If you are planning to contribute back bug-fixes, please do so without any further discussion. If you plan to contribute new features, utility functions or extensions, please first open an issue and discuss the feature with us. -print(torch.empty(10, device='cuda').geometric_(p=0.2, generator=mt19937_gen)) -tensor([14., 5., 4., 5., 1., 1., 8., 1., 7., 10.], device='cuda:0') -# Recreate MT19937 PRNG with the same seed -mt19937_gen = csprng.create_mt19937_generator(42) -print(torch.empty(10, device='cuda').geometric_(p=0.2, generator=mt19937_gen)) -tensor([ 7., 1., 8., 1., 11., 3., 1., 1., 5., 10.], device='cuda:0') +## License -print(torch.empty(10, device='cuda').geometric_(p=0.2, generator=mt19937_gen)) -tensor([ 1., 1., 1., 6., 1., 13., 5., 1., 3., 4.], device='cuda:0') +torchcsprng is BSD 3-clause licensed. See the license file [here](https://github.com/pytorch/csprng/blob/master/LICENSE) -print(torch.empty(10, device='cuda').geometric_(p=0.2, generator=mt19937_gen)) -tensor([14., 5., 4., 5., 1., 1., 8., 1., 7., 10.], device='cuda:0') +## [Terms of Use](https://opensource.facebook.com/legal/terms) -``` +## [Privacy Policy](https://opensource.facebook.com/legal/privacy) + +Copyright © 2020 Meta Platforms, Inc diff --git a/examples/csprng.ipynb b/examples/csprng.ipynb new file mode 100644 index 0000000..1f6b477 --- /dev/null +++ b/examples/csprng.ipynb @@ -0,0 +1,226 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "csprng.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Lpno_zUJT8ms" + }, + "source": [ + "# Cryptographically secure pseudorandom number generators for PyTorch\n", + "\n", + "The torchcsprng API is available in 
`torchcsprng` module:\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "db4YYky-PDI_" + }, + "source": [ + "!pip install torchcsprng==0.2.0 torch==1.8.0 -f https://download.pytorch.org/whl/cu101/torch_stable.html" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "O1s_j8CPPHSn" + }, + "source": [ + "import torch\n", + "import torchcsprng as csprng" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "o1Kz25IoS9m-" + }, + "source": [ + "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HLlLxkDIUWCG" + }, + "source": [ + "Create crypto-secure PRNG from /dev/urandom:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "yyyYlq5kUQss" + }, + "source": [ + "urandom_gen = csprng.create_random_device_generator('/dev/urandom')" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xbUCnJfkUdUI" + }, + "source": [ + "Create empty boolean tensor on the `device` and initialize it with random values from `urandom_gen`:\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "zmj_VlIzUYIO" + }, + "source": [ + "torch.empty(10, dtype=torch.bool, device=device).random_(generator=urandom_gen)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ycODsYhtUud9" + }, + "source": [ + "Create empty int16 tensor on the `device` and initialize it with random values in range [0, 100) from `urandom_gen`:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "uel-jbW9UlZH" + }, + "source": [ + "torch.empty(10, dtype=torch.int16, device=device).random_(100, generator=urandom_gen)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1jXW1FEmVMW_" + }, + 
"source": [ + "Create non-crypto-secure MT19937 PRNG:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "sL-cwFGfVOrp" + }, + "source": [ + "mt19937_gen = csprng.create_mt19937_generator()\n", + "torch.empty(10, dtype=torch.int64, device=device).random_(torch.iinfo(torch.int64).min, to=None, generator=mt19937_gen)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KW96wT4UVXBm" + }, + "source": [ + "Create crypto-secure PRNG from default random device:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "tjwbuE6FVRgm" + }, + "source": [ + "default_device_gen = csprng.create_random_device_generator()\n", + "torch.randn(10, device=device, generator=default_device_gen)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qYgdkZAYVfZT" + }, + "source": [ + "Create non-crypto-secure MT19937 PRNG with seed:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "xjOsYOxxVbzg" + }, + "source": [ + "mt19937_gen = csprng.create_mt19937_generator(42)\n", + "first = torch.empty(10, device=device).geometric_(p=0.2, generator=mt19937_gen)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cV77v7tHVlRd" + }, + "source": [ + "Recreate MT19937 PRNG with the same seed:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "i0O2lC0hVjAg" + }, + "source": [ + "mt19937_gen = csprng.create_mt19937_generator(42)\n", + "second = torch.empty(10, device=device).geometric_(p=0.2, generator=mt19937_gen)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OcgSK0mejcef" + }, + "source": [ + "Check that `first` equals to `second`:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "vMx1BRO3jh7L" + }, + "source": [ + "assert (first == second).all()" + ], + "execution_count": null, + "outputs": [] + } + ] +} diff --git 
a/examples/encrypt_decrypt.ipynb b/examples/encrypt_decrypt.ipynb new file mode 100644 index 0000000..3de8968 --- /dev/null +++ b/examples/encrypt_decrypt.ipynb @@ -0,0 +1,307 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "encrypt_decrypt.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "4JG-7IJgz_dK" + }, + "source": [ + "# PyTorch/CSPRNG encrypt/decrypt examples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "H8TZemj30JvQ" + }, + "source": [ + "torchcsprng 0.2.0 exposes new API for tensor encryption/decryption. Tensor encryption/decryption API is dtype agnostic, so a tensor of any dtype can be encrypted and the result can be stored to a tensor of any dtype. An encryption key also can be a tensor of any dtype. Currently torchcsprng supports AES cipher with 128-bit key in two modes: ECB and CTR." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jC1O-C25vI0W" + }, + "source": [ + "!pip install torchcsprng==0.2.0 torch==1.8.0 -f https://download.pytorch.org/whl/cu101/torch_stable.html" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "su2RWWdOrWFU" + }, + "source": [ + "import torch\n", + "import torchcsprng as csprng" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "NHTOLPZ_3254" + }, + "source": [ + "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "17L0sgmy0R6o" + }, + "source": [ + "torchcsprng implementation of AES with 128 bit key requires to have a key tensor of 16 bytes but of any dtype" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "rw7WYZ-50To9" + }, + "source": [ + "key = torch.empty(16, dtype=torch.uint8, device=device).random_(0, 256)\n", + "key" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RRfvyfHM4MY1" + }, + "source": [ + "Alternatively it can be a tensor of 8 elements of `torch.int16` or even 4 elements of `torch.float32`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rCy01t1-0dtO" + }, + "source": [ + "The size of input tensor is 42 * (32/8) = 168 bytes. 
AES 128 operates with 16-byte blocks, so zero-padding of 8 bytes will be used to form 176 bytes (eleven 16-byte blocks)" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "LcuVmhyU0WTn" + }, + "source": [ + "initial = torch.empty(42, dtype=torch.float32, device=device).normal_(-24.0, 42.0)\n", + "initial" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rPNq2u4e3tlJ" + }, + "source": [ + "torchcsprng requires output tensor to be of the same size in bytes as input tensor rounded up to 16 bytes (AES 128 block size), so if `torch.int64` is dtype of the destination tensor size must be 176 / (64/8) = 22" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "RAJya9GT0gb4" + }, + "source": [ + "encrypted = torch.empty(22, dtype=torch.int64, device=device)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-DCI4QOh4oGX" + }, + "source": [ + "Call `torchcsprng.encrypt` to encrypt `initial` tensor in [ECB](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Electronic_codebook_(ECB)) mode with 128-bit `key` tensor and store the result to `encrypted` tensor." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "TK4OjPRq4lsJ" + }, + "source": [ + "csprng.encrypt(initial, encrypted, key, \"aes128\", \"ecb\")" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yXUAwFHh5PSy" + }, + "source": [ + "Create an output tensor" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "4LtJ-kD446DJ" + }, + "source": [ + "decrypted = torch.empty_like(initial)" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8VcF04mf6Rn5" + }, + "source": [ + "Call `torchcsprng.decrypt` to decrypt `encrypted` tensor in ECB mode with 128-bit `key` tensor and store the result to `decrypted` tensor." 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "kojXCFGK5v6l" + }, + "source": [ + "csprng.decrypt(encrypted, decrypted, key, \"aes128\", \"ecb\")" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9dEBSPD6EFSu" + }, + "source": [ + "Let's check that `decrypted` equals to `initial`:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "yOc1ftnM5yyj" + }, + "source": [ + "assert (decrypted == initial).all()" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cQWyteLlE4mQ" + }, + "source": [ + "Another example is to use [CTR](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_(CTR)) mode with 128-bit `key` tensor of 4 elements of dtype `dtype=torch.float32`:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "ZFInqYawD7ks" + }, + "source": [ + "key = torch.empty(4, dtype=torch.float32, device=device).random_()\n", + "key" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FRz94NaZGyRS" + }, + "source": [ + "Let's encrypt 100 elements `torch.bool` tensor and store the result in 56 elements `torch.int16` tensor:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "8uiqxiehF_is" + }, + "source": [ + "initial = torch.empty(100, dtype=torch.bool, device=device).random_()\n", + "initial" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "G0URlmQYGfcW" + }, + "source": [ + "encrypted = torch.empty(56, dtype=torch.int16, device=device)\n", + "csprng.encrypt(initial, encrypted, key, \"aes128\", \"ctr\")" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U9Zz2oXoHw9Q" + }, + "source": [ + "Decrypt it back and check that `decrypted` equals to `initial`:" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "YXNcdUbXHoPC" + }, + 
"source": [ + "decrypted = torch.empty_like(initial)\n", + "csprng.decrypt(encrypted, decrypted, key, \"aes128\", \"ctr\")\n", + "decrypted" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "ie7epw1SKrdQ" + }, + "source": [ + "assert (decrypted == initial).all()" + ], + "execution_count": null, + "outputs": [] + } + ] +} diff --git a/packaging/README.md b/packaging/README.md new file mode 100644 index 0000000..20ff064 --- /dev/null +++ b/packaging/README.md @@ -0,0 +1,90 @@ +# Building torchcsprng packages for release + +## Anaconda packages + +### Linux + +```bash +nvidia-docker run -it --ipc=host --rm -v $(pwd):/remote soumith/conda-cuda bash +pushd remote/conda + +./build_csprng.sh 9.0 +./build_csprng.sh 10.0 +./build_csprng.sh cpu + +# copy packages over to /remote +# exit docker +# anaconda upload -u pytorch torchcsprng*.bz2 +``` + +### OSX + +```bash +# create a fresh anaconda environment / install and activate it +conda install -y conda-build anaconda-client +./build_csprng.sh cpu + +# copy packages over to /remote +# exit docker +# anaconda upload -u pytorch torchcsprng*.bz2 +``` + +### Windows + +```bash +# Open `Git Bash` and change dir to `conda` +./build_csprng.sh 9.0 +./build_csprng.sh 10.0 +./build_csprng.sh cpu + +# copy packages to a output directory +# anaconda upload -u pytorch torchcsprng*.bz2 +``` + +## Wheels + +### Linux + +pushd wheel + +```bash +nvidia-docker run -it --ipc=host --rm -v $(pwd):/remote soumith/manylinux-cuda90:latest bash +cd remote +./linux_manywheel.sh cu90 + +rm -rf /usr/local/cuda* +./linux_manywheel.sh cpu +``` + +```bash +nvidia-docker run -it --ipc=host --rm -v $(pwd):/remote soumith/manylinux-cuda100:latest bash +cd remote +./linux_manywheel.sh cu100 +``` + +wheels are in the folders `cpu`, `cu90`, `cu100`. + +You can upload the `cu90` wheels to twine with `twine upload *.whl`. 
+Which wheels we upload depends on which wheels PyTorch uploads as default, and right now, it's `cu90`. + +### OSX + +```bash +pushd wheel +./osx_wheel.sh +``` + +### Windows + +```cmd +set PYTORCH_REPO=pytorch + +pushd windows +call build_csprng.bat 90 0.3.0 1 +call build_csprng.bat 100 0.3.0 1 +call build_csprng.bat cpu 0.3.0 1 +``` + +wheels are in the current folder. + +You can upload them to twine with `twine upload *.whl` diff --git a/packaging/build_conda.sh b/packaging/build_conda.sh index c628f6e..e0e096d 100755 --- a/packaging/build_conda.sh +++ b/packaging/build_conda.sh @@ -5,10 +5,10 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" . "$script_dir/pkg_helpers.bash" export BUILD_TYPE=conda -setup_env 0.1.0 +setup_env $(cat "version.txt" | sed "s/\([0-9]*\.[0-9]*\.[0-9]*\).*/\1/g") export SOURCE_ROOT_DIR="$PWD" setup_conda_pytorch_constraint setup_conda_cudatoolkit_constraint setup_visual_studio_constraint setup_junit_results_folder -conda build $CONDA_CHANNEL_FLAGS -c defaults -c conda-forge --no-anaconda-upload --python "$PYTHON_VERSION" packaging/torch_csprng +conda build $CONDA_CHANNEL_FLAGS -c defaults -c conda-forge --no-anaconda-upload --python "$PYTHON_VERSION" packaging/torchcsprng diff --git a/packaging/build_wheel.sh b/packaging/build_wheel.sh index 98726cd..15b85a4 100755 --- a/packaging/build_wheel.sh +++ b/packaging/build_wheel.sh @@ -5,14 +5,50 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" . 
"$script_dir/pkg_helpers.bash" export BUILD_TYPE=wheel -setup_env 0.1.0 +setup_env $(cat "version.txt" | sed "s/\([0-9]*\.[0-9]*\.[0-9]*\).*/\1/g") setup_wheel_python pip_install numpy pyyaml future ninja setup_pip_pytorch_version python setup.py clean +# Copy binaries to be included in the wheel distribution +if [[ "$(uname)" == Darwin || "$OSTYPE" == "msys" ]]; then + python_exec="$(which python)" + bin_path=$(dirname $python_exec) + env_path=$(dirname $bin_path) + if [[ "$(uname)" == Darwin ]]; then + # Install delocate to relocate the required binaries + pip_install delocate + fi +else + # Install auditwheel to get some inspection utilities + pip_install auditwheel + + # Point to custom libraries + export LD_LIBRARY_PATH=$(pwd)/ext_libraries/lib:$LD_LIBRARY_PATH + export TORCHCSPRNG_INCLUDE=$(pwd)/ext_libraries/include + export TORCHCSPRNG_LIBRARY=$(pwd)/ext_libraries/lib +fi + if [[ "$OSTYPE" == "msys" ]]; then IS_WHEEL=1 "$script_dir/windows/internal/vc_env_helper.bat" python setup.py bdist_wheel else IS_WHEEL=1 python setup.py bdist_wheel fi + + +if [[ "$(uname)" == Darwin ]]; then + pushd dist/ + python_exec="$(which python)" + bin_path=$(dirname $python_exec) + env_path=$(dirname $bin_path) + for whl in *.whl; do + DYLD_LIBRARY_PATH="$env_path/lib/:$DYLD_LIBRARY_PATH" delocate-wheel -v $whl + done +else + if [[ "$OSTYPE" == "msys" ]]; then + "$script_dir/windows/internal/vc_env_helper.bat" python $script_dir/wheel/relocate.py + else + LD_LIBRARY_PATH="/usr/local/lib:$LD_LIBRARY_PATH" python $script_dir/wheel/relocate.py + fi +fi diff --git a/packaging/conda/build_csprng.sh b/packaging/conda/build_csprng.sh new file mode 100755 index 0000000..44fc0af --- /dev/null +++ b/packaging/conda/build_csprng.sh @@ -0,0 +1,229 @@ +#!/usr/bin/env bash +if [[ -x "/remote/anaconda_token" ]]; then + . 
/remote/anaconda_token || true +fi + +set -ex + +if [[ "$CIRCLECI" == 'true' ]]; then + export PATH="/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin:.:$PATH" +fi + +# Function to retry functions that sometimes timeout or have flaky failures +retry () { + $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) +} + +# Parse arguments and determine version +########################################################### +if [[ -n "$DESIRED_CUDA" && -n "$TORCHCSPRNG_BUILD_VERSION" && -n "$TORCHCSPRNG_BUILD_NUMBER" ]]; then + desired_cuda="$DESIRED_CUDA" + build_version="$PYTORCH_BUILD_VERSION" + build_number="$PYTORCH_BUILD_NUMBER" +else + if [ "$#" -ne 3 ]; then + echo "Illegal number of parameters. Pass cuda version, pytorch version, build number" + echo "CUDA version should be Mm with no dot, e.g. '80'" + echo "DESIRED_PYTHON should be M.m, e.g. '2.7'" + exit 1 + fi + + desired_cuda="$1" + build_version="$2" + build_number="$3" +fi +if [[ "$desired_cuda" != cpu ]]; then + desired_cuda="$(echo $desired_cuda | tr -d cuda. 
)" +fi +echo "Building cuda version $desired_cuda and torchcsprng version: $build_version build_number: $build_number" + +if [[ "$desired_cuda" == 'cpu' ]]; then + cpu_only=1 + cuver="cpu" +else + # Switch desired_cuda to be M.m to be consistent with other scripts in + # pytorch/builder + export FORCE_CUDA=1 + cuda_nodot="$desired_cuda" + + if [[ ${#cuda_nodot} -eq 2 ]]; then + desired_cuda="${desired_cuda:0:1}.${desired_cuda:1:1}" + elif [[ ${#cuda_nodot} -eq 3 ]]; then + desired_cuda="${desired_cuda:0:2}.${desired_cuda:2:1}" + else + echo "unknown cuda version $cuda_nodot" + exit 1 + fi + + cuver="cu$cuda_nodot" +fi + +export TORCHCSPRNG_BUILD_VERSION=$build_version +export TORCHCSPRNG_BUILD_NUMBER=$build_number + +if [[ -z "$DESIRED_PYTHON" ]]; then + DESIRED_PYTHON=('3.5' '3.6' '3.7') +fi + +SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" + +if [[ -z "$WIN_PACKAGE_WORK_DIR" ]]; then + WIN_PACKAGE_WORK_DIR="$(echo $(pwd -W) | tr '/' '\\')\\tmp_conda_$(date +%H%M%S)" +fi + +mkdir -p "$WIN_PACKAGE_WORK_DIR" || true +csprng_rootdir="$(realpath ${WIN_PACKAGE_WORK_DIR})/torchcsprng-src" +git config --system core.longpaths true + +if [[ ! 
-d "$csprng_rootdir" ]]; then + rm -rf "$csprng_rootdir" + git clone "https://github.com/pytorch/csprng" "$csprng_rootdir" + pushd "$csprng_rootdir" + git checkout $PYTORCH_BRANCH + popd +fi + +cd "$SOURCE_DIR" + +export tmp_conda="${WIN_PACKAGE_WORK_DIR}\\conda" +export miniconda_exe="${WIN_PACKAGE_WORK_DIR}\\miniconda.exe" +rm -rf "$tmp_conda" +rm -f "$miniconda_exe" +curl -sSk https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "$miniconda_exe" +"$SOURCE_DIR/install_conda.bat" && rm "$miniconda_exe" +pushd $tmp_conda +export PATH="$(pwd):$(pwd)/Library/usr/bin:$(pwd)/Library/bin:$(pwd)/Scripts:$(pwd)/bin:$PATH" +popd +retry conda install -yq conda-build + +ANACONDA_USER=pytorch-nightly +conda config --set anaconda_upload no + + +export TORCHCSPRNG_PACKAGE_SUFFIX="" +if [[ "$desired_cuda" == 'cpu' ]]; then + export CONDA_CUDATOOLKIT_CONSTRAINT="" + export CONDA_CPUONLY_FEATURE="- cpuonly # [not osx]" + export CUDA_VERSION="None" +else + export CONDA_CPUONLY_FEATURE="" + . 
./switch_cuda_version.sh $desired_cuda + if [[ "$desired_cuda" == "10.2" ]]; then + export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.2,<10.3 # [not osx]" + elif [[ "$desired_cuda" == "10.1" ]]; then + export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.1,<10.2 # [not osx]" + elif [[ "$desired_cuda" == "10.0" ]]; then + export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.0,<10.1 # [not osx]" + elif [[ "$desired_cuda" == "9.2" ]]; then + export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.2,<9.3 # [not osx]" + elif [[ "$desired_cuda" == "9.0" ]]; then + export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.0,<9.1 # [not osx]" + elif [[ "$desired_cuda" == "8.0" ]]; then + export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=8.0,<8.1 # [not osx]" + else + echo "unhandled desired_cuda: $desired_cuda" + exit 1 + fi +fi + +if [[ -z "$PYTORCH_VERSION" ]]; then + export CONDA_CHANNEL_FLAGS="-c pytorch-nightly -c pytorch" + export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-nightly]' | \ + python -c "import os, sys, json, re; cuver = '$cuver'; \ + cuver = cuver.replace('cu', 'cuda') if cuver != 'cpu' else cuver; \ + print(re.sub(r'\\+.*$', '', \ + [x['version'] for x in json.load(sys.stdin)['pytorch'] \ + if (x['platform'] == 'darwin' or cuver in x['fn']) \ + and 'py' + os.environ['DESIRED_PYTHON'] in x['fn']][-1]))")" + if [[ -z "$PYTORCH_VERSION" ]]; then + echo "PyTorch version auto detection failed" + echo "No package found for desired_cuda=$desired_cuda and DESIRED_PYTHON=$DESIRED_PYTHON" + exit 1 + fi +else + export CONDA_CHANNEL_FLAGS="-c pytorch -c pytorch-nightly" +fi +if [[ "$desired_cuda" == 'cpu' ]]; then + export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==$PYTORCH_VERSION" + export CONDA_PYTORCH_CONSTRAINT="- pytorch==$PYTORCH_VERSION" +else + export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==${PYTORCH_VERSION}" + export CONDA_PYTORCH_CONSTRAINT="- pytorch==${PYTORCH_VERSION}" +fi + +# Loop through all Python versions to 
build a package for each +for py_ver in "${DESIRED_PYTHON[@]}"; do + build_string="py${py_ver}_${build_string_suffix}" + folder_tag="${build_string}_$(date +'%Y%m%d')" + + # Create the conda package into this temporary folder. This is so we can find + # the package afterwards, as there's no easy way to extract the final filename + # from conda-build + output_folder="out_$folder_tag" + rm -rf "$output_folder" + mkdir "$output_folder" + + if [[ "$py_ver" == 3.5 ]]; then + export CONDA_TYPING_CONSTRAINT="- typing" + else + export CONDA_TYPING_CONSTRAINT="" + fi + + export VSTOOLCHAIN_PACKAGE=vs2017 + + # We need to build the compiler activation scripts first on Windows + time VSDEVCMD_ARGS=${VSDEVCMD_ARGS[@]} \ + conda build -c "$ANACONDA_USER" \ + --no-anaconda-upload \ + --output-folder "$output_folder" \ + ../$VSTOOLCHAIN_PACKAGE + + cp ../$VSTOOLCHAIN_PACKAGE/conda_build_config.yaml ../torchcsprng/conda_build_config.yaml + + conda config --set anaconda_upload no + echo "Calling conda-build at $(date)" + if [[ "$desired_cuda" == "9.2" ]]; then + time CMAKE_ARGS=${CMAKE_ARGS[@]} \ + BUILD_VERSION="$TORCHCSPRNG_BUILD_VERSION" \ + CU_VERSION="$cuver" \ + SOURCE_ROOT_DIR="$csprng_rootdir" \ + conda build -c "$ANACONDA_USER" \ + -c defaults \ + -c conda-forge \ + -c "numba/label/dev" \ + --no-anaconda-upload \ + --python "$py_ver" \ + --output-folder "$output_folder" \ + --no-verify \ + --no-test \ + ../torchcsprng + else + time CMAKE_ARGS=${CMAKE_ARGS[@]} \ + BUILD_VERSION="$TORCHCSPRNG_BUILD_VERSION" \ + CU_VERSION="$cuver" \ + SOURCE_ROOT_DIR="$csprng_rootdir" \ + conda build -c "$ANACONDA_USER" \ + -c defaults \ + -c conda-forge \ + --no-anaconda-upload \ + --python "$py_ver" \ + --output-folder "$output_folder" \ + --no-verify \ + --no-test \ + ../torchcsprng + fi + echo "Finished conda-build at $(date)" + + # Extract the package for testing + ls -lah "$output_folder" + built_package="$(find $output_folder/ -name '*torchcsprng*.tar.bz2')" + + # Copy the built 
package to the host machine for persistence before testing + if [[ -n "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then + mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true + cp "$built_package" "$PYTORCH_FINAL_PACKAGE_DIR/" + fi +done + + +set +e diff --git a/packaging/conda/install_conda.bat b/packaging/conda/install_conda.bat new file mode 100644 index 0000000..6052ad0 --- /dev/null +++ b/packaging/conda/install_conda.bat @@ -0,0 +1 @@ +start /wait "" "%miniconda_exe%" /S /InstallationType=JustMe /RegisterPython=0 /AddToPath=0 /D=%tmp_conda% diff --git a/packaging/conda/switch_cuda_version.sh b/packaging/conda/switch_cuda_version.sh new file mode 100755 index 0000000..342def9 --- /dev/null +++ b/packaging/conda/switch_cuda_version.sh @@ -0,0 +1,28 @@ +if [[ "$OSTYPE" == "msys" ]]; then + CUDA_DIR="/c/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v$1" +else + CUDA_DIR="/usr/local/cuda-$1" +fi + +if ! ls "$CUDA_DIR" +then + echo "folder $CUDA_DIR not found to switch" +fi + +echo "Switching symlink to $CUDA_DIR" +mkdir -p /usr/local +rm -fr /usr/local/cuda +ln -s "$CUDA_DIR" /usr/local/cuda + +if [[ "$OSTYPE" == "msys" ]]; then + export CUDA_VERSION=`ls /usr/local/cuda/bin/cudart64*.dll | head -1 | tr '._' ' ' | cut -d ' ' -f2` + export CUDNN_VERSION=`ls /usr/local/cuda/bin/cudnn64*.dll | head -1 | tr '._' ' ' | cut -d ' ' -f2` +else + export CUDA_VERSION=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev) + export CUDNN_VERSION=$(ls /usr/local/cuda/lib64/libcudnn.so.*|sort|tac | head -1 | rev | cut -d"." 
-f -3 | rev) +fi + +ls -alh /usr/local/cuda + +echo "CUDA_VERSION=$CUDA_VERSION" +echo "CUDNN_VERSION=$CUDNN_VERSION" diff --git a/packaging/pkg_helpers.bash b/packaging/pkg_helpers.bash index 8181bf2..dad9622 100644 --- a/packaging/pkg_helpers.bash +++ b/packaging/pkg_helpers.bash @@ -14,8 +14,8 @@ # PYTORCH_VERSION_SUFFIX (e.g., +cpu) # WHEEL_DIR (e.g., cu100/) # CUDA_HOME (e.g., /usr/local/cuda-9.2, respected by torch.utils.cpp_extension) -# FORCE_CUDA (respected by torch_csprng setup.py) -# NVCC_FLAGS (respected by torch_csprng setup.py) +# FORCE_CUDA (respected by torchcsprng setup.py) +# NVCC_FLAGS (respected by torchcsprng setup.py) # # Precondition: CUDA versions are installed in their conventional locations in # /usr/local/cuda-* @@ -49,6 +49,39 @@ setup_cuda() { # Now work out the CUDA settings case "$CU_VERSION" in + cu112) + if [[ "$OSTYPE" == "msys" ]]; then + export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.2" + else + export CUDA_HOME=/usr/local/cuda-11.2/ + fi + export FORCE_CUDA=1 + # Hard-coding gencode flags is temporary situation until + # https://github.com/pytorch/pytorch/pull/23408 lands + export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50" + ;; + cu111) + if [[ "$OSTYPE" == "msys" ]]; then + export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.1" + else + export CUDA_HOME=/usr/local/cuda-11.1/ + fi + export FORCE_CUDA=1 + # Hard-coding gencode flags is temporary situation until + # https://github.com/pytorch/pytorch/pull/23408 lands + export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 
-gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50" + ;; + cu110) + if [[ "$OSTYPE" == "msys" ]]; then + export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.0" + else + export CUDA_HOME=/usr/local/cuda-11.0/ + fi + export FORCE_CUDA=1 + # Hard-coding gencode flags is temporary situation until + # https://github.com/pytorch/pytorch/pull/23408 lands + export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_50,code=compute_50" + ;; cu102) if [[ "$OSTYPE" == "msys" ]]; then export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v10.2" @@ -121,7 +154,7 @@ setup_build_version() { # Set build version based on tag if on tag if [[ -n "${CIRCLE_TAG}" ]]; then # Strip tag - export BUILD_VERSION="$(echo "${CIRCLE_TAG}" | sed -e 's/^v//' -e 's/-.*$//')" + export BUILD_VERSION="$(echo "${CIRCLE_TAG}" | sed -e 's/^v//' -e 's/-.*$//')${VERSION_SUFFIX}" fi } @@ -170,11 +203,7 @@ setup_wheel_python() { conda env remove -n "env$PYTHON_VERSION" || true conda create -yn "env$PYTHON_VERSION" python="$PYTHON_VERSION" conda activate "env$PYTHON_VERSION" - # Install libpng from Anaconda (defaults) - conda install libpng jpeg -y else - # Install native CentOS libPNG - yum install -y libpng-devel libjpeg-turbo-devel case "$PYTHON_VERSION" in 2.7) if [[ -n "$UNICODE_ABI" ]]; then @@ -187,12 +216,19 @@ setup_wheel_python() { 3.6) python_abi=cp36-cp36m ;; 3.7) python_abi=cp37-cp37m ;; 3.8) python_abi=cp38-cp38 ;; + 3.9) python_abi=cp39-cp39 ;; *) echo "Unrecognized PYTHON_VERSION=$PYTHON_VERSION" exit 1 ;; esac - export PATH="/opt/python/$python_abi/bin:$PATH" + # Download all the dependencies required to compile image and video_reader + # extensions + + mkdir -p ext_libraries + pushd 
ext_libraries + popd + export PATH="/opt/python/$python_abi/bin:$(pwd)/ext_libraries/bin:$PATH" fi } @@ -217,9 +253,8 @@ setup_pip_pytorch_version() { fi else pip_install "torch==$PYTORCH_VERSION$PYTORCH_VERSION_SUFFIX" \ - -f https://download.pytorch.org/whl/torch_stable.html \ - -f https://download.pytorch.org/whl/test/torch_test.html \ - -f https://download.pytorch.org/whl/nightly/torch_nightly.html + -f "https://download.pytorch.org/whl/${CU_VERSION}/torch_stable.html" \ + -f "https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/${CU_VERSION}/torch_${UPLOAD_CHANNEL}.html" fi } @@ -229,7 +264,7 @@ setup_pip_pytorch_version() { # You MUST have populated PYTORCH_VERSION_SUFFIX before hand. setup_conda_pytorch_constraint() { if [[ -z "$PYTORCH_VERSION" ]]; then - export CONDA_CHANNEL_FLAGS="-c pytorch-nightly" + export CONDA_CHANNEL_FLAGS="-c pytorch-nightly -c pytorch" export PYTORCH_VERSION="$(conda search --json 'pytorch[channel=pytorch-nightly]' | \ python -c "import os, sys, json, re; cuver = os.environ.get('CU_VERSION'); \ cuver_1 = cuver.replace('cu', 'cuda') if cuver != 'cpu' else cuver; \ @@ -244,7 +279,7 @@ setup_conda_pytorch_constraint() { exit 1 fi else - export CONDA_CHANNEL_FLAGS="-c pytorch -c pytorch-nightly -c pytorch-test" + export CONDA_CHANNEL_FLAGS="-c pytorch -c pytorch-${UPLOAD_CHANNEL}" fi if [[ "$CU_VERSION" == cpu ]]; then export CONDA_PYTORCH_BUILD_CONSTRAINT="- pytorch==$PYTORCH_VERSION${PYTORCH_VERSION_SUFFIX}" @@ -265,6 +300,15 @@ setup_conda_cudatoolkit_constraint() { export CONDA_CUDATOOLKIT_CONSTRAINT="" else case "$CU_VERSION" in + cu112) + export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.2,<11.3 # [not osx]" + ;; + cu111) + export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.1,<11.2 # [not osx]" + ;; + cu110) + export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.0,<11.1 # [not osx]" + ;; cu102) export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.2,<10.3 # [not osx]" ;; @@ -289,12 +333,45 @@ 
setup_conda_cudatoolkit_constraint() { fi } +setup_conda_cudatoolkit_plain_constraint() { + export CONDA_CPUONLY_FEATURE="" + export CMAKE_USE_CUDA=1 + if [[ "$(uname)" == Darwin ]]; then + export CONDA_CUDATOOLKIT_CONSTRAINT="" + export CMAKE_USE_CUDA=0 + else + case "$CU_VERSION" in + cu102) + export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=10.2" + ;; + cu101) + export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=10.1" + ;; + cu100) + export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=10.0" + ;; + cu92) + export CONDA_CUDATOOLKIT_CONSTRAINT="cudatoolkit=9.2" + ;; + cpu) + export CONDA_CUDATOOLKIT_CONSTRAINT="" + export CONDA_CPUONLY_FEATURE="cpuonly" + export CMAKE_USE_CUDA=0 + ;; + *) + echo "Unrecognized CU_VERSION=$CU_VERSION" + exit 1 + ;; + esac + fi +} + # Build the proper compiler package before building the final package setup_visual_studio_constraint() { if [[ "$OSTYPE" == "msys" ]]; then export VSTOOLCHAIN_PACKAGE=vs$VC_YEAR conda build $CONDA_CHANNEL_FLAGS --no-anaconda-upload packaging/$VSTOOLCHAIN_PACKAGE - cp packaging/$VSTOOLCHAIN_PACKAGE/conda_build_config.yaml packaging/torch_csprng/conda_build_config.yaml + cp packaging/$VSTOOLCHAIN_PACKAGE/conda_build_config.yaml packaging/torchcsprng/conda_build_config.yaml fi } diff --git a/packaging/torchcsprng/bld.bat b/packaging/torchcsprng/bld.bat new file mode 100644 index 0000000..8c7c833 --- /dev/null +++ b/packaging/torchcsprng/bld.bat @@ -0,0 +1,27 @@ +@echo on + +set TORCHCSPRNG_BUILD_VERSION=%PKG_VERSION% +set TORCHCSPRNG_BUILD_NUMBER=%PKG_BUILDNUM% + +set build_with_cuda= + +if "%CUDA_VERSION%" == "None" goto cuda_flags_end +if "%CUDA_VERSION%" == "cpu" goto cuda_flags_end +if "%CUDA_VERSION%" == "" goto cuda_flags_end + +set build_with_cuda=1 +set desired_cuda=%CUDA_VERSION:~0,-1%.%CUDA_VERSION:~-1,1% + +set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%desired_cuda% +set CUDA_BIN_PATH=%CUDA_PATH%\bin +set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr +if 
"%desired_cuda%" == "9.0" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 +if "%desired_cuda%" == "9.2" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 +if "%desired_cuda%" == "10.0" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 +if "%desired_cuda%" == "10.1" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 +if "%desired_cuda%" == "10.2" set NVCC_FLAGS=%NVCC_FLAGS% -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 + +:cuda_flags_end + +python setup.py install --single-version-externally-managed --record=record.txt +if errorlevel 1 exit /b 1 diff --git a/packaging/torchcsprng/conda_build_config.yaml b/packaging/torchcsprng/conda_build_config.yaml new file mode 100644 index 0000000..257515c --- /dev/null +++ b/packaging/torchcsprng/conda_build_config.yaml @@ -0,0 +1,26 @@ +channel_sources: + - pytorch-nightly,pytorch,defaults +blas_impl: + - mkl # [x86_64] +c_compiler: + - vs2017 # [win] +cxx_compiler: + - vs2017 # [win] +python: + - 3.5 + - 3.6 +# This differs from target_platform in that it determines what subdir the compiler +# will target, 
not what subdir the compiler package will be itself. +# For example, we need a win-64 vs2008_win-32 package, so that we compile win-32 +# code on win-64 miniconda. +cross_compiler_target_platform: + - win-64 # [win] +target_platform: + - win-64 # [win] +vc: + - 14 +zip_keys: + - # [win] + - vc # [win] + - c_compiler # [win] + - cxx_compiler # [win] diff --git a/packaging/torch_csprng/meta.yaml b/packaging/torchcsprng/meta.yaml similarity index 95% rename from packaging/torch_csprng/meta.yaml rename to packaging/torchcsprng/meta.yaml index 41be147..1b4570d 100644 --- a/packaging/torch_csprng/meta.yaml +++ b/packaging/torchcsprng/meta.yaml @@ -1,5 +1,5 @@ package: - name: torch_csprng + name: torchcsprng version: "{{ environ.get('BUILD_VERSION') }}" source: @@ -39,12 +39,13 @@ build: #test: # imports: # - torch -# - torch_csprng +# - torchcsprng # source_files: # - test # requires: # - pytest # - scipy +# - pycrypto # commands: # pytest . --verbose diff --git a/packaging/vs2017/activate.bat b/packaging/vs2017/activate.bat new file mode 100644 index 0000000..ccecfc2 --- /dev/null +++ b/packaging/vs2017/activate.bat @@ -0,0 +1,44 @@ +:: Set env vars that tell distutils to use the compiler that we put on path +SET DISTUTILS_USE_SDK=1 +SET MSSdk=1 + +SET "VS_VERSION=15.0" +SET "VS_MAJOR=15" +SET "VS_YEAR=2017" + +set "MSYS2_ARG_CONV_EXCL=/AI;/AL;/OUT;/out" +set "MSYS2_ENV_CONV_EXCL=CL" + +:: For Python 3.5+, ensure that we link with the dynamic runtime. See +:: http://stevedower.id.au/blog/building-for-python-3-5-part-two/ for more info +set "PY_VCRUNTIME_REDIST=%PREFIX%\\bin\\vcruntime140.dll" + +for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( + if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( + set "VSINSTALLDIR=%%i\" + goto :vswhere + ) +) + +:vswhere + +:: Shorten PATH to avoid the `input line too long` error. 
+SET MyPath=%PATH% + +setlocal EnableDelayedExpansion + +SET TempPath="%MyPath:;=";"%" +SET var= +FOR %%a IN (%TempPath%) DO ( + IF EXIST %%~sa ( + SET "var=!var!;%%~sa" + ) +) + +set "TempPath=!var:~1!" +endlocal & set "PATH=%TempPath%" + +:: Shorten current directory too +FOR %%A IN (.) DO CD "%%~sA" + +:: other things added by install_activate.bat at package build time diff --git a/packaging/torch_csprng/conda_build_config.yaml b/packaging/vs2017/conda_build_config.yaml similarity index 93% rename from packaging/torch_csprng/conda_build_config.yaml rename to packaging/vs2017/conda_build_config.yaml index dd426d8..5188bb0 100644 --- a/packaging/torch_csprng/conda_build_config.yaml +++ b/packaging/vs2017/conda_build_config.yaml @@ -1,3 +1,5 @@ +blas_impl: + - mkl # [x86_64] c_compiler: - vs2017 # [win] cxx_compiler: diff --git a/packaging/vs2017/install_activate.bat b/packaging/vs2017/install_activate.bat new file mode 100644 index 0000000..de0e6ff --- /dev/null +++ b/packaging/vs2017/install_activate.bat @@ -0,0 +1,30 @@ +set YEAR=2017 +set VER=15 + +mkdir "%PREFIX%\etc\conda\activate.d" +COPY "%RECIPE_DIR%\activate.bat" "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + +IF "%cross_compiler_target_platform%" == "win-64" ( + set "target_platform=amd64" + echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR% Win64" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + IF "%VSDEVCMD_ARGS%" == "" ( + echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + ) ELSE ( + echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 
%VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + ) + echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + ) else ( + set "target_platform=x86" + echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo CALL "VC\Auxiliary\Build\vcvars32.bat" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo popd + ) + diff --git a/packaging/vs2017/install_runtime.bat b/packaging/vs2017/install_runtime.bat new file mode 100644 index 0000000..5163c16 --- /dev/null +++ b/packaging/vs2017/install_runtime.bat @@ -0,0 +1,49 @@ +set VC_PATH=x86 +if "%ARCH%"=="64" ( + set VC_PATH=x64 +) + +set MSC_VER=2017 + +rem :: This should always be present for VC installed with VS. Not sure about VC installed with Visual C++ Build Tools 2015 +rem FOR /F "usebackq tokens=3*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\Software\Microsoft\DevDiv\VC\Servicing\14.0\IDE.x64" /v UpdateVersion`) DO ( +rem set SP=%%A +rem ) + +rem if not "%SP%" == "%PKG_VERSION%" ( +rem echo "Version detected from registry: %SP%" +rem echo "does not match version of package being built (%PKG_VERSION%)" +rem echo "Do you have current updates for VS 2015 installed?" +rem exit 1 +rem ) + + +REM ========== REQUIRES Win 10 SDK be installed, or files otherwise copied to location below! 
+robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%LIBRARY_BIN%" *.dll /E +robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%PREFIX%" *.dll /E +if %ERRORLEVEL% GEQ 8 exit 1 + +REM ========== This one comes from visual studio 2017 +set "VC_VER=141" + +for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( + if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( + set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" + goto :eof + ) +) + +@setlocal +call "%VS15VARSALL%" x64 + +set "REDIST_ROOT=%VCToolsRedistDir%%VC_PATH%" + +robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%LIBRARY_BIN%" *.dll /E +if %ERRORLEVEL% LSS 8 exit 0 +robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%PREFIX%" *.dll /E +if %ERRORLEVEL% LSS 8 exit 0 +robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%LIBRARY_BIN%" *.dll /E +if %ERRORLEVEL% LSS 8 exit 0 +robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%PREFIX%" *.dll /E +if %ERRORLEVEL% LSS 8 exit 0 +@endlocal diff --git a/packaging/vs2017/meta.yaml b/packaging/vs2017/meta.yaml new file mode 100644 index 0000000..1f56952 --- /dev/null +++ b/packaging/vs2017/meta.yaml @@ -0,0 +1,24 @@ +{% set vcver="14.1" %} +{% set vcfeature="14" %} +{% set vsyear="2017" %} +{% set fullver="15.4.27004.2010" %} + +package: + name: vs{{ vsyear }} + version: {{ fullver }} + +build: + skip: True [not win] + script_env: + - VSDEVCMD_ARGS # [win] + +outputs: + - name: vs{{ vsyear }}_{{ cross_compiler_target_platform }} + script: install_activate.bat + track_features: + # VS 2017 is binary-compatible with VS 2015/vc14. Tools are "v141". 
+ strong: + - vc{{ vcfeature }} + about: + summary: Activation and version verification of MSVC {{ vcver }} (VS {{ vsyear }}) compiler + license: BSD 3-clause diff --git a/packaging/vs2019/activate.bat b/packaging/vs2019/activate.bat new file mode 100644 index 0000000..6f607ba --- /dev/null +++ b/packaging/vs2019/activate.bat @@ -0,0 +1,44 @@ +:: Set env vars that tell distutils to use the compiler that we put on path +SET DISTUTILS_USE_SDK=1 +SET MSSdk=1 + +SET "VS_VERSION=16.0" +SET "VS_MAJOR=16" +SET "VS_YEAR=2019" + +set "MSYS2_ARG_CONV_EXCL=/AI;/AL;/OUT;/out" +set "MSYS2_ENV_CONV_EXCL=CL" + +:: For Python 3.5+, ensure that we link with the dynamic runtime. See +:: http://stevedower.id.au/blog/building-for-python-3-5-part-two/ for more info +set "PY_VCRUNTIME_REDIST=%PREFIX%\\bin\\vcruntime140.dll" + +for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [16^,17^) -property installationPath`) do ( + if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( + set "VSINSTALLDIR=%%i\" + goto :vswhere + ) +) + +:vswhere + +:: Shorten PATH to avoid the `input line too long` error. +SET MyPath=%PATH% + +setlocal EnableDelayedExpansion + +SET TempPath="%MyPath:;=";"%" +SET var= +FOR %%a IN (%TempPath%) DO ( + IF EXIST %%~sa ( + SET "var=!var!;%%~sa" + ) +) + +set "TempPath=!var:~1!" +endlocal & set "PATH=%TempPath%" + +:: Shorten current directory too +FOR %%A IN (.) 
DO CD "%%~sA" + +:: other things added by install_activate.bat at package build time diff --git a/packaging/vs2019/conda_build_config.yaml b/packaging/vs2019/conda_build_config.yaml new file mode 100644 index 0000000..358052e --- /dev/null +++ b/packaging/vs2019/conda_build_config.yaml @@ -0,0 +1,24 @@ +blas_impl: + - mkl # [x86_64] +c_compiler: + - vs2019 # [win] +cxx_compiler: + - vs2019 # [win] +python: + - 3.5 + - 3.6 +# This differs from target_platform in that it determines what subdir the compiler +# will target, not what subdir the compiler package will be itself. +# For example, we need a win-64 vs2008_win-32 package, so that we compile win-32 +# code on win-64 miniconda. +cross_compiler_target_platform: + - win-64 # [win] +target_platform: + - win-64 # [win] +vc: + - 14 +zip_keys: + - # [win] + - vc # [win] + - c_compiler # [win] + - cxx_compiler # [win] diff --git a/packaging/vs2019/install_activate.bat b/packaging/vs2019/install_activate.bat new file mode 100644 index 0000000..3c38253 --- /dev/null +++ b/packaging/vs2019/install_activate.bat @@ -0,0 +1,30 @@ +set YEAR=2019 +set VER=16 + +mkdir "%PREFIX%\etc\conda\activate.d" +COPY "%RECIPE_DIR%\activate.bat" "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + +IF "%cross_compiler_target_platform%" == "win-64" ( + set "target_platform=amd64" + echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR% Win64" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + IF "%VSDEVCMD_ARGS%" == "" ( + echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + ) ELSE ( + echo 
CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + ) + echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + ) else ( + set "target_platform=x86" + echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo CALL "VC\Auxiliary\Build\vcvars32.bat" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" + echo popd + ) + diff --git a/packaging/vs2019/install_runtime.bat b/packaging/vs2019/install_runtime.bat new file mode 100644 index 0000000..e09a5cc --- /dev/null +++ b/packaging/vs2019/install_runtime.bat @@ -0,0 +1,49 @@ +set VC_PATH=x86 +if "%ARCH%"=="64" ( + set VC_PATH=x64 +) + +set MSC_VER=2019 + +rem :: This should always be present for VC installed with VS. Not sure about VC installed with Visual C++ Build Tools 2015 +rem FOR /F "usebackq tokens=3*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\Software\Microsoft\DevDiv\VC\Servicing\14.0\IDE.x64" /v UpdateVersion`) DO ( +rem set SP=%%A +rem ) + +rem if not "%SP%" == "%PKG_VERSION%" ( +rem echo "Version detected from registry: %SP%" +rem echo "does not match version of package being built (%PKG_VERSION%)" +rem echo "Do you have current updates for VS 2015 installed?" +rem exit 1 +rem ) + + +REM ========== REQUIRES Win 10 SDK be installed, or files otherwise copied to location below! 
+robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%LIBRARY_BIN%" *.dll /E +robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%PREFIX%" *.dll /E +if %ERRORLEVEL% GEQ 8 exit 1 + +REM ========== This one comes from visual studio 2019 +set "VC_VER=142" + +for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [16^,17^) -property installationPath`) do ( + if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( + set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" + goto :eof + ) +) + +@setlocal +call "%VS15VARSALL%" x64 + +set "REDIST_ROOT=%VCToolsRedistDir%%VC_PATH%" + +robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%LIBRARY_BIN%" *.dll /E +if %ERRORLEVEL% LSS 8 exit 0 +robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%PREFIX%" *.dll /E +if %ERRORLEVEL% LSS 8 exit 0 +robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%LIBRARY_BIN%" *.dll /E +if %ERRORLEVEL% LSS 8 exit 0 +robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%PREFIX%" *.dll /E +if %ERRORLEVEL% LSS 8 exit 0 +@endlocal diff --git a/packaging/vs2019/meta.yaml b/packaging/vs2019/meta.yaml new file mode 100644 index 0000000..94a0ed4 --- /dev/null +++ b/packaging/vs2019/meta.yaml @@ -0,0 +1,24 @@ +{% set vcver="14.2" %} +{% set vcfeature="14" %} +{% set vsyear="2019" %} +{% set fullver="15.4.27004.2010" %} + +package: + name: vs{{ vsyear }} + version: {{ fullver }} + +build: + skip: True [not win] + script_env: + - VSDEVCMD_ARGS # [win] + +outputs: + - name: vs{{ vsyear }}_{{ cross_compiler_target_platform }} + script: install_activate.bat + track_features: + # VS 2019 is binary-compatible with VS 2017/vc 14.1 and 2015/vc14. Tools are "v142". 
+ strong: + - vc{{ vcfeature }} + about: + summary: Activation and version verification of MSVC {{ vcver }} (VS {{ vsyear }}) compiler + license: BSD 3-clause diff --git a/packaging/wheel/linux_manywheel.sh b/packaging/wheel/linux_manywheel.sh new file mode 100644 index 0000000..d6471aa --- /dev/null +++ b/packaging/wheel/linux_manywheel.sh @@ -0,0 +1,62 @@ +#!/bin/bash +set -ex + +if [ "$#" -ne 1 ]; then + echo "Illegal number of parameters. Pass cuda version" + echo "CUDA version should be cu92, cu100 or cpu" + exit 1 +fi +export CUVER="$1" # cu[0-9]* cpu + +if [[ "$CUVER" == "cu102" ]]; then + cu_suffix="" +else + cu_suffix="+$CUVER" +fi + +export TORCHCSPRNG_BUILD_VERSION="0.4.0.dev$(date "+%Y%m%d")${cu_suffix}" +export TORCHCSPRNG_BUILD_NUMBER="1" +export TORCHCSPRNG_LOCAL_VERSION_LABEL="$CUVER" +export OUT_DIR="/remote/$CUVER" + +pushd /opt/python +DESIRED_PYTHON=(*/) +popd +for desired_py in "${DESIRED_PYTHON[@]}"; do + python_installations+=("/opt/python/$desired_py") +done + +OLD_PATH=$PATH +cd /tmp +rm -rf csprng +git clone https://github.com/pytorch/csprng + +cd /tmp/csprng + +for PYDIR in "${python_installations[@]}"; do + export PATH=$PYDIR/bin:$OLD_PATH + pip install --upgrade pip + pip install numpy pyyaml future + + pip uninstall -y torch || true + pip uninstall -y torch_nightly || true + + export TORCHCSPRNG_PYTORCH_DEPENDENCY_NAME=torch_nightly + pip install torch_nightly -f https://download.pytorch.org/whl/nightly/$CUVER/torch_nightly.html + # CPU/CUDA variants of PyTorch have ABI compatible PyTorch for + # the CPU only bits. Therefore, we + # strip off the local package qualifier, but ONLY if we're + # doing a CPU build. 
+ if [[ "$CUVER" == "cpu" ]]; then + export TORCHCSPRNG_PYTORCH_DEPENDENCY_VERSION="$(pip show torch_nightly | grep ^Version: | sed 's/Version: \+//' | sed 's/+.\+//')" + else + export TORCHCSPRNG_PYTORCH_DEPENDENCY_VERSION="$(pip show torch_nightly | grep ^Version: | sed 's/Version: \+//')" + fi + echo "Building against ${TORCHCSPRNG_PYTORCH_DEPENDENCY_VERSION}" + + pip install ninja + python setup.py clean + python setup.py bdist_wheel + mkdir -p $OUT_DIR + cp dist/*.whl $OUT_DIR/ +done diff --git a/packaging/wheel/osx_wheel.sh b/packaging/wheel/osx_wheel.sh new file mode 100644 index 0000000..566f956 --- /dev/null +++ b/packaging/wheel/osx_wheel.sh @@ -0,0 +1,52 @@ +if [[ ":$PATH:" == *"conda"* ]]; then + echo "existing anaconda install in PATH, remove it and run script" + exit 1 +fi +# download and activate anaconda +rm -rf ~/minconda_wheel_env_tmp +wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh && \ + chmod +x Miniconda3-latest-MacOSX-x86_64.sh && \ + ./Miniconda3-latest-MacOSX-x86_64.sh -b -p ~/minconda_wheel_env_tmp && \ + rm Miniconda3-latest-MacOSX-x86_64.sh + +. 
~/minconda_wheel_env_tmp/bin/activate + + +export TORCHCSPRNG_BUILD_VERSION="0.4.0.dev$(date "+%Y%m%d")" +export TORCHCSPRNG_BUILD_NUMBER="1" +export OUT_DIR=~/torchcsprng_wheels + +export MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ + +pushd /tmp +rm -rf csprng +git clone https://github.com/pytorch/csprng +pushd csprng + +desired_pythons=( "2.7" "3.5" "3.6" "3.7" ) +# for each python +for desired_python in "${desired_pythons[@]}" +do + # create and activate python env + env_name="env$desired_python" + conda create -yn $env_name python="$desired_python" + conda activate $env_name + + pip uninstall -y torch || true + pip uninstall -y torch_nightly || true + + export TORCHCSPRNG_PYTORCH_DEPENDENCY_NAME=torch_nightly + pip install torch_nightly -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html + export TORCHCSPRNG_PYTORCH_DEPENDENCY_VERSION="$(pip show torch_nightly | grep ^Version: | sed 's/Version: *//')" + echo "Building against ${TORCHCSPRNG_PYTORCH_DEPENDENCY_VERSION}" + + # install torchcsprng dependencies + pip install ninja scipy pytest pycrypto + + python setup.py clean + python setup.py bdist_wheel + mkdir -p $OUT_DIR + cp dist/*.whl $OUT_DIR/ +done +popd +popd diff --git a/packaging/wheel/relocate.py b/packaging/wheel/relocate.py new file mode 100644 index 0000000..fd92b68 --- /dev/null +++ b/packaging/wheel/relocate.py @@ -0,0 +1,408 @@ +# -*- coding: utf-8 -*- + +"""Helper script to package wheels and relocate binaries.""" + +import glob +import hashlib +import io + +# Standard library imports +import os +import os.path as osp +import platform +import shutil +import subprocess +import sys +import zipfile +from base64 import urlsafe_b64encode + +# Third party imports +if sys.platform == "linux": + from auditwheel.lddtree import lddtree +from wheel.bdist_wheel import get_abi_tag + + +ALLOWLIST = { + "libgcc_s.so.1", + "libstdc++.so.6", + "libm.so.6", + "libdl.so.2", + "librt.so.1", + "libc.so.6", + "libnsl.so.1", + "libutil.so.1", +
"libpthread.so.0", + "libresolv.so.2", + "libX11.so.6", + "libXext.so.6", + "libXrender.so.1", + "libICE.so.6", + "libSM.so.6", + "libGL.so.1", + "libgobject-2.0.so.0", + "libgthread-2.0.so.0", + "libglib-2.0.so.0", + "ld-linux-x86-64.so.2", + "ld-2.17.so", +} + +WINDOWS_ALLOWLIST = { + "MSVCP140.dll", + "KERNEL32.dll", + "VCRUNTIME140_1.dll", + "VCRUNTIME140.dll", + "api-ms-win-crt-heap-l1-1-0.dll", + "api-ms-win-crt-runtime-l1-1-0.dll", + "api-ms-win-crt-stdio-l1-1-0.dll", + "api-ms-win-crt-filesystem-l1-1-0.dll", + "api-ms-win-crt-string-l1-1-0.dll", + "api-ms-win-crt-environment-l1-1-0.dll", + "api-ms-win-crt-math-l1-1-0.dll", + "api-ms-win-crt-convert-l1-1-0.dll", +} + + +HERE = osp.dirname(osp.abspath(__file__)) +PACKAGE_ROOT = osp.dirname(osp.dirname(HERE)) +PLATFORM_ARCH = platform.machine() +PYTHON_VERSION = sys.version_info + + +def read_chunks(file, size=io.DEFAULT_BUFFER_SIZE): + """Yield pieces of data from a file-like object until EOF.""" + while True: + chunk = file.read(size) + if not chunk: + break + yield chunk + + +def rehash(path, blocksize=1 << 20): + """Return (hash, length) for path using hashlib.sha256()""" + h = hashlib.sha256() + length = 0 + with open(path, "rb") as f: + for block in read_chunks(f, size=blocksize): + length += len(block) + h.update(block) + digest = "sha256=" + urlsafe_b64encode(h.digest()).decode("latin1").rstrip("=") + # unicode/str python2 issues + return (digest, str(length)) # type: ignore + + +def unzip_file(file, dest): + """Decompress zip `file` into directory `dest`.""" + with zipfile.ZipFile(file, "r") as zip_ref: + zip_ref.extractall(dest) + + +def is_program_installed(basename): + """ + Return program absolute path if installed in PATH. + Otherwise, return None + On macOS systems, a .app is considered installed if + it exists. 
+ """ + if sys.platform == "darwin" and basename.endswith(".app") and osp.exists(basename): + return basename + + for path in os.environ["PATH"].split(os.pathsep): + abspath = osp.join(path, basename) + if osp.isfile(abspath): + return abspath + + +def find_program(basename): + """ + Find program in PATH and return absolute path + Try adding .exe or .bat to basename on Windows platforms + (return None if not found) + """ + names = [basename] + if os.name == "nt": + # Windows platforms + extensions = (".exe", ".bat", ".cmd", ".dll") + if not basename.endswith(extensions): + names = [basename + ext for ext in extensions] + [basename] + for name in names: + path = is_program_installed(name) + if path: + return path + + +def patch_new_path(library_path, new_dir): + library = osp.basename(library_path) + name, *rest = library.split(".") + rest = ".".join(rest) + hash_id = hashlib.sha256(library_path.encode("utf-8")).hexdigest()[:8] + new_name = ".".join([name, hash_id, rest]) + return osp.join(new_dir, new_name) + + +def find_dll_dependencies(dumpbin, binary): + out = subprocess.run([dumpbin, "/dependents", binary], stdout=subprocess.PIPE) + out = out.stdout.strip().decode("utf-8") + start_index = out.find("dependencies:") + len("dependencies:") + end_index = out.find("Summary") + dlls = out[start_index:end_index].strip() + dlls = dlls.split(os.linesep) + dlls = [dll.strip() for dll in dlls] + return dlls + + +def relocate_elf_library(patchelf, output_dir, output_library, binary): + """ + Relocate an ELF shared library to be packaged on a wheel. + + Given a shared library, find the transitive closure of its dependencies, + rename and copy them into the wheel while updating their respective rpaths. 
+ """ + + print("Relocating {0}".format(binary)) + binary_path = osp.join(output_library, binary) + + ld_tree = lddtree(binary_path) + tree_libs = ld_tree["libs"] + + binary_queue = [(n, binary) for n in ld_tree["needed"]] + binary_paths = {binary: binary_path} + binary_dependencies = {} + + while binary_queue != []: + library, parent = binary_queue.pop(0) + library_info = tree_libs[library] + print(library) + + if library_info["path"] is None: + print("Omitting {0}".format(library)) + continue + + if library in ALLOWLIST: + # Omit glibc/gcc/system libraries + print("Omitting {0}".format(library)) + continue + + parent_dependencies = binary_dependencies.get(parent, []) + parent_dependencies.append(library) + binary_dependencies[parent] = parent_dependencies + + if library in binary_paths: + continue + + binary_paths[library] = library_info["path"] + binary_queue += [(n, library) for n in library_info["needed"]] + + print("Copying dependencies to wheel directory") + new_libraries_path = osp.join(output_dir, "torchcsprng.libs") + os.makedirs(new_libraries_path) + + new_names = {binary: binary_path} + + for library in binary_paths: + if library != binary: + library_path = binary_paths[library] + new_library_path = patch_new_path(library_path, new_libraries_path) + print("{0} -> {1}".format(library, new_library_path)) + shutil.copyfile(library_path, new_library_path) + new_names[library] = new_library_path + + print("Updating dependency names by new files") + for library in binary_paths: + if library != binary: + if library not in binary_dependencies: + continue + library_dependencies = binary_dependencies[library] + new_library_name = new_names[library] + for dep in library_dependencies: + new_dep = osp.basename(new_names[dep]) + print("{0}: {1} -> {2}".format(library, dep, new_dep)) + subprocess.check_output( + [patchelf, "--replace-needed", dep, new_dep, new_library_name], + cwd=new_libraries_path, + ) + + print("Updating library rpath") + subprocess.check_output( + 
[patchelf, "--set-rpath", "$ORIGIN", new_library_name], + cwd=new_libraries_path, + ) + + subprocess.check_output( + [patchelf, "--print-rpath", new_library_name], cwd=new_libraries_path + ) + + print("Update library dependencies") + library_dependencies = binary_dependencies[binary] + for dep in library_dependencies: + new_dep = osp.basename(new_names[dep]) + print("{0}: {1} -> {2}".format(binary, dep, new_dep)) + subprocess.check_output( + [patchelf, "--replace-needed", dep, new_dep, binary], cwd=output_library + ) + + print("Update library rpath") + subprocess.check_output( + [patchelf, "--set-rpath", "$ORIGIN:$ORIGIN/../torchcsprng.libs", binary_path], + cwd=output_library, + ) + + +def relocate_dll_library(dumpbin, output_dir, output_library, binary): + """ + Relocate a DLL/PE shared library to be packaged on a wheel. + + Given a shared library, find the transitive closure of its dependencies, + rename and copy them into the wheel. + """ + print("Relocating {0}".format(binary)) + binary_path = osp.join(output_library, binary) + + library_dlls = find_dll_dependencies(dumpbin, binary_path) + binary_queue = [(dll, binary) for dll in library_dlls] + binary_paths = {binary: binary_path} + binary_dependencies = {} + + while binary_queue != []: + library, parent = binary_queue.pop(0) + if library in WINDOWS_ALLOWLIST or library.startswith("api-ms-win"): + print("Omitting {0}".format(library)) + continue + + library_path = find_program(library) + if library_path is None: + print("{0} not found".format(library)) + continue + + if osp.basename(osp.dirname(library_path)) == "system32": + continue + + print("{0}: {1}".format(library, library_path)) + parent_dependencies = binary_dependencies.get(parent, []) + parent_dependencies.append(library) + binary_dependencies[parent] = parent_dependencies + + if library in binary_paths: + continue + + binary_paths[library] = library_path + downstream_dlls = find_dll_dependencies(dumpbin, library_path) + binary_queue += [(n, 
library) for n in downstream_dlls] + + print("Copying dependencies to wheel directory") + package_dir = osp.join(output_dir, "torchcsprng") + for library in binary_paths: + if library != binary: + library_path = binary_paths[library] + new_library_path = osp.join(package_dir, library) + print("{0} -> {1}".format(library, new_library_path)) + shutil.copyfile(library_path, new_library_path) + + +def compress_wheel(output_dir, wheel, wheel_dir, wheel_name): + """Create RECORD file and compress wheel distribution.""" + print("Update RECORD file in wheel") + dist_info = glob.glob(osp.join(output_dir, "*.dist-info"))[0] + record_file = osp.join(dist_info, "RECORD") + + with open(record_file, "w") as f: + for root, _, files in os.walk(output_dir): + for this_file in files: + full_file = osp.join(root, this_file) + rel_file = osp.relpath(full_file, output_dir) + if full_file == record_file: + f.write("{0},,\n".format(rel_file)) + else: + digest, size = rehash(full_file) + f.write("{0},{1},{2}\n".format(rel_file, digest, size)) + + print("Compressing wheel") + base_wheel_name = osp.join(wheel_dir, wheel_name) + shutil.make_archive(base_wheel_name, "zip", output_dir) + os.remove(wheel) + shutil.move("{0}.zip".format(base_wheel_name), wheel) + shutil.rmtree(output_dir) + + +def patch_linux(): + # Get patchelf location + patchelf = find_program("patchelf") + if patchelf is None: + raise FileNotFoundError( + "Patchelf was not found in the system, please" + " make sure that is available on the PATH." 
+ ) + + # Find wheel + print("Finding wheels...") + wheels = glob.glob(osp.join(PACKAGE_ROOT, "dist", "*.whl")) + output_dir = osp.join(PACKAGE_ROOT, "dist", ".wheel-process") + + image_binary = "image.so" + video_binary = "video_reader.so" + torchcsprng_binaries = [image_binary, video_binary] + for wheel in wheels: + if osp.exists(output_dir): + shutil.rmtree(output_dir) + + os.makedirs(output_dir) + + print("Unzipping wheel...") + wheel_file = osp.basename(wheel) + wheel_dir = osp.dirname(wheel) + print("{0}".format(wheel_file)) + wheel_name, _ = osp.splitext(wheel_file) + unzip_file(wheel, output_dir) + + print("Finding ELF dependencies...") + output_library = osp.join(output_dir, "torchcsprng") + for binary in torchcsprng_binaries: + if osp.exists(osp.join(output_library, binary)): + relocate_elf_library(patchelf, output_dir, output_library, binary) + + compress_wheel(output_dir, wheel, wheel_dir, wheel_name) + + +def patch_win(): + # Get dumpbin location + dumpbin = find_program("dumpbin") + if dumpbin is None: + raise FileNotFoundError( + "Dumpbin was not found in the system, please" + " make sure that is available on the PATH." 
+ ) + + # Find wheel + print("Finding wheels...") + wheels = glob.glob(osp.join(PACKAGE_ROOT, "dist", "*.whl")) + output_dir = osp.join(PACKAGE_ROOT, "dist", ".wheel-process") + + image_binary = "image.pyd" + video_binary = "video_reader.pyd" + torchcsprng_binaries = [image_binary, video_binary] + for wheel in wheels: + if osp.exists(output_dir): + shutil.rmtree(output_dir) + + os.makedirs(output_dir) + + print("Unzipping wheel...") + wheel_file = osp.basename(wheel) + wheel_dir = osp.dirname(wheel) + print("{0}".format(wheel_file)) + wheel_name, _ = osp.splitext(wheel_file) + unzip_file(wheel, output_dir) + + print("Finding DLL/PE dependencies...") + output_library = osp.join(output_dir, "torchcsprng") + for binary in torchcsprng_binaries: + if osp.exists(osp.join(output_library, binary)): + relocate_dll_library(dumpbin, output_dir, output_library, binary) + + compress_wheel(output_dir, wheel, wheel_dir, wheel_name) + + +if __name__ == "__main__": + if sys.platform == "linux": + patch_linux() + elif sys.platform == "win32": + patch_win() diff --git a/packaging/windows/azure-pipelines-ci.yml b/packaging/windows/azure-pipelines-ci.yml new file mode 100644 index 0000000..6f9f346 --- /dev/null +++ b/packaging/windows/azure-pipelines-ci.yml @@ -0,0 +1,11 @@ + +# Turn off auto builds for commits +trigger: none +pr: none + +jobs: +- template: templates/build_task.yml + parameters: + package: 'Wheels' + spec: 'CPU' + msagent: true diff --git a/packaging/windows/azure-pipelines.yml b/packaging/windows/azure-pipelines.yml new file mode 100644 index 0000000..d024057 --- /dev/null +++ b/packaging/windows/azure-pipelines.yml @@ -0,0 +1,35 @@ + +# Turn off auto builds for commits +trigger: none +pr: none + +jobs: +- template: templates/auth_task.yml + +- template: templates/build_task.yml + parameters: + package: 'Wheels' + spec: 'CPU' + msagent: true + +- template: templates/build_task.yml + parameters: + package: 'Conda' + spec: 'CPU' + msagent: true + +- template: 
templates/build_task.yml + parameters: + package: 'Wheels' + spec: 'CUDA' + msagent: true + +- template: templates/build_task.yml + parameters: + package: 'Conda' + spec: 'CUDA' + msagent: true + +- template: templates/linux_build_task.yml + parameters: + msagent: $(ms.hosted.agent.cpu) diff --git a/packaging/windows/build_csprng.bat b/packaging/windows/build_csprng.bat new file mode 100644 index 0000000..e6da23d --- /dev/null +++ b/packaging/windows/build_csprng.bat @@ -0,0 +1,145 @@ +@echo off + +:: This script parses args, installs required libraries (miniconda, MKL, +:: Magma), and then delegates to cpu.bat, cuda80.bat, etc. + +IF NOT "%CUDA_VERSION%" == "" IF NOT "%TORCHCSPRNG_BUILD_VERSION%" == "" if NOT "%TORCHCSPRNG_BUILD_NUMBER%" == "" goto env_end +if "%~1"=="" goto arg_error +if "%~2"=="" goto arg_error +if "%~3"=="" goto arg_error +if NOT "%~4"=="" goto arg_error +goto arg_end + +:arg_error + +echo Illegal number of parameters. Pass cuda version, pytorch version, build number +echo CUDA version should be Mm with no dot, e.g. '80' +echo DESIRED_PYTHON should be M.m, e.g. 
'2.7' +exit /b 1 + +:arg_end + +set CUDA_VERSION=%~1 +set TORCHCSPRNG_BUILD_VERSION=%~2 +set TORCHCSPRNG_BUILD_NUMBER=%~3 + +set BUILD_VERSION=%TORCHCSPRNG_BUILD_VERSION% + +:env_end + +if NOT "%CUDA_VERSION%" == "cpu" ( + set CUDA_PREFIX=cuda%CUDA_VERSION% + set CUVER=cu%CUDA_VERSION% + set FORCE_CUDA=1 +) else ( + set CUDA_PREFIX=cpu + set CUVER=cpu +) + +set BUILD_CSPRNG=1 +REM set TORCH_WHEEL=torch -f https://download.pytorch.org/whl/%CUVER%/stable.html --no-index + +IF "%DESIRED_PYTHON%" == "" set DESIRED_PYTHON=3.5;3.6;3.7 +set DESIRED_PYTHON_PREFIX=%DESIRED_PYTHON:.=% +set DESIRED_PYTHON_PREFIX=py%DESIRED_PYTHON_PREFIX:;=;py% + +set SRC_DIR=%~dp0 +pushd %SRC_DIR% + +:: Install Miniconda3 +set "CONDA_HOME=%CD%\conda" +set "tmp_conda=%CONDA_HOME%" +set "miniconda_exe=%CD%\miniconda.exe" +rmdir /s /q conda +del miniconda.exe +curl -k https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "%miniconda_exe%" +call ..\conda\install_conda.bat +IF ERRORLEVEL 1 exit /b 1 +set "ORIG_PATH=%PATH%" +set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%" + +:: Create a new conda environment +setlocal EnableDelayedExpansion +FOR %%v IN (%DESIRED_PYTHON%) DO ( + set PYTHON_VERSION_STR=%%v + set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=! + conda remove -n py!PYTHON_VERSION_STR! --all -y || rmdir %CONDA_HOME%\envs\py!PYTHON_VERSION_STR! /s + conda create -n py!PYTHON_VERSION_STR! -y -q -c defaults -c conda-forge numpy>=1.11 mkl>=2018 python=%%v ca-certificates scipy pycrypto +) + +:: Uncomment for stable releases +:: FOR %%v IN (%DESIRED_PYTHON%) DO ( +:: set PYTHON_VERSION_STR=%%v +:: set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=! 
+:: set "PATH=%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\scripts;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\Library\bin;%ORIG_PATH%" + +:: if "%CUDA_VERSION%" == "100" ( +:: set TORCH_WHEEL=https://download.pytorch.org/whl/%CUVER%/torch-1.2.0-cp!PYTHON_VERSION_STR!-cp!PYTHON_VERSION_STR!m-win_amd64.whl +:: ) else ( +:: set TORCH_WHEEL=https://download.pytorch.org/whl/%CUVER%/torch-1.2.0%%2B%CUVER%-cp!PYTHON_VERSION_STR!-cp!PYTHON_VERSION_STR!m-win_amd64.whl +:: ) +:: echo Installing !TORCH_WHEEL!... +:: pip install "!TORCH_WHEEL!" +:: ) + +:: Uncomment for nightly releases +FOR %%v IN (%DESIRED_PYTHON%) DO ( + set PYTHON_VERSION_STR=%%v + set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=! + set "PATH=%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\scripts;%CONDA_HOME%\envs\py!PYTHON_VERSION_STR!\Library\bin;%ORIG_PATH%" + + set TORCH_WHEEL=torch --pre -f https://download.pytorch.org/whl/nightly/%CUVER%/torch_nightly.html + echo Installing !TORCH_WHEEL!... + pip install !TORCH_WHEEL! 
+) + +endlocal + +if "%DEBUG%" == "1" ( + set BUILD_TYPE=debug +) ELSE ( + set BUILD_TYPE=release +) + +:: Install sccache +if "%USE_SCCACHE%" == "1" ( + mkdir %CD%\tmp_bin + curl -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output %CD%\tmp_bin\sccache.exe + if not "%CUDA_VERSION%" == "" ( + copy %CD%\tmp_bin\sccache.exe %CD%\tmp_bin\nvcc.exe + + set CUDA_NVCC_EXECUTABLE=%CD%\tmp_bin\nvcc + set "PATH=%CD%\tmp_bin;%PATH%" + ) +) + +for %%v in (%DESIRED_PYTHON_PREFIX%) do ( + :: Activate Python Environment + set PYTHON_PREFIX=%%v + set "PATH=%CONDA_HOME%\envs\%%v;%CONDA_HOME%\envs\%%v\scripts;%CONDA_HOME%\envs\%%v\Library\bin;%ORIG_PATH%" + if defined INCLUDE ( + set "INCLUDE=%INCLUDE%;%CONDA_HOME%\envs\%%v\Library\include" + ) else ( + set "INCLUDE=%CONDA_HOME%\envs\%%v\Library\include" + ) + if defined LIB ( + set "LIB=%LIB%;%CONDA_HOME%\envs\%%v\Library\lib" + ) else ( + set "LIB=%CONDA_HOME%\envs\%%v\Library\lib" + ) + @setlocal + :: Set Flags + if NOT "%CUDA_VERSION%"=="cpu" ( + set CUDNN_VERSION=7 + ) + call %CUDA_PREFIX%.bat + IF ERRORLEVEL 1 exit /b 1 + call internal\test.bat + IF ERRORLEVEL 1 exit /b 1 + @endlocal +) + +set "PATH=%ORIG_PATH%" +popd + +IF ERRORLEVEL 1 exit /b 1 diff --git a/packaging/windows/cpu.bat b/packaging/windows/cpu.bat new file mode 100644 index 0000000..1897fb5 --- /dev/null +++ b/packaging/windows/cpu.bat @@ -0,0 +1,37 @@ +@echo off + +IF NOT "%BUILD_CSPRNG%" == "" ( + set MODULE_NAME=csprng +) ELSE ( + set MODULE_NAME=pytorch +) + +IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( + call internal\clone.bat + cd .. 
+ IF ERRORLEVEL 1 goto eof +) ELSE ( + call internal\clean.bat +) + +call internal\check_deps.bat +IF ERRORLEVEL 1 goto eof + +REM Check for optional components + +echo Disabling CUDA +set NO_CUDA=1 +set USE_CUDA=0 + +IF "%BUILD_CSPRNG%" == "" ( + call internal\check_opts.bat + IF ERRORLEVEL 1 goto eof + + call internal\copy_cpu.bat + IF ERRORLEVEL 1 goto eof +) + +call internal\setup.bat +IF ERRORLEVEL 1 goto eof + +:eof diff --git a/packaging/windows/cuda101.bat b/packaging/windows/cuda101.bat new file mode 100644 index 0000000..016baec --- /dev/null +++ b/packaging/windows/cuda101.bat @@ -0,0 +1,59 @@ +@echo off + +IF NOT "%BUILD_CSPRNG%" == "" ( + set MODULE_NAME=csprng +) ELSE ( + set MODULE_NAME=pytorch +) + +IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( + call internal\clone.bat + cd .. + IF ERRORLEVEL 1 goto eof +) ELSE ( + call internal\clean.bat +) + +call internal\check_deps.bat +IF ERRORLEVEL 1 goto eof + +REM Check for optional components + +set NO_CUDA= +set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 + +IF "%NVTOOLSEXT_PATH%"=="" ( + echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing + exit /b 1 + goto optcheck +) + +IF "%CUDA_PATH_V10_1%"=="" ( + echo CUDA 10.1 not found, failing + exit /b 1 +) ELSE ( + IF "%BUILD_CSPRNG%" == "" ( + set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;6.1;7.0;7.5 + set TORCH_NVCC_FLAGS=-Xfatbin -compress-all + ) ELSE ( + set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 + ) + + set "CUDA_PATH=%CUDA_PATH_V10_1%" + set "PATH=%CUDA_PATH_V10_1%\bin;%PATH%" +) + +:optcheck + +IF "%BUILD_CSPRNG%" == "" ( + call internal\check_opts.bat + IF ERRORLEVEL 1 goto eof + + call internal\copy.bat + IF ERRORLEVEL 1 goto eof +) + +call internal\setup.bat +IF ERRORLEVEL 1 goto eof 
+ +:eof diff --git a/packaging/windows/cuda102.bat b/packaging/windows/cuda102.bat new file mode 100644 index 0000000..d5a0bdf --- /dev/null +++ b/packaging/windows/cuda102.bat @@ -0,0 +1,59 @@ +@echo off + +IF NOT "%BUILD_CSPRNG%" == "" ( + set MODULE_NAME=csprng +) ELSE ( + set MODULE_NAME=pytorch +) + +IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( + call internal\clone.bat + cd .. + IF ERRORLEVEL 1 goto eof +) ELSE ( + call internal\clean.bat +) + +call internal\check_deps.bat +IF ERRORLEVEL 1 goto eof + +REM Check for optional components + +set NO_CUDA= +set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 + +IF "%NVTOOLSEXT_PATH%"=="" ( + echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing + exit /b 1 + goto optcheck +) + +IF "%CUDA_PATH_V10_2%"=="" ( + echo CUDA 10.2 not found, failing + exit /b 1 +) ELSE ( + IF "%BUILD_CSPRNG%" == "" ( + set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;6.1;7.0;7.5 + set TORCH_NVCC_FLAGS=-Xfatbin -compress-all + ) ELSE ( + set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 + ) + + set "CUDA_PATH=%CUDA_PATH_V10_2%" + set "PATH=%CUDA_PATH_V10_2%\bin;%PATH%" +) + +:optcheck + +IF "%BUILD_CSPRNG%" == "" ( + call internal\check_opts.bat + IF ERRORLEVEL 1 goto eof + + call internal\copy.bat + IF ERRORLEVEL 1 goto eof +) + +call internal\setup.bat +IF ERRORLEVEL 1 goto eof + +:eof diff --git a/packaging/windows/cuda92.bat b/packaging/windows/cuda92.bat new file mode 100644 index 0000000..7f520da --- /dev/null +++ b/packaging/windows/cuda92.bat @@ -0,0 +1,59 @@ +@echo off + +IF NOT "%BUILD_CSPRNG%" == "" ( + set MODULE_NAME=csprng +) ELSE ( + set MODULE_NAME=pytorch +) + +IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( + call internal\clone.bat + cd .. 
+ IF ERRORLEVEL 1 goto eof +) ELSE ( + call internal\clean.bat +) + +call internal\check_deps.bat +IF ERRORLEVEL 1 goto eof + +REM Check for optional components + +set USE_CUDA= +set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 + +IF "%NVTOOLSEXT_PATH%"=="" ( + echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing + exit /b 1 + goto optcheck +) + +IF "%CUDA_PATH_V9_2%"=="" ( + echo CUDA 9.2 not found, failing + exit /b 1 +) ELSE ( + IF "%BUILD_CSPRNG%" == "" ( + set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;6.1;7.0 + set TORCH_NVCC_FLAGS=-Xfatbin -compress-all + ) ELSE ( + set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_61,code=sm_61 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 + ) + + set "CUDA_PATH=%CUDA_PATH_V9_2%" + set "PATH=%CUDA_PATH_V9_2%\bin;%PATH%" +) + +:optcheck + +IF "%BUILD_CSPRNG%" == "" ( + call internal\check_opts.bat + IF ERRORLEVEL 1 goto eof + + call internal\copy.bat + IF ERRORLEVEL 1 goto eof +) + +call internal\setup.bat +IF ERRORLEVEL 1 goto eof + +:eof diff --git a/packaging/windows/internal/auth.bat b/packaging/windows/internal/auth.bat new file mode 100644 index 0000000..c874bce --- /dev/null +++ b/packaging/windows/internal/auth.bat @@ -0,0 +1,46 @@ +@echo off + +: From the following doc, the build won't be triggered if the users don't sign in daily. +: https://docs.microsoft.com/en-us/azure/devops/pipelines/build/triggers?tabs=yaml&view=vsts#my-build-didnt-run-what-happened +: To avoid this problem, we can just go through the sign in process using the following command. 
+ +:auth_start + +if "%RETRY_TIMES%" == "" ( + set /a RETRY_TIMES=10 + set /a SLEEP_TIME=2 +) else ( + set /a RETRY_TIMES=%RETRY_TIMES%-1 + set /a SLEEP_TIME=%SLEEP_TIME%*2 +) + +for /f "usebackq tokens=*" %%i in (`curl -so NUL -w "%%{http_code}" -u %VSTS_AUTH% https://dev.azure.com/pytorch`) do ( + set STATUS_CODE=%%i +) + +IF NOT "%STATUS_CODE%" == "200" ( + echo Auth retry times remaining: %RETRY_TIMES% + echo Sleep time: %SLEEP_TIME% seconds + IF %RETRY_TIMES% EQU 0 ( + echo Auth failed + goto err + ) + waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul + goto auth_start +) ELSE ( + echo Login Attempt Succeeded + goto auth_end +) + +:err + +: Throw a warning if it fails +powershell -c "Write-Warning 'Login Attempt Failed'" + +:auth_end + +set RETRY_TIMES= +set SLEEP_TIME= +set STATUS_CODE= + +exit /b 0 diff --git a/packaging/windows/internal/build_conda.bat b/packaging/windows/internal/build_conda.bat new file mode 100644 index 0000000..6ffd67b --- /dev/null +++ b/packaging/windows/internal/build_conda.bat @@ -0,0 +1,15 @@ +if "%VC_YEAR%" == "2017" set VSDEVCMD_ARGS=-vcvars_ver=14.13 +if "%VC_YEAR%" == "2017" powershell packaging/windows/internal/vs2017_install.ps1 +if errorlevel 1 exit /b 1 + +call packaging/windows/internal/cuda_install.bat +if errorlevel 1 exit /b 1 + +call packaging/windows/internal/nightly_defaults.bat Conda +if errorlevel 1 exit /b 1 + +set PYTORCH_FINAL_PACKAGE_DIR=%CD%\packaging\windows\output +if not exist "%PYTORCH_FINAL_PACKAGE_DIR%" mkdir %PYTORCH_FINAL_PACKAGE_DIR% + +bash ./packaging/conda/build_csprng.sh %CUDA_VERSION% %TORCHCSPRNG_BUILD_VERSION% %TORCHCSPRNG_BUILD_NUMBER% +if errorlevel 1 exit /b 1 diff --git a/packaging/windows/internal/build_wheels.bat b/packaging/windows/internal/build_wheels.bat new file mode 100644 index 0000000..876b8b0 --- /dev/null +++ b/packaging/windows/internal/build_wheels.bat @@ -0,0 +1,12 @@ +if "%VC_YEAR%" == "2017" set VSDEVCMD_ARGS=-vcvars_ver=14.13 +if "%VC_YEAR%" == "2017" 
powershell packaging/windows/internal/vs2017_install.ps1 +if errorlevel 1 exit /b 1 + +call packaging/windows/internal/cuda_install.bat +if errorlevel 1 exit /b 1 + +call packaging/windows/internal/nightly_defaults.bat Wheels +if errorlevel 1 exit /b 1 + +call packaging/windows/build_csprng.bat %CUDA_VERSION% %TORCHCSPRNG_BUILD_VERSION% %TORCHCSPRNG_BUILD_NUMBER% +if errorlevel 1 exit /b 1 diff --git a/packaging/windows/internal/check_deps.bat b/packaging/windows/internal/check_deps.bat new file mode 100644 index 0000000..739e568 --- /dev/null +++ b/packaging/windows/internal/check_deps.bat @@ -0,0 +1,67 @@ +@echo off + +REM Check for necessary components + +IF NOT "%PROCESSOR_ARCHITECTURE%"=="AMD64" ( + echo You should use 64 bits Windows to build and run PyTorch + exit /b 1 +) + +IF "%BUILD_CSPRNG%" == "" ( + where /q cmake.exe + + IF ERRORLEVEL 1 ( + echo CMake is required to compile PyTorch on Windows + exit /b 1 + ) +) + +IF NOT EXIST "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" ( + echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows + exit /b 1 +) + +for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( + if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( + set "VS15INSTALLDIR=%%i" + set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" + goto vswhere + ) +) + +:vswhere +IF "%VS15VCVARSALL%"=="" ( + echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows + exit /b 1 +) + +set MSSdk=1 +set DISTUTILS_USE_SDK=1 + +where /q python.exe + +IF ERRORLEVEL 1 ( + echo Python x64 3.5 or up is required to compile PyTorch on Windows + exit /b 1 +) + +for /F "usebackq delims=" %%i in (`python -c "import sys; print('{0[0]}{0[1]}'.format(sys.version_info))"`) do ( + set /a PYVER=%%i +) + +if %PYVER% LSS 35 ( + echo Warning: PyTorch for Python 2 under Windows is 
experimental. + echo Python x64 3.5 or up is recommended to compile PyTorch on Windows + echo Maybe you can create a virual environment if you have conda installed: + echo ^> conda create -n test python=3.6 pyyaml mkl numpy + echo ^> activate test +) + +for /F "usebackq delims=" %%i in (`python -c "import struct;print( 8 * struct.calcsize('P'))"`) do ( + set /a PYSIZE=%%i +) + +if %PYSIZE% NEQ 64 ( + echo Python x64 3.5 or up is required to compile PyTorch on Windows + exit /b 1 +) diff --git a/packaging/windows/internal/check_opts.bat b/packaging/windows/internal/check_opts.bat new file mode 100644 index 0000000..003ad92 --- /dev/null +++ b/packaging/windows/internal/check_opts.bat @@ -0,0 +1,33 @@ +@echo off + +REM Check for optional components + +where /q ninja.exe + +IF NOT ERRORLEVEL 1 ( + echo Ninja found, using it to speed up builds + set CMAKE_GENERATOR=Ninja +) + +where /q clcache.exe + +IF NOT ERRORLEVEL 1 ( + echo clcache found, using it to speed up builds + set CC=clcache + set CXX=clcache +) + +where /q sccache.exe + +IF NOT ERRORLEVEL 1 ( + echo sccache found, using it to speed up builds + set CC=sccache cl + set CXX=sccache cl +) + +IF exist "%MKLProductDir%\mkl\lib\intel64_win" ( + echo MKL found, adding it to build + set "LIB=%MKLProductDir%\mkl\lib\intel64_win;%MKLProductDir%\compiler\lib\intel64_win;%LIB%"; +) + +exit /b 0 diff --git a/packaging/windows/internal/clean.bat b/packaging/windows/internal/clean.bat new file mode 100644 index 0000000..7489640 --- /dev/null +++ b/packaging/windows/internal/clean.bat @@ -0,0 +1,5 @@ +@echo off + +cd %MODULE_NAME% +python setup.py clean +cd .. diff --git a/packaging/windows/internal/clone.bat b/packaging/windows/internal/clone.bat new file mode 100644 index 0000000..758527c --- /dev/null +++ b/packaging/windows/internal/clone.bat @@ -0,0 +1,56 @@ +@echo off + +:: The conda and wheels jobs are seperated on Windows, so we don't need to clone again. 
+IF "%BUILD_CSPRNG%" == "" ( + if exist "%NIGHTLIES_PYTORCH_ROOT%" ( + xcopy /E /Y /Q "%NIGHTLIES_PYTORCH_ROOT%" pytorch\ + cd pytorch + goto submodule + ) +) + +git clone https://github.com/%PYTORCH_REPO%/%MODULE_NAME% + +cd %MODULE_NAME% + +IF NOT "%BUILD_CSPRNG%" == "" goto latest_end + +IF "%PYTORCH_BRANCH%" == "latest" ( goto latest_start ) else ( goto latest_end ) + +:latest_start + +if "%NIGHTLIES_DATE%" == "" ( goto date_start ) else ( goto date_end ) + +:date_start + +set "DATE_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyy_MM_dd'" +set "DATE_COMPACT_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyyMMdd'" + +FOR /F "delims=" %%i IN ('powershell -c "%DATE_CMD%"') DO set NIGHTLIES_DATE=%%i +FOR /F "delims=" %%i IN ('powershell -c "%DATE_COMPACT_CMD%"') DO set NIGHTLIES_DATE_COMPACT=%%i + +:date_end + +if "%NIGHTLIES_DATE_COMPACT%" == "" set NIGHTLIES_DATE_COMPACT=%NIGHTLIES_DATE:~0,4%%NIGHTLIES_DATE:~5,2%%NIGHTLIES_DATE:~8,2% + +:: Switch to the latest commit by 11:59 yesterday +echo PYTORCH_BRANCH is set to latest so I will find the last commit +echo before 0:00 midnight on %NIGHTLIES_DATE% +set git_date=%NIGHTLIES_DATE:_=-% +FOR /F "delims=" %%i IN ('git log --before %git_date% -n 1 "--pretty=%%H"') DO set last_commit=%%i +echo Setting PYTORCH_BRANCH to %last_commit% since that was the last +echo commit before %NIGHTLIES_DATE% +set PYTORCH_BRANCH=%last_commit% + +:latest_end + +IF "%PYTORCH_BRANCH%" == "" ( + set PYTORCH_BRANCH=v%TORCHCSPRNG_BUILD_VERSION% +) +git checkout %PYTORCH_BRANCH% +IF ERRORLEVEL 1 git checkout tags/%PYTORCH_BRANCH% + +:submodule + +git submodule update --init --recursive +IF ERRORLEVEL 1 exit /b 1 diff --git a/packaging/windows/internal/copy.bat b/packaging/windows/internal/copy.bat new file mode 
100644 index 0000000..b4aa397 --- /dev/null +++ b/packaging/windows/internal/copy.bat @@ -0,0 +1,13 @@ +copy "%CUDA_PATH%\bin\cusparse64_%CUDA_VERSION%.dll*" pytorch\torch\lib +copy "%CUDA_PATH%\bin\cublas64_%CUDA_VERSION%.dll*" pytorch\torch\lib +copy "%CUDA_PATH%\bin\cudart64_%CUDA_VERSION%.dll*" pytorch\torch\lib +copy "%CUDA_PATH%\bin\curand64_%CUDA_VERSION%.dll*" pytorch\torch\lib +copy "%CUDA_PATH%\bin\cufft64_%CUDA_VERSION%.dll*" pytorch\torch\lib +copy "%CUDA_PATH%\bin\cufftw64_%CUDA_VERSION%.dll*" pytorch\torch\lib + +copy "%CUDA_PATH%\bin\cudnn64_%CUDNN_VERSION%.dll*" pytorch\torch\lib +copy "%CUDA_PATH%\bin\nvrtc64_%CUDA_VERSION%*.dll*" pytorch\torch\lib +copy "%CUDA_PATH%\bin\nvrtc-builtins64_%CUDA_VERSION%.dll*" pytorch\torch\lib + +copy "C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64\nvToolsExt64_1.dll*" pytorch\torch\lib +copy "%CONDA_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib diff --git a/packaging/windows/internal/copy_cpu.bat b/packaging/windows/internal/copy_cpu.bat new file mode 100644 index 0000000..f5b9d11 --- /dev/null +++ b/packaging/windows/internal/copy_cpu.bat @@ -0,0 +1 @@ +copy "%CONDA_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib diff --git a/packaging/windows/internal/cuda_install.bat b/packaging/windows/internal/cuda_install.bat new file mode 100644 index 0000000..9ca08e1 --- /dev/null +++ b/packaging/windows/internal/cuda_install.bat @@ -0,0 +1,201 @@ +@echo on + +if "%CU_VERSION%" == "cpu" ( + echo Skipping for CPU builds + exit /b 0 +) + +set SRC_DIR=%~dp0\.. 
+ +if not exist "%SRC_DIR%\temp_build" mkdir "%SRC_DIR%\temp_build" + +set /a CUDA_VER=%CU_VERSION:cu=% +set CUDA_VER_MAJOR=%CUDA_VER:~0,-1% +set CUDA_VER_MINOR=%CUDA_VER:~-1,1% +set CUDA_VERSION_STR=%CUDA_VER_MAJOR%.%CUDA_VER_MINOR% + +if %CUDA_VER% EQU 92 goto cuda92 +if %CUDA_VER% EQU 100 goto cuda100 +if %CUDA_VER% EQU 101 goto cuda101 +if %CUDA_VER% EQU 102 goto cuda102 +if %CUDA_VER% EQU 110 goto cuda110 +if %CUDA_VER% EQU 111 goto cuda111 +if %CUDA_VER% EQU 112 goto cuda112 + +echo CUDA %CUDA_VERSION_STR% is not supported +exit /b 1 + +:cuda92 +if not exist "%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cuda_9.2.148_win10.exe --output "%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe" + if errorlevel 1 exit /b 1 + set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe" + set "ARGS=nvcc_9.2 cuobjdump_9.2 nvprune_9.2 cupti_9.2 cublas_9.2 cublas_dev_9.2 cudart_9.2 cufft_9.2 cufft_dev_9.2 curand_9.2 curand_dev_9.2 cusolver_9.2 cusolver_dev_9.2 cusparse_9.2 cusparse_dev_9.2 nvgraph_9.2 nvgraph_dev_9.2 npp_9.2 npp_dev_9.2 nvrtc_9.2 nvrtc_dev_9.2 nvml_dev_9.2" +) + +if not exist "%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cudnn-9.2-windows10-x64-v7.2.1.38.zip --output "%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip" + if errorlevel 1 exit /b 1 + set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip" +) + +goto cuda_common + +:cuda100 + +if not exist "%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cuda_10.0.130_411.31_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe" + if errorlevel 1 exit /b 1 + set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe" + set "ARGS=nvcc_10.0 cuobjdump_10.0 nvprune_10.0 cupti_10.0 cublas_10.0 cublas_dev_10.0 cudart_10.0 cufft_10.0 cufft_dev_10.0 
curand_10.0 curand_dev_10.0 cusolver_10.0 cusolver_dev_10.0 cusparse_10.0 cusparse_dev_10.0 nvgraph_10.0 nvgraph_dev_10.0 npp_10.0 npp_dev_10.0 nvrtc_10.0 nvrtc_dev_10.0 nvml_dev_10.0" +) + +if not exist "%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cudnn-10.0-windows10-x64-v7.4.1.5.zip --output "%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip" + if errorlevel 1 exit /b 1 + set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip" +) + +goto cuda_common + +:cuda101 + +if not exist "%SRC_DIR%\temp_build\cuda_10.1.243_426.00_win10.exe" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_10.1.243_426.00_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.1.243_426.00_win10.exe" + if errorlevel 1 exit /b 1 + set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.1.243_426.00_win10.exe" + set "ARGS=nvcc_10.1 cuobjdump_10.1 nvprune_10.1 cupti_10.1 cublas_10.1 cublas_dev_10.1 cudart_10.1 cufft_10.1 cufft_dev_10.1 curand_10.1 curand_dev_10.1 cusolver_10.1 cusolver_dev_10.1 cusparse_10.1 cusparse_dev_10.1 nvgraph_10.1 nvgraph_dev_10.1 npp_10.1 npp_dev_10.1 nvrtc_10.1 nvrtc_dev_10.1 nvml_dev_10.1" +) + +if not exist "%SRC_DIR%\temp_build\cudnn-10.1-windows10-x64-v7.6.4.38.zip" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-10.1-windows10-x64-v7.6.4.38.zip --output "%SRC_DIR%\temp_build\cudnn-10.1-windows10-x64-v7.6.4.38.zip" + if errorlevel 1 exit /b 1 + set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.1-windows10-x64-v7.6.4.38.zip" +) + +goto cuda_common + +:cuda102 + +if not exist "%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_10.2.89_441.22_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe" + if errorlevel 1 exit /b 1 + set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe" + set "ARGS=nvcc_10.2 cuobjdump_10.2 nvprune_10.2 cupti_10.2 cublas_10.2 
cublas_dev_10.2 cudart_10.2 cufft_10.2 cufft_dev_10.2 curand_10.2 curand_dev_10.2 cusolver_10.2 cusolver_dev_10.2 cusparse_10.2 cusparse_dev_10.2 nvgraph_10.2 nvgraph_dev_10.2 npp_10.2 npp_dev_10.2 nvrtc_10.2 nvrtc_dev_10.2 nvml_dev_10.2" +) + +if not exist "%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-10.2-windows10-x64-v7.6.5.32.zip --output "%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip" + if errorlevel 1 exit /b 1 + set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip" +) + +goto cuda_common + +:cuda110 + +if not exist "%SRC_DIR%\temp_build\cuda_11.0.2_451.48_win10.exe" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_11.0.2_451.48_win10.exe --output "%SRC_DIR%\temp_build\cuda_11.0.2_451.48_win10.exe" + if errorlevel 1 exit /b 1 + set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_11.0.2_451.48_win10.exe" + set "ARGS=nvcc_11.0 cuobjdump_11.0 nvprune_11.0 nvprof_11.0 cupti_11.0 cublas_11.0 cublas_dev_11.0 cudart_11.0 cufft_11.0 cufft_dev_11.0 curand_11.0 curand_dev_11.0 cusolver_11.0 cusolver_dev_11.0 cusparse_11.0 cusparse_dev_11.0 npp_11.0 npp_dev_11.0 nvrtc_11.0 nvrtc_dev_11.0 nvml_dev_11.0" +) + +if not exist "%SRC_DIR%\temp_build\cudnn-11.0-windows-x64-v8.0.4.30.zip" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-11.0-windows-x64-v8.0.4.30.zip --output "%SRC_DIR%\temp_build\cudnn-11.0-windows-x64-v8.0.4.30.zip" + if errorlevel 1 exit /b 1 + set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-11.0-windows-x64-v8.0.4.30.zip" +) + +goto cuda_common + +:cuda111 + +if not exist "%SRC_DIR%\temp_build\cuda_11.1.0_456.43_win10.exe" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_11.1.0_456.43_win10.exe --output "%SRC_DIR%\temp_build\cuda_11.1.0_456.43_win10.exe" + if errorlevel 1 exit /b 1 + set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_11.1.0_456.43_win10.exe" + set "ARGS=nvcc_11.1 cuobjdump_11.1 nvprune_11.1 
nvprof_11.1 cupti_11.1 cublas_11.1 cublas_dev_11.1 cudart_11.1 cufft_11.1 cufft_dev_11.1 curand_11.1 curand_dev_11.1 cusolver_11.1 cusolver_dev_11.1 cusparse_11.1 cusparse_dev_11.1 npp_11.1 npp_dev_11.1 nvrtc_11.1 nvrtc_dev_11.1 nvml_dev_11.1" +) + +@REM There is no downloadable driver for Tesla on CUDA 11.1 yet. We will use +@REM the driver inside CUDA +if "%JOB_EXECUTOR%" == "windows-with-nvidia-gpu" set "ARGS=%ARGS% Display.Driver" + +if not exist "%SRC_DIR%\temp_build\cudnn-11.1-windows-x64-v8.0.5.39.zip" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-11.1-windows-x64-v8.0.5.39.zip --output "%SRC_DIR%\temp_build\cudnn-11.1-windows-x64-v8.0.5.39.zip" + if errorlevel 1 exit /b 1 + set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-11.1-windows-x64-v8.0.5.39.zip" +) + +goto cuda_common + +:cuda112 + +if not exist "%SRC_DIR%\temp_build\cuda_11.2.0_460.89_win10.exe" ( + curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_11.2.0_460.89_win10.exe --output "%SRC_DIR%\temp_build\cuda_11.2.0_460.89_win10.exe" + if errorlevel 1 exit /b 1 + set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_11.2.0_460.89_win10.exe" + set "ARGS=nvcc_11.2 cuobjdump_11.2 nvprune_11.2 nvprof_11.2 cupti_11.2 cublas_11.2 cublas_dev_11.2 cudart_11.2 cufft_11.2 cufft_dev_11.2 curand_11.2 curand_dev_11.2 cusolver_11.2 cusolver_dev_11.2 cusparse_11.2 cusparse_dev_11.2 npp_11.2 npp_dev_11.2 nvrtc_11.2 nvrtc_dev_11.2 nvml_dev_11.2" +) + +if not exist "%SRC_DIR%\temp_build\cudnn-11.2-windows-x64-v8.1.0.77.zip" ( + curl -k -L http://s3.amazonaws.com/ossci-windows/cudnn-11.2-windows-x64-v8.1.0.77.zip --output "%SRC_DIR%\temp_build\cudnn-11.2-windows-x64-v8.1.0.77.zip" + if errorlevel 1 exit /b 1 + set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-11.2-windows-x64-v8.1.0.77.zip" +) + +goto cuda_common + +:cuda_common + +if not exist "%SRC_DIR%\temp_build\NvToolsExt.7z" ( + curl -k -L https://www.dropbox.com/s/9mcolalfdj4n979/NvToolsExt.7z?dl=1 --output "%SRC_DIR%\temp_build\NvToolsExt.7z" + if 
errorlevel 1 exit /b 1 +) + +if not exist "%SRC_DIR%\temp_build\gpu_driver_dlls.zip" ( + curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "%SRC_DIR%\temp_build\gpu_driver_dlls.zip" + if errorlevel 1 exit /b 1 +) + +echo Installing CUDA toolkit... +7z x %CUDA_SETUP_FILE% -o"%SRC_DIR%\temp_build\cuda" +pushd "%SRC_DIR%\temp_build\cuda" +start /wait setup.exe -s %ARGS% +popd + +echo Installing VS integration... +xcopy /Y "%SRC_DIR%\temp_build\cuda\CUDAVisualStudioIntegration\extras\visual_studio_integration\MSBuildExtensions\*.*" "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\Common7\IDE\VC\VCTargets\BuildCustomizations" + +echo Installing NvToolsExt... +7z x %SRC_DIR%\temp_build\NvToolsExt.7z -o"%SRC_DIR%\temp_build\NvToolsExt" +mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64" +mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\include" +mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\lib\x64" +xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\bin\x64\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64" +xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\include\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\include" +xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\lib\x64\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\lib\x64" + +echo Setting up environment... +set "PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\libnvvp;%PATH%" +set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%" +set "CUDA_PATH_V%CUDA_VER_MAJOR%_%CUDA_VER_MINOR%=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%" +set "NVTOOLSEXT_PATH=%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64" + +if not exist "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin\nvcc.exe" ( + echo CUDA %CUDA_VERSION_STR% installation failed. 
+ exit /b 1 +) + +echo Installing cuDNN... +7z x %CUDNN_SETUP_FILE% -o"%SRC_DIR%\temp_build\cudnn" +xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\bin\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin" +xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\lib\x64\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\lib\x64" +xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\include\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\include" + +echo Installing GPU driver DLLs +7z x %SRC_DIR%\temp_build\gpu_driver_dlls.zip -o"C:\Windows\System32" + +echo Cleaning temp files +rd /s /q "%SRC_DIR%\temp_build" || ver > nul diff --git a/packaging/windows/internal/dep_install.bat b/packaging/windows/internal/dep_install.bat new file mode 100644 index 0000000..db665a9 --- /dev/null +++ b/packaging/windows/internal/dep_install.bat @@ -0,0 +1,14 @@ +@echo off + +REM curl -k https://www.7-zip.org/a/7z1805-x64.exe -O +REM if errorlevel 1 exit /b 1 + +REM start /wait 7z1805-x64.exe /S +REM if errorlevel 1 exit /b 1 + +REM set "PATH=%ProgramFiles%\7-Zip;%PATH%" + +choco feature disable --name showDownloadProgress +choco feature enable --name allowGlobalConfirmation + +choco install curl 7zip diff --git a/packaging/windows/internal/env_fix.bat b/packaging/windows/internal/env_fix.bat new file mode 100644 index 0000000..dd0aaf5 --- /dev/null +++ b/packaging/windows/internal/env_fix.bat @@ -0,0 +1,31 @@ +@echo off + +:: Caution: Please don't use this script locally +:: It may destroy your build environment. 
+ +setlocal + +IF NOT EXIST "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" ( + echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows + exit /b 1 +) + +for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do ( + if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( + set "VS15INSTALLDIR=%%i" + set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" + goto vswhere + ) +) + +:vswhere + +IF "%VS15VCVARSALL%"=="" ( + echo Visual Studio 2017 C++ BuildTools is required to compile PyTorch on Windows + exit /b 1 +) + +call "%VS15VCVARSALL%" x86_amd64 +for /f "usebackq tokens=*" %%i in (`where link.exe`) do move "%%i" "%%i.bak" + +endlocal diff --git a/packaging/windows/internal/nightly_defaults.bat b/packaging/windows/internal/nightly_defaults.bat new file mode 100644 index 0000000..2b5ca5c --- /dev/null +++ b/packaging/windows/internal/nightly_defaults.bat @@ -0,0 +1,200 @@ +@echo on + +if "%~1"=="" goto arg_error +if NOT "%~2"=="" goto arg_error +goto arg_end + +:arg_error + +echo Illegal number of parameters. Pass package type `Conda` or `Wheels`. +exit /b 1 + +:arg_end + +echo "nightly_defaults.bat at %CD% starting at %DATE%" + +set SRC_DIR=%~dp0\.. + +:: NIGHTLIES_FOLDER +:: N.B. this is also defined in cron_start.sh +:: An arbitrary root folder to store all nightlies folders, each of which is a +:: parent level date folder with separate subdirs for logs, wheels, conda +:: packages, etc. This should be kept the same across all scripts called in a +:: cron job, so it only has a default value in the top-most script +:: build_cron.sh to avoid the default values from diverging. +if "%NIGHTLIES_FOLDER%" == "" set "NIGHTLIES_FOLDER=%SRC_DIR%" + +:: NIGHTLIES_DATE +:: N.B. this is also defined in cron_start.sh +:: The date in YYYY_mm_dd format that we are building for. 
If this is not +:: already set, then this will first try to find the date of the nightlies +:: folder that this builder repo exists in; e.g. if this script exists in +:: some_dir/2019_09_04/builder/cron/ then this will be set to 2019_09_04 (must +:: match YYYY_mm_dd). This is for convenience when debugging/uploading past +:: dates, so that you don't have to set NIGHTLIES_DATE yourself. If a date +:: folder cannot be found in that exact location, then this will default to +:: the current date. + + +if "%NIGHTLIES_DATE%" == "" ( goto date_start ) else ( goto date_end ) + +:date_start + +set "DATE_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyy_MM_dd'" +set "DATE_COMPACT_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyyMMdd'" + +FOR /F "delims=" %%i IN ('powershell -c "%DATE_CMD%"') DO set NIGHTLIES_DATE=%%i +FOR /F "delims=" %%i IN ('powershell -c "%DATE_COMPACT_CMD%"') DO set NIGHTLIES_DATE_COMPACT=%%i + +:date_end + +if "%NIGHTLIES_DATE_COMPACT%" == "" set NIGHTLIES_DATE_COMPACT=%NIGHTLIES_DATE:~0,4%%NIGHTLIES_DATE:~5,2%%NIGHTLIES_DATE:~8,2% + +:: Used in lots of places as the root dir to store all conda/wheel/manywheel +:: packages as well as logs for the day +set today=%NIGHTLIES_FOLDER%\%NIGHTLIES_DATE% +mkdir "%today%" || ver >nul + + +::############################################################################# +:: Add new configuration variables below this line. 'today' should always be +:: defined ASAP to avoid weird errors +::############################################################################# + + +:: List of people to email when things go wrong. 
This is passed directly to +:: `mail -t` +:: TODO: Not supported yet +if "%NIGHTLIES_EMAIL_LIST%" == "" set NIGHTLIES_EMAIL_LIST=peterghost86@gmail.com + +:: PYTORCH_CREDENTIALS_FILE +:: A bash file that exports credentials needed to upload to aws and anaconda. +:: Needed variables are PYTORCH_ANACONDA_USERNAME, PYTORCH_ANACONDA_PASSWORD, +:: AWS_ACCESS_KEY_ID, and AWS_SECRET_ACCESS_KEY. Or it can just export the AWS +:: keys and then prepend a logged-in conda installation to the path. +:: TODO: Not supported yet +if "%PYTORCH_CREDENTIALS_FILE%" == "" set PYTORCH_CREDENTIALS_FILE=/c/Users/administrator/nightlies/credentials.sh + +:: Location of the temporary miniconda that is downloaded to install conda-build +:: and aws to upload finished packages TODO this is messy to install this in +:: upload.sh and later use it in upload_logs.sh +if "%CONDA_UPLOADER_INSTALLATION%" == "" set "CONDA_UPLOADER_INSTALLATION=%today%\miniconda" + +:: N.B. BUILDER_REPO and BUILDER_BRANCH are both set in cron_start.sh, as that +:: is the script that actually clones the builder repo that /this/ script is +:: running from. +pushd "%SRC_DIR%\.." +set NIGHTLIES_BUILDER_ROOT=%CD% +popd + +:: The shared pytorch repo to be used by all builds +if "%NIGHTLIES_PYTORCH_ROOT%" == "" set "NIGHTLIES_PYTORCH_ROOT=%today%\csprng" + +:: PYTORCH_REPO +:: The Github org/user whose fork of Pytorch to check out (git clone +:: https://github.com//pytorch.git). This will always be cloned +:: fresh to build with. Default is 'pytorch' +if "%PYTORCH_REPO%" == "" set PYTORCH_REPO=pytorch + +:: PYTORCH_BRANCH +:: The branch of Pytorch to checkout for building (git checkout ). +:: This can either be the name of the branch (e.g. git checkout +:: my_branch_name) or can be a git commit (git checkout 4b2674n...). 
Default +:: is 'latest', which is a special term that signals to pull the last commit +:: before 0:00 midnight on the NIGHTLIES_DATE +if "%PYTORCH_BRANCH%" == "" set PYTORCH_BRANCH=nightly + +:: Clone the requested pytorch checkout +if exist "%NIGHTLIES_PYTORCH_ROOT%" ( goto clone_end ) else ( goto clone_start ) + +:clone_start + +git clone --recursive "https://github.com/%PYTORCH_REPO%/csprng.git" "%NIGHTLIES_PYTORCH_ROOT%" +pushd "%NIGHTLIES_PYTORCH_ROOT%" + +if "%PYTORCH_BRANCH%" == "latest" ( goto latest_start ) else ( goto latest_end ) + +:latest_start + +:: Switch to the latest commit by 11:59 yesterday +echo PYTORCH_BRANCH is set to latest so I will find the last commit +echo before 0:00 midnight on %NIGHTLIES_DATE% +set git_date=%NIGHTLIES_DATE:_=-% +FOR /F "delims=" %%i IN ('git log --before %git_date% -n 1 "--pretty=%%H"') DO set last_commit=%%i +echo Setting PYTORCH_BRANCH to %last_commit% since that was the last +echo commit before %NIGHTLIES_DATE% +set PYTORCH_BRANCH=%last_commit% + +:latest_end + +git checkout "%PYTORCH_BRANCH%" +git submodule update +popd + +:clone_end + +if "%CUDA_VERSION%" == "cpu" ( + set _DESIRED_CUDA=cpu +) else ( + set _DESIRED_CUDA=cu%CUDA_VERSION% +) + +:: PYTORCH_BUILD_VERSION +:: The actual version string. Used in conda like +:: pytorch-nightly==1.0.0.dev20180908 +:: or in manylinux like +:: torch_nightly-1.0.0.dev20180908-cp27-cp27m-linux_x86_64.whl +if "%TORCHCSPRNG_BUILD_VERSION%" == "" set TORCHCSPRNG_BUILD_VERSION=0.9.0.dev%NIGHTLIES_DATE_COMPACT% + +if "%~1" == "Wheels" ( + if not "%CUDA_VERSION%" == "102" ( + set TORCHCSPRNG_BUILD_VERSION=%TORCHCSPRNG_BUILD_VERSION%+%_DESIRED_CUDA% + ) +) + +:: PYTORCH_BUILD_NUMBER +:: This is usually the number 1. If more than one build is uploaded for the +:: same version/date, then this can be incremented to 2,3 etc in which case +:: '.post2' will be appended to the version string of the package. 
This can +:: be set to '0' only if OVERRIDE_PACKAGE_VERSION is being used to bypass +:: all the version string logic in downstream scripts. Since we use the +:: override below, exporting this shouldn't actually matter. +if "%TORCHCSPRNG_BUILD_NUMBER%" == "" set /a TORCHCSPRNG_BUILD_NUMBER=1 +if %TORCHCSPRNG_BUILD_NUMBER% GTR 1 set TORCHCSPRNG_BUILD_VERSION=%TORCHCSPRNG_BUILD_VERSION%%TORCHCSPRNG_BUILD_NUMBER% + +:: The nightly builds use their own versioning logic, so we override whatever +:: logic is in setup.py or other scripts +:: TODO: Not supported yet +set OVERRIDE_PACKAGE_VERSION=%TORCHCSPRNG_BUILD_VERSION% +set BUILD_VERSION=%TORCHCSPRNG_BUILD_VERSION% + +:: Build folder for conda builds to use +if "%TORCH_CONDA_BUILD_FOLDER%" == "" set TORCH_CONDA_BUILD_FOLDER=torchcsprng + +:: TORCH_PACKAGE_NAME +:: The name of the package to upload. This should probably be pytorch or +:: pytorch-nightly. N.B. that pip will change all '-' to '_' but conda will +:: not. This is dealt with in downstream scripts. +:: TODO: Not supported yet +if "%TORCH_PACKAGE_NAME%" == "" set TORCH_PACKAGE_NAME=torchcsprng + +:: PIP_UPLOAD_FOLDER should end in a slash. This is to handle it being empty +:: (when uploading to e.g. whl/cpu/) and also to handle nightlies (when +:: uploading to e.g. /whl/nightly/cpu) +:: TODO: Not supported yet +if "%PIP_UPLOAD_FOLDER%" == "" set "PIP_UPLOAD_FOLDER=nightly\" + +:: The location of the binary_sizes dir in s3 is hardcoded into +:: upload_binary_sizes.sh + +:: DAYS_TO_KEEP +:: How many days to keep around for clean.sh. Build folders older than this +:: will be purged at the end of cron jobs. '1' means to keep only the current +:: day. Values less than 1 are not allowed. The default is 5. +:: TODO: Not supported yet +if "%DAYS_TO_KEEP%" == "" set /a DAYS_TO_KEEP=5 +if %DAYS_TO_KEEP% LSS 1 ( + echo DAYS_TO_KEEP cannot be less than 1. 
+ echo A value of 1 means to only keep the build for today + exit /b 1 +) diff --git a/packaging/windows/internal/publish.bat b/packaging/windows/internal/publish.bat new file mode 100644 index 0000000..7e820d7 --- /dev/null +++ b/packaging/windows/internal/publish.bat @@ -0,0 +1,89 @@ +@echo off + +set SRC_DIR=%~dp0 +pushd %SRC_DIR% + +if NOT "%CUDA_VERSION%" == "cpu" ( + set PACKAGE_SUFFIX=_cuda%CUDA_VERSION% +) else ( + set PACKAGE_SUFFIX= +) + +if "%PACKAGEFULLNAME%" == "Conda" ( + set PACKAGE=conda +) else ( + set PACKAGE=wheels +) + +if not defined PACKAGE_SUFFIX ( + set PUBLISH_BRANCH=csprng_%PACKAGE%_%DESIRED_PYTHON% +) else ( + set PUBLISH_BRANCH=csprng_%PACKAGE%_%DESIRED_PYTHON%%PACKAGE_SUFFIX% +) + +git clone %ARTIFACT_REPO_URL% -b %PUBLISH_BRANCH% --single-branch >nul 2>&1 + +IF ERRORLEVEL 1 ( + echo Branch %PUBLISH_BRANCH% not exist, falling back to master + set NO_BRANCH=1 + git clone %ARTIFACT_REPO_URL% -b master --single-branch >nul 2>&1 +) + +IF ERRORLEVEL 1 ( + echo Clone failed + goto err +) + +cd pytorch_builder +attrib -s -h -r . /s /d + +:: Empty repo +rd /s /q . || ver >nul + +IF NOT EXIST %PACKAGE% mkdir %PACKAGE% + +xcopy /S /E /Y ..\..\output\*.* %PACKAGE%\ + +git config --global user.name "Azure DevOps" +git config --global user.email peterghost86@gmail.com +git init +git checkout --orphan %PUBLISH_BRANCH% +git remote add origin %ARTIFACT_REPO_URL% +git add . 
+git commit -m "Update artifacts" + +:push + +if "%RETRY_TIMES%" == "" ( + set /a RETRY_TIMES=10 + set /a SLEEP_TIME=2 +) else ( + set /a RETRY_TIMES=%RETRY_TIMES%-1 + set /a SLEEP_TIME=%SLEEP_TIME%*2 +) + +git push origin %PUBLISH_BRANCH% -f > nul 2>&1 + +IF ERRORLEVEL 1 ( + echo Git push retry times remaining: %RETRY_TIMES% + echo Sleep time: %SLEEP_TIME% seconds + IF %RETRY_TIMES% EQU 0 ( + echo Push failed + goto err + ) + waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul + goto push +) ELSE ( + set RETRY_TIMES= + set SLEEP_TIME= +) + +popd + +exit /b 0 + +:err + +popd + +exit /b 1 diff --git a/packaging/windows/internal/setup.bat b/packaging/windows/internal/setup.bat new file mode 100644 index 0000000..96cb7fb --- /dev/null +++ b/packaging/windows/internal/setup.bat @@ -0,0 +1,44 @@ +@echo off + +echo The flags after configuring: +echo NO_CUDA=%NO_CUDA% +echo CMAKE_GENERATOR=%CMAKE_GENERATOR% +if "%NO_CUDA%"=="" echo CUDA_PATH=%CUDA_PATH% +if NOT "%CC%"=="" echo CC=%CC% +if NOT "%CXX%"=="" echo CXX=%CXX% +if NOT "%DISTUTILS_USE_SDK%"=="" echo DISTUTILS_USE_SDK=%DISTUTILS_USE_SDK% + +set SRC_DIR=%~dp0\.. + +IF "%VSDEVCMD_ARGS%" == "" ( + call "%VS15VCVARSALL%" x64 +) ELSE ( + call "%VS15VCVARSALL%" x64 %VSDEVCMD_ARGS% +) + +pushd %SRC_DIR% + +IF NOT exist "setup.py" ( + cd %MODULE_NAME% +) + +if "%CXX%"=="sccache cl" ( + sccache --stop-server + sccache --start-server + sccache --zero-stats +) + +:pytorch +:: This stores in e.g. D:/_work/1/s/windows/output/cpu +pip wheel -e . --no-deps --wheel-dir ../output + +:build_end +IF ERRORLEVEL 1 exit /b 1 +IF NOT ERRORLEVEL 0 exit /b 1 + +if "%CXX%"=="sccache cl" ( + taskkill /im sccache.exe /f /t || ver > nul + taskkill /im nvcc.exe /f /t || ver > nul +) + +cd .. 
diff --git a/packaging/windows/internal/test.bat b/packaging/windows/internal/test.bat new file mode 100644 index 0000000..8e6878b --- /dev/null +++ b/packaging/windows/internal/test.bat @@ -0,0 +1,79 @@ +@echo off + +set SRC_DIR=%~dp0\.. +pushd %SRC_DIR% + +set PYTHON_VERSION=%PYTHON_PREFIX:py=cp% + +if "%BUILD_CSPRNG%" == "" ( + pip install future pytest coverage hypothesis protobuf +) ELSE ( + pip install future pytest "pillow>=4.1.1" +) + +for /F "delims=" %%i in ('where /R %SRC_DIR%\output *%MODULE_NAME%*%PYTHON_VERSION%*.whl') do pip install "%%i" + +if ERRORLEVEL 1 exit /b 1 + +if NOT "%BUILD_CSPRNG%" == "" ( + echo Smoke testing imports + python -c "import torchcsprng" + if ERRORLEVEL 1 exit /b 1 + goto smoke_test_end +) + +echo Smoke testing imports +python -c "import torch" +if ERRORLEVEL 1 exit /b 1 + +python -c "from caffe2.python import core" +if ERRORLEVEL 1 exit /b 1 + +echo Checking that MKL is available +python -c "import torch; exit(0 if torch.backends.mkl.is_available() else 1)" +if ERRORLEVEL 1 exit /b 1 + +setlocal EnableDelayedExpansion +set NVIDIA_GPU_EXISTS=0 +for /F "delims=" %%i in ('wmic path win32_VideoController get name') do ( + set GPUS=%%i + if not "x!GPUS:NVIDIA=!" == "x!GPUS!" ( + SET NVIDIA_GPU_EXISTS=1 + goto gpu_check_end + ) +) +:gpu_check_end +endlocal & set NVIDIA_GPU_EXISTS=%NVIDIA_GPU_EXISTS% + +if NOT "%CUDA_PREFIX%" == "cpu" if "%NVIDIA_GPU_EXISTS%" == "1" ( + echo Checking that CUDA archs are setup correctly + python -c "import torch; torch.randn([3,5]).cuda()" + if ERRORLEVEL 1 exit /b 1 + + echo Checking that magma is available + python -c "import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)" + if ERRORLEVEL 1 exit /b 1 + + echo Checking that CuDNN is available + python -c "import torch; exit(0 if torch.backends.cudnn.is_available() else 1)" + if ERRORLEVEL 1 exit /b 1 +) +:smoke_test_end + +echo Not running unit tests. 
Hopefully these problems are caught by CI +goto test_end + +if "%BUILD_CSPRNG%" == "" ( + cd pytorch\test + python run_test.py -v +) else ( + cd csprng + pytest . +) + +if ERRORLEVEL 1 exit /b 1 + +:test_end + +popd +exit /b 0 diff --git a/packaging/windows/internal/upload.bat b/packaging/windows/internal/upload.bat new file mode 100644 index 0000000..f78fe0b --- /dev/null +++ b/packaging/windows/internal/upload.bat @@ -0,0 +1,96 @@ +@echo off + +IF "%CONDA_UPLOADER_INSTALLATION%" == "" goto precheck_fail +IF "%PYTORCH_FINAL_PACKAGE_DIR%" == "" goto precheck_fail +IF "%today%" == "" goto precheck_fail +IF "%PYTORCH_ANACONDA_USERNAME%" == "" goto precheck_fail +IF "%PYTORCH_ANACONDA_PASSWORD%" == "" goto precheck_fail + +goto precheck_pass + +:precheck_fail + +echo Please run nightly_defaults.bat first. +echo And remember to set `PYTORCH_FINAL_PACKAGE_DIR` +echo Finally, don't forget to set anaconda tokens +exit /b 1 + +:precheck_pass + +pushd %today% + +:: Install anaconda client +set "CONDA_HOME=%CONDA_UPLOADER_INSTALLATION%" +set "tmp_conda=%CONDA_HOME%" +set "miniconda_exe=%CD%\miniconda.exe" +rmdir /s /q "%CONDA_HOME%" +del miniconda.exe +curl -k https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -o "%miniconda_exe%" +popd + +IF ERRORLEVEL 1 ( + echo Conda download failed + exit /b 1 +) + +call %~dp0\..\..\conda\install_conda.bat + +IF ERRORLEVEL 1 ( + echo Conda installation failed + exit /b 1 +) + +set "ORIG_PATH=%PATH%" +set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%" + +REM conda install -y anaconda-client +pip install git+https://github.com/peterjc123/anaconda-client.git@log_more_meaningfull_errors +IF ERRORLEVEL 1 ( + echo Anaconda client installation failed + exit /b 1 +) + +set PYTORCH_FINAL_PACKAGE= +:: Upload all the packages under `PYTORCH_FINAL_PACKAGE_DIR` +FOR /F "delims=" %%i IN ('where /R %PYTORCH_FINAL_PACKAGE_DIR% *csprng*.tar.bz2') DO ( + set "PYTORCH_FINAL_PACKAGE=%%i" +) + +IF 
"%PYTORCH_FINAL_PACKAGE%" == "" ( + echo No package to upload + exit /b 0 +) + +:upload + +if "%RETRY_TIMES%" == "" ( + set /a RETRY_TIMES=10 + set /a SLEEP_TIME=2 +) else ( + set /a RETRY_TIMES=%RETRY_TIMES%-1 + set /a SLEEP_TIME=%SLEEP_TIME%*2 +) + +REM bash -c "yes | anaconda login --username "%PYTORCH_ANACONDA_USERNAME%" --password "%PYTORCH_ANACONDA_PASSWORD%"" +anaconda login --username "%PYTORCH_ANACONDA_USERNAME%" --password "%PYTORCH_ANACONDA_PASSWORD%" +IF ERRORLEVEL 1 ( + echo Anaconda client login failed + exit /b 1 +) + +echo Uploading %PYTORCH_FINAL_PACKAGE% to Anaconda Cloud +anaconda upload "%PYTORCH_FINAL_PACKAGE%" -u pytorch-nightly --label main --force --no-progress + +IF ERRORLEVEL 1 ( + echo Anaconda upload retry times remaining: %RETRY_TIMES% + echo Sleep time: %SLEEP_TIME% seconds + IF %RETRY_TIMES% EQU 0 ( + echo Upload failed + exit /b 1 + ) + waitfor SomethingThatIsNeverHappening /t %SLEEP_TIME% 2>nul || ver >nul + goto upload +) ELSE ( + set RETRY_TIMES= + set SLEEP_TIME= +) diff --git a/packaging/windows/internal/vc_env_helper.bat b/packaging/windows/internal/vc_env_helper.bat new file mode 100644 index 0000000..e85a372 --- /dev/null +++ b/packaging/windows/internal/vc_env_helper.bat @@ -0,0 +1,43 @@ +@echo on + +set VC_VERSION_LOWER=16 +set VC_VERSION_UPPER=17 +if "%VC_YEAR%" == "2017" ( + set VC_VERSION_LOWER=15 + set VC_VERSION_UPPER=16 +) + +for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do ( + if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" ( + set "VS15INSTALLDIR=%%i" + set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat" + goto vswhere + ) +) + +:vswhere +if "%VSDEVCMD_ARGS%" == "" ( + call "%VS15VCVARSALL%" x64 || exit /b 1 +) else ( + call "%VS15VCVARSALL%" x64 %VSDEVCMD_ARGS% || exit /b 1 +) + +@echo on + +set DISTUTILS_USE_SDK=1 + +set args=%1 +shift 
+:start +if [%1] == [] goto done +set args=%args% %1 +shift +goto start + +:done +if "%args%" == "" ( + echo Usage: vc_env_helper.bat [command] [args] + echo e.g. vc_env_helper.bat cl /c test.cpp +) + +%args% || exit /b 1 diff --git a/packaging/windows/internal/vc_install_helper.sh b/packaging/windows/internal/vc_install_helper.sh new file mode 100644 index 0000000..cdae180 --- /dev/null +++ b/packaging/windows/internal/vc_install_helper.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +set -ex + +if [[ "$CU_VERSION" == "cu92" ]]; then + export VC_YEAR=2017 + export VSDEVCMD_ARGS="-vcvars_ver=14.13" + powershell packaging/windows/internal/vs2017_install.ps1 +elif [[ "$CU_VERSION" == "cu100" ]]; then + export VC_YEAR=2017 + export VSDEVCMD_ARGS="" + powershell packaging/windows/internal/vs2017_install.ps1 +else + export VC_YEAR=2019 + export VSDEVCMD_ARGS="" +fi diff --git a/packaging/windows/internal/vs2017_install.ps1 b/packaging/windows/internal/vs2017_install.ps1 new file mode 100644 index 0000000..3e953de --- /dev/null +++ b/packaging/windows/internal/vs2017_install.ps1 @@ -0,0 +1,25 @@ +$VS_DOWNLOAD_LINK = "https://aka.ms/vs/15/release/vs_buildtools.exe" +$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools", + "--add Microsoft.VisualStudio.Component.VC.Tools.14.13", + "--add Microsoft.Component.MSBuild", + "--add Microsoft.VisualStudio.Component.Roslyn.Compiler", + "--add Microsoft.VisualStudio.Component.TextTemplating", + "--add Microsoft.VisualStudio.Component.VC.CoreIde", + "--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest", + "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core", + "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64", + "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81") + +curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe +if ($LASTEXITCODE -ne 0) { + echo "Download of the VS 2017 installer failed" + exit 1 +} + +$process = Start-Process 
"${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru +Remove-Item -Path vs_installer.exe -Force +$exitCode = $process.ExitCode +if (($exitCode -ne 0) -and ($exitCode -ne 3010)) { + echo "VS 2017 installer exited with code $exitCode, which should be one of [0, 3010]." + exit 1 +} diff --git a/packaging/windows/internal/vs2019_install.ps1 b/packaging/windows/internal/vs2019_install.ps1 new file mode 100644 index 0000000..e436051 --- /dev/null +++ b/packaging/windows/internal/vs2019_install.ps1 @@ -0,0 +1,21 @@ +$VS_DOWNLOAD_LINK = "https://aka.ms/vs/16/release/vs_buildtools.exe" +$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools", + "--add Microsoft.Component.MSBuild", + "--add Microsoft.VisualStudio.Component.Roslyn.Compiler", + "--add Microsoft.VisualStudio.Component.VC.CoreBuildTools", + "--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest", + "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64") + +curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe +if ($LASTEXITCODE -ne 0) { + echo "Download of the VS 2019 installer failed" + exit 1 +} + +$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru +Remove-Item -Path vs_installer.exe -Force +$exitCode = $process.ExitCode +if (($exitCode -ne 0) -and ($exitCode -ne 3010)) { + echo "VS 2019 installer exited with code $exitCode, which should be one of [0, 3010]." 
+ exit 1 +} diff --git a/packaging/windows/internal/vs_install.bat b/packaging/windows/internal/vs_install.bat new file mode 100644 index 0000000..348a5e3 --- /dev/null +++ b/packaging/windows/internal/vs_install.bat @@ -0,0 +1,14 @@ +@echo off + +set VS_DOWNLOAD_LINK=https://aka.ms/vs/15/release/vs_enterprise.exe +set VS_INSTALL_PATH=C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise +set VS_INSTALL_ARGS=--nocache --quiet --wait --add Microsoft.VisualStudio.Component.VC.Tools.14.11 +set VSDEVCMD_ARGS=-vcvars_ver=14.11 + +curl -k -L %VS_DOWNLOAD_LINK% --output vs_installer.exe +if errorlevel 1 exit /b 1 + +start /wait vs_installer.exe modify --installPath "%VS_INSTALL_PATH%" %VS_INSTALL_ARGS% +if not errorlevel 0 exit /b 1 +if errorlevel 1 if not errorlevel 3010 exit /b 1 +if errorlevel 3011 exit /b 1 diff --git a/packaging/windows/old/cuda100.bat b/packaging/windows/old/cuda100.bat new file mode 100644 index 0000000..f088bca --- /dev/null +++ b/packaging/windows/old/cuda100.bat @@ -0,0 +1,59 @@ +@echo off + +IF NOT "%BUILD_CSPRNG%" == "" ( + set MODULE_NAME=csprng +) ELSE ( + set MODULE_NAME=pytorch +) + +IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( + call internal\clone.bat + cd .. 
+ IF ERRORLEVEL 1 goto eof +) ELSE ( + call internal\clean.bat +) + +call internal\check_deps.bat +IF ERRORLEVEL 1 goto eof + +REM Check for optional components + +set NO_CUDA= +set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 + +IF "%NVTOOLSEXT_PATH%"=="" ( + echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing + exit /b 1 + goto optcheck +) + +IF "%CUDA_PATH_V10_0%"=="" ( + echo CUDA 10.0 not found, failing + exit /b 1 +) ELSE ( + IF "%BUILD_CSPRNG%" == "" ( + set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;6.1;7.0;7.5 + set TORCH_NVCC_FLAGS=-Xfatbin -compress-all + ) ELSE ( + set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_50,code=compute_50 + ) + + set "CUDA_PATH=%CUDA_PATH_V10_0%" + set "PATH=%CUDA_PATH_V10_0%\bin;%PATH%" +) + +:optcheck + +IF "%BUILD_CSPRNG%" == "" ( + call internal\check_opts.bat + IF ERRORLEVEL 1 goto eof + + call internal\copy.bat + IF ERRORLEVEL 1 goto eof +) + +call internal\setup.bat +IF ERRORLEVEL 1 goto eof + +:eof diff --git a/packaging/windows/old/cuda90.bat b/packaging/windows/old/cuda90.bat new file mode 100644 index 0000000..520b794 --- /dev/null +++ b/packaging/windows/old/cuda90.bat @@ -0,0 +1,59 @@ +@echo off + +IF NOT "%BUILD_CSPRNG%" == "" ( + set MODULE_NAME=csprng +) ELSE ( + set MODULE_NAME=pytorch +) + +IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( + call internal\clone.bat + cd .. 
+ IF ERRORLEVEL 1 goto eof +) ELSE ( + call internal\clean.bat +) + +call internal\check_deps.bat +IF ERRORLEVEL 1 goto eof + +REM Check for optional components + +set NO_CUDA= +set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 + +IF "%NVTOOLSEXT_PATH%"=="" ( + echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing + exit /b 1 + goto optcheck +) + +IF "%CUDA_PATH_V9_0%"=="" ( + echo CUDA 9 not found, failing + exit /b 1 +) ELSE ( + IF "%BUILD_CSPRNG%" == "" ( + set TORCH_CUDA_ARCH_LIST=3.5;5.0+PTX;6.0;7.0 + set TORCH_NVCC_FLAGS=-Xfatbin -compress-all + ) ELSE ( + set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_50,code=compute_50 + ) + + set "CUDA_PATH=%CUDA_PATH_V9_0%" + set "PATH=%CUDA_PATH_V9_0%\bin;%PATH%" +) + +:optcheck + +IF "%BUILD_CSPRNG%" == "" ( + call internal\check_opts.bat + IF ERRORLEVEL 1 goto eof + + call internal\copy.bat + IF ERRORLEVEL 1 goto eof +) + +call internal\setup.bat +IF ERRORLEVEL 1 goto eof + +:eof diff --git a/packaging/windows/templates/auth_task.yml b/packaging/windows/templates/auth_task.yml new file mode 100644 index 0000000..7554ffa --- /dev/null +++ b/packaging/windows/templates/auth_task.yml @@ -0,0 +1,17 @@ +jobs: +- job: 'VSTS_Auth_Task' + timeoutInMinutes: 5 + cancelTimeoutInMinutes: 5 + variables: + - group: 'peterjc-vsts-token' + + pool: + vmImage: 'vs2017-win2016' + + steps: + - checkout: self + clean: true + + - template: vsts_auth.yml + parameters: + auth: $(vsts_auth) diff --git a/packaging/windows/templates/build_conda.yml b/packaging/windows/templates/build_conda.yml new file mode 100644 index 0000000..ce29c06 --- /dev/null +++ b/packaging/windows/templates/build_conda.yml @@ -0,0 +1,15 @@ +parameters: + msagent: false + +steps: +- bash: 'find . 
-name "*.sh" -exec dos2unix {} +' + displayName: Replace file endings + +- script: 'if not exist %PYTORCH_FINAL_PACKAGE_DIR% mkdir %PYTORCH_FINAL_PACKAGE_DIR%' + displayName: 'Create final package directory' + +- bash: './packaging/conda/build_csprng.sh $CUDA_VERSION $TORCHCSPRNG_BUILD_VERSION $TORCHCSPRNG_BUILD_NUMBER' + displayName: Build + env: + ${{ if eq(parameters.msagent, 'true') }}: + MAX_JOBS: 2 diff --git a/packaging/windows/templates/build_task.yml b/packaging/windows/templates/build_task.yml new file mode 100644 index 0000000..18d4f8e --- /dev/null +++ b/packaging/windows/templates/build_task.yml @@ -0,0 +1,173 @@ +parameters: + package: '' + spec: '' + jobDesc: '' + packageDesc: '' + msagent: true + cpuEnabled: true + cudaEnabled: true + condaEnabled: true + wheelsEnabled: true + override: false + +jobs: +- job: 'Windows_${{ parameters.spec }}_${{ parameters.package }}_Build' + timeoutInMinutes: 60 + cancelTimeoutInMinutes: 5 + condition: > + or(and(eq('${{ parameters.package }}', 'Conda'), eq('${{ parameters.spec }}', 'CPU'), + eq('${{ parameters.condaEnabled }}', 'true'), eq('${{ parameters.cpuEnabled }}', 'true')), + and(eq('${{ parameters.package }}', 'Wheels'), eq('${{ parameters.spec }}', 'CPU'), + eq('${{ parameters.wheelsEnabled }}', 'true'), eq('${{ parameters.cpuEnabled }}', 'true')), + and(eq('${{ parameters.package }}', 'Conda'), eq('${{ parameters.spec }}', 'CUDA'), + eq('${{ parameters.condaEnabled }}', 'true'), eq('${{ parameters.cudaEnabled }}', 'true')), + and(eq('${{ parameters.package }}', 'Wheels'), eq('${{ parameters.spec }}', 'CUDA'), + eq('${{ parameters.wheelsEnabled }}', 'true'), eq('${{ parameters.cudaEnabled }}', 'true'))) + variables: + - ${{ if eq(parameters.override, 'true') }}: + - name: TORCHCSPRNG_BUILD_NUMBER + value: 1 + - name: PYTORCH_REPO + value: 'pytorch' + - name: PYTORCH_BRANCH + value: 'v0.4.0' + - ${{ if eq(parameters.msagent, 'true') }}: + - name: USE_SCCACHE + value: 0 + - ${{ if eq(parameters.msagent, 
'false') }}: + - name: USE_SCCACHE + value: 1 + - ${{ if eq(parameters.package, 'Conda') }}: + - group: peterjc_anaconda_token + - name: PYTORCH_FINAL_PACKAGE_DIR + value: '$(Build.Repository.LocalPath)\packaging\windows\output' + + strategy: + maxParallel: 10 + matrix: + ${{ if eq(parameters.spec, 'CPU') }}: + PY3.5: + DESIRED_PYTHON: 3.5 + CUDA_VERSION: cpu + PY3.6: + DESIRED_PYTHON: 3.6 + CUDA_VERSION: cpu + PY3.7: + DESIRED_PYTHON: 3.7 + CUDA_VERSION: cpu + PY3.8: + DESIRED_PYTHON: 3.8 + CUDA_VERSION: cpu + PY3.9: + DESIRED_PYTHON: 3.9 + CUDA_VERSION: cpu + ${{ if ne(parameters.spec, 'CPU') }}: + PY3.5_92: + DESIRED_PYTHON: 3.5 + CUDA_VERSION: 92 + PY3.6_92: + DESIRED_PYTHON: 3.6 + CUDA_VERSION: 92 + PY3.7_92: + DESIRED_PYTHON: 3.7 + CUDA_VERSION: 92 + PY3.8_92: + DESIRED_PYTHON: 3.8 + CUDA_VERSION: 92 + PY3.9_92: + DESIRED_PYTHON: 3.9 + CUDA_VERSION: 92 + PY3.5_101: + DESIRED_PYTHON: 3.5 + CUDA_VERSION: 101 + PY3.6_101: + DESIRED_PYTHON: 3.6 + CUDA_VERSION: 101 + PY3.7_101: + DESIRED_PYTHON: 3.7 + CUDA_VERSION: 101 + PY3.8_101: + DESIRED_PYTHON: 3.8 + CUDA_VERSION: 101 + PY3.9_101: + DESIRED_PYTHON: 3.9 + CUDA_VERSION: 101 + PY3.5_102: + DESIRED_PYTHON: 3.5 + CUDA_VERSION: 102 + PY3.6_102: + DESIRED_PYTHON: 3.6 + CUDA_VERSION: 102 + PY3.7_102: + DESIRED_PYTHON: 3.7 + CUDA_VERSION: 102 + PY3.8_102: + DESIRED_PYTHON: 3.8 + CUDA_VERSION: 102 + PY3.9_102: + DESIRED_PYTHON: 3.9 + CUDA_VERSION: 102 + + pool: + ${{ if eq(parameters.msagent, 'true') }}: + vmImage: 'vs2017-win2016' + ${{ if eq(parameters.msagent, 'false') }}: + name: 'release' + + steps: + - checkout: self + clean: true + + - template: setup_env_for_msagent.yml + parameters: + msagent: ${{ parameters.msagent }} + + # - ${{ if and(eq(parameters.override, 'true'), eq(parameters.package, 'Wheels')) }}: + # - template: override_pytorch_version.yml + + - template: setup_nightly_variables.yml + parameters: + package: ${{ parameters.package }} + + - ${{ if eq(parameters.package, 'Wheels') }}: + - template: 
build_wheels.yml + parameters: + msagent: ${{ parameters.msagent }} + + - ${{ if eq(parameters.package, 'Conda') }}: + - template: build_conda.yml + parameters: + msagent: ${{ parameters.msagent }} + + - ${{ if or(eq(parameters.package, 'Wheels'), eq(parameters.package, 'Conda')) }}: + - template: publish_test_results.yml + parameters: + msagent: ${{ parameters.msagent }} + + # If you want to upload binaries to S3 & Anaconda Cloud, please uncomment this section. + - ${{ if and(eq(parameters.package, 'Wheels'), eq(parameters.spec, 'CPU')) }}: + - template: upload_to_s3.yml + parameters: + cuVer: '$(CUDA_VERSION)' + cudaVer: '$(CUDA_VERSION)' + + - ${{ if and(eq(parameters.package, 'Wheels'), ne(parameters.spec, 'CPU')) }}: + - template: upload_to_s3.yml + parameters: + cuVer: 'cu$(CUDA_VERSION)' + cudaVer: 'cuda$(CUDA_VERSION)' + + - ${{ if eq(parameters.package, 'Conda') }}: + - template: upload_to_conda.yml + parameters: + user: $(peterjc_conda_username) + pass: $(peterjc_conda_password) + + # If you want to upload binaries to Azure Git, please uncomment this section. 
+ # - ${{ if or(eq(parameters.package, 'Wheels'), eq(parameters.package, 'Conda')) }}: + # - template: publish_test_results.yml + # parameters: + # msagent: ${{ parameters.msagent }} + # - template: publish_packages.yml + # parameters: + # package: ${{ parameters.package }} diff --git a/packaging/windows/templates/build_wheels.yml b/packaging/windows/templates/build_wheels.yml new file mode 100644 index 0000000..8393fdb --- /dev/null +++ b/packaging/windows/templates/build_wheels.yml @@ -0,0 +1,9 @@ +parameters: + msagent: false + +steps: +- script: 'call packaging/windows/build_csprng.bat %CUDA_VERSION% %TORCHCSPRNG_BUILD_VERSION% %TORCHCSPRNG_BUILD_NUMBER%' + displayName: Build + env: + ${{ if eq(parameters.msagent, 'true') }}: + MAX_JOBS: 2 diff --git a/packaging/windows/templates/linux_build_task.yml b/packaging/windows/templates/linux_build_task.yml new file mode 100644 index 0000000..0b32892 --- /dev/null +++ b/packaging/windows/templates/linux_build_task.yml @@ -0,0 +1,38 @@ +parameters: + msagent: true + enabled: false + +jobs: +- job: 'Linux_CPU_Conda_Build' + timeoutInMinutes: 0 + cancelTimeoutInMinutes: 5 + condition: ${{ eq(parameters.enabled, 'true') }} + variables: + CUDA_VERSION: cpu + TORCH_CONDA_BUILD_FOLDER: pytorch-nightly + PYTORCH_FINAL_PACKAGE_DIR: '$(Build.Repository.LocalPath)/output' + + strategy: + maxParallel: 10 + matrix: + PY3.5: + DESIRED_PYTHON: 3.5 + + pool: + vmImage: 'ubuntu-16.04' + + steps: + - checkout: self + clean: true + + - script: 'sudo apt-get install p7zip-full' + displayName: 'Install 7Zip' + + - task: CondaEnvironment@1 + displayName: 'Install conda-build' + inputs: + packageSpecs: 'conda-build' + + - template: build_conda.yml + parameters: + msagent: ${{ parameters.msagent }} diff --git a/packaging/windows/templates/override_pytorch_version.yml b/packaging/windows/templates/override_pytorch_version.yml new file mode 100644 index 0000000..8af93ae --- /dev/null +++ 
b/packaging/windows/templates/override_pytorch_version.yml @@ -0,0 +1,6 @@ +steps: +- script: 'windows/internal/override_pytorch_version.bat' + displayName: 'Override PyTorch Build Version for Wheels' + +- script: 'echo $(PYTORCH_BUILD_VERSION)' + displayName: 'Show PyTorch Build Version' diff --git a/packaging/windows/templates/publish_packages.yml b/packaging/windows/templates/publish_packages.yml new file mode 100644 index 0000000..51ce824 --- /dev/null +++ b/packaging/windows/templates/publish_packages.yml @@ -0,0 +1,8 @@ +parameters: + package: '' + +steps: +- script: 'packaging/windows/internal/publish.bat' + displayName: 'Upload packages to Azure DevOps Repo' + env: + PACKAGEFULLNAME: ${{ parameters.package }} diff --git a/packaging/windows/templates/publish_test_results.yml b/packaging/windows/templates/publish_test_results.yml new file mode 100644 index 0000000..1e0dc02 --- /dev/null +++ b/packaging/windows/templates/publish_test_results.yml @@ -0,0 +1,6 @@ +steps: +- task: PublishTestResults@2 # No test results to publish + inputs: + testResultsFiles: 'windows/pytorch/test/**/*.xml' + testRunTitle: 'Publish test results' + enabled: false diff --git a/packaging/windows/templates/setup_env_for_msagent.yml b/packaging/windows/templates/setup_env_for_msagent.yml new file mode 100644 index 0000000..377734f --- /dev/null +++ b/packaging/windows/templates/setup_env_for_msagent.yml @@ -0,0 +1,25 @@ +parameters: + msagent: false + +steps: +- ${{ if eq(parameters.msagent, 'true') }}: + - task: BatchScript@1 + displayName: 'Install 7Zip & cURL' + inputs: + filename: 'packaging/windows/internal/dep_install.bat' + + modifyEnvironment: true + + - task: BatchScript@1 + displayName: 'Install Visual Studio 2017' + inputs: + filename: 'packaging/windows/internal/vs_install.bat' + + modifyEnvironment: true + + - task: BatchScript@1 + displayName: 'Install CUDA' + inputs: + filename: 'packaging/windows/internal/cuda_install.bat' + + modifyEnvironment: true diff --git 
a/packaging/windows/templates/setup_nightly_variables.yml b/packaging/windows/templates/setup_nightly_variables.yml new file mode 100644 index 0000000..94b2fe9 --- /dev/null +++ b/packaging/windows/templates/setup_nightly_variables.yml @@ -0,0 +1,11 @@ +parameters: + package: '' + +steps: +- task: BatchScript@1 + displayName: 'Setup nightly variables' + inputs: + filename: 'packaging/windows/internal/nightly_defaults.bat' + arguments: ${{ parameters.package }} + + modifyEnvironment: true diff --git a/packaging/windows/templates/upload_to_conda.yml b/packaging/windows/templates/upload_to_conda.yml new file mode 100644 index 0000000..dc172bc --- /dev/null +++ b/packaging/windows/templates/upload_to_conda.yml @@ -0,0 +1,10 @@ +parameters: + user: '' + pass: '' + +steps: +- script: 'call packaging/windows/internal/upload.bat' + displayName: 'Upload packages to Anaconda Cloud' + env: + PYTORCH_ANACONDA_USERNAME: ${{ parameters.user }} + PYTORCH_ANACONDA_PASSWORD: ${{ parameters.pass }} diff --git a/packaging/windows/templates/upload_to_s3.yml b/packaging/windows/templates/upload_to_s3.yml new file mode 100644 index 0000000..1de91b5 --- /dev/null +++ b/packaging/windows/templates/upload_to_s3.yml @@ -0,0 +1,15 @@ +parameters: + cuVer: '' + cudaVer: '' + +steps: +- task: AmazonWebServices.aws-vsts-tools.S3Upload.S3Upload@1 + displayName: 'Upload ${{ parameters.cuVer }} wheel to S3' + inputs: + awsCredentials: 'Pytorch S3 bucket' + bucketName: 'pytorch' + sourceFolder: 'packaging/windows/output' + globExpressions: '*.whl' + targetFolder: 'whl/nightly/${{ parameters.cuVer }}/' + filesAcl: 'public-read' + flattenFolders: 'true' diff --git a/packaging/windows/templates/vsts_auth.yml b/packaging/windows/templates/vsts_auth.yml new file mode 100644 index 0000000..fde767d --- /dev/null +++ b/packaging/windows/templates/vsts_auth.yml @@ -0,0 +1,8 @@ +parameters: + auth: '' + +steps: +- script: 'call packaging/windows/internal/auth.bat' + displayName: 'Sign in to Azure Pipelines' 
+ env: + VSTS_AUTH: ${{ parameters.auth }} diff --git a/setup.py b/setup.py index cc77d67..5143b53 100644 --- a/setup.py +++ b/setup.py @@ -1,94 +1,193 @@ +import distutils.command.clean +import glob import os -from sys import platform +import shutil import subprocess -from setuptools import setup -from torch.utils import cpp_extension - -cu_version = os.getenv('CU_VERSION', default=None) -if cu_version is None: - use_cuda = os.getenv('USE_CUDA', default=None) - if use_cuda is None: - build_cuda = cpp_extension.CUDA_HOME is not None - else: - build_cuda = use_cuda -else: - build_cuda = cu_version != 'cpu' - -CXX_FLAGS = [] -if platform != "darwin": - CXX_FLAGS.append('-fopenmp') - -NVCC_FLAGS = os.getenv('NVCC_FLAGS', '') -if NVCC_FLAGS == '': - NVCC_FLAGS = [] -else: - NVCC_FLAGS = NVCC_FLAGS.split(' ') -# TODO: replace with a loop: -if '--expt-extended-lambda' not in NVCC_FLAGS: - NVCC_FLAGS.append('--expt-extended-lambda') -if '-Xcompiler' not in NVCC_FLAGS: - NVCC_FLAGS.append('-Xcompiler') -if '-fopenmp' not in NVCC_FLAGS: - NVCC_FLAGS.append('-fopenmp') -# NVCC_FLAGS = ['--expt-extended-lambda', '-Xcompiler', '-fopenmp'] - -module_name = 'torch_csprng' - -this_dir = os.path.dirname(os.path.abspath(__file__)) -extensions_dir = os.path.join(this_dir, module_name, 'csrc') - -if build_cuda: - csprng_ext = cpp_extension.CUDAExtension( - module_name, [os.path.join(extensions_dir, 'csprng.cu')], - extra_compile_args={'cxx': [], - 'nvcc': NVCC_FLAGS} - ) -else: - csprng_ext = cpp_extension.CppExtension( - module_name, [os.path.join(extensions_dir, 'csprng.cpp')], - extra_compile_args={'cxx': CXX_FLAGS} - ) +import sys + +import torch +from setuptools import find_packages, setup +from torch.utils.cpp_extension import ( + BuildExtension, + CppExtension, + CUDA_HOME, + CUDAExtension, +) + +version = open("version.txt", "r").read().strip() +sha = "Unknown" +package_name = "torchcsprng" -version = open('version.txt', 'r').read().strip() -sha = 'Unknown' -package_name = 
'pytorch_csprng' +cwd = os.path.dirname(os.path.abspath(__file__)) try: - sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=this_dir).decode('ascii').strip() + sha = ( + subprocess.check_output(["git", "rev-parse", "HEAD"], cwd=cwd) + .decode("ascii") + .strip() + ) except Exception: pass -if os.getenv('BUILD_VERSION'): - version = os.getenv('BUILD_VERSION') -elif sha != 'Unknown': - version += '+' + sha[:7] +if os.getenv("BUILD_VERSION"): + version = os.getenv("BUILD_VERSION") +elif sha != "Unknown": + version += "+" + sha[:7] print("Building wheel {}-{}".format(package_name, version)) -# Doesn't work yet :( -# version_path = os.path.join(this_dir, module_name, 'version.py') -# with open(version_path, 'w') as f: -# f.write("__version__ = '{}'\n".format(version)) -# f.write("git_version = {}\n".format(repr(sha))) + +def write_version_file(): + version_path = os.path.join(cwd, "torchcsprng", "version.py") + with open(version_path, "w") as f: + f.write("__version__ = '{}'\n".format(version)) + f.write("git_version = {}\n".format(repr(sha))) + # f.write("from torchcsprng.extension import _check_cuda_version\n") + # f.write("if _check_cuda_version() > 0:\n") + # f.write(" cuda = _check_cuda_version()\n") + + +write_version_file() with open("README.md", "r") as fh: long_description = fh.read() + +requirements = [ + "torch", +] + + +def append_flags(flags, flags_to_append): + for flag in flags_to_append: + if not flag in flags: + flags.append(flag) + return flags + + +def get_extensions(): + build_cuda = torch.cuda.is_available() or os.getenv("FORCE_CUDA", "0") == "1" + + module_name = "torchcsprng" + + extensions_dir = os.path.join(cwd, module_name, "csrc") + + openmp = "ATen parallel backend: OpenMP" in torch.__config__.parallel_info() + + main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) + source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) + + sources = main_file + source_cpu + extension = CppExtension + + define_macros = [] 
+ + cxx_flags = os.getenv("CXX_FLAGS", "") + if cxx_flags == "": + cxx_flags = [] + else: + cxx_flags = cxx_flags.split(" ") + if openmp: + if sys.platform == "linux": + cxx_flags = append_flags(cxx_flags, ["-fopenmp"]) + elif sys.platform == "win32": + cxx_flags = append_flags(cxx_flags, ["/openmp"]) + # elif sys.platform == 'darwin': + # cxx_flags = append_flags(cxx_flags, ['-Xpreprocessor', '-fopenmp']) + + if build_cuda: + extension = CUDAExtension + source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) + sources += source_cuda + + define_macros += [("WITH_CUDA", None)] + + nvcc_flags = os.getenv("NVCC_FLAGS", "") + if nvcc_flags == "": + nvcc_flags = [] + else: + nvcc_flags = nvcc_flags.split(" ") + nvcc_flags = append_flags(nvcc_flags, ["--expt-extended-lambda", "-Xcompiler"]) + extra_compile_args = { + "cxx": cxx_flags, + "nvcc": nvcc_flags, + } + else: + extra_compile_args = { + "cxx": cxx_flags, + } + + ext_modules = [ + extension( + module_name + "._C", + sources, + define_macros=define_macros, + extra_compile_args=extra_compile_args, + ) + ] + + return ext_modules + + +class clean(distutils.command.clean.clean): + def run(self): + with open(".gitignore", "r") as f: + ignores = f.read() + start_deleting = False + for wildcard in filter(None, ignores.split("\n")): + if ( + wildcard + == "# do not change or delete this comment - `python setup.py clean` deletes everything after this line" + ): + start_deleting = True + if not start_deleting: + continue + for filename in glob.glob(wildcard): + try: + os.remove(filename) + except OSError: + shutil.rmtree(filename, ignore_errors=True) + + # It's an old-style class in Python 2.7... 
+ distutils.command.clean.clean.run(self) + + setup( + # Metadata name=package_name, version=version, author="Pavel Belevich", author_email="pbelevich@fb.com", - description="Cryptographically secure pseudorandom number generators for PyTorch", - # long_description=long_description, - # long_description_content_type="text/markdown", - license='BSD-3', url="https://github.com/pytorch/csprng", + description="Cryptographically secure pseudorandom number generators for PyTorch", + long_description=long_description, + long_description_content_type="text/markdown", + license="BSD-3", + # Package info + packages=find_packages(exclude=("test",)), classifiers=[ + "Intended Audience :: Developers", + "Intended Audience :: Education", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Programming Language :: C++", "Programming Language :: Python :: 3", - 'License :: OSI Approved :: BSD License', - 'Programming Language :: C++', - 'Programming Language :: Python :: 3', + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Mathematics", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development", + "Topic :: Software Development :: Libraries", + "Topic :: Software Development :: Libraries :: Python Modules", ], - python_requires='>=3.6', - ext_modules=[csprng_ext], - cmdclass={'build_ext': cpp_extension.BuildExtension}) + python_requires=">=3.6", + install_requires=requirements, + ext_modules=get_extensions(), + test_suite="test", + cmdclass={ + "build_ext": BuildExtension, + "clean": clean, + }, +) diff --git a/test/__init__.py b/test/__init__.py index e69de29..83766c4 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) Meta Platforms, Inc. and its affiliates. 
All Rights Reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. diff --git a/test/test_csprng.py b/test/test_csprng.py index 8447458..3245ffd 100644 --- a/test/test_csprng.py +++ b/test/test_csprng.py @@ -1,36 +1,71 @@ -import unittest -import torch -from scipy import stats -import numpy as np +# Copyright (c) Meta Platforms, Inc. and its affiliates. All Rights Reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + import math +import os import random import time +import unittest + +import numpy as np +import torch +from Crypto.Cipher import AES +from Crypto.Util import Counter +from scipy import stats try: - import torch_csprng as csprng + import torchcsprng as csprng except ImportError: raise RuntimeError("CSPRNG not available") +IS_SANDCASTLE = ( + os.getenv("SANDCASTLE") == "1" or os.getenv("TW_JOB_USER") == "sandcastle" +) +IS_FBCODE = os.getenv("PYTORCH_TEST_FBCODE") == "1" + + +def to_numpy(t, dtype=torch.float): + if t.dtype == torch.bfloat16: + t = t.to(dtype) + return t.numpy() + + +def to_bytes(t): + if t.dtype == torch.bfloat16: + t = t.view(torch.int16) + return t.cpu().numpy().view(np.int8) + + class TestCSPRNG(unittest.TestCase): all_generators = [ csprng.create_random_device_generator(), - csprng.create_random_device_generator('/dev/urandom'), + csprng.create_random_device_generator("/dev/urandom"), csprng.create_mt19937_generator(), - csprng.create_mt19937_generator(42) + csprng.create_mt19937_generator(42), ] int_dtypes = [torch.uint8, torch.int8, torch.int16, torch.int32, torch.int64] - fp_ftypes = [torch.float, torch.double] + standard_fp_dtypes = [torch.float, torch.double] + + non_standard_fp_dtypes = [torch.half, torch.bfloat16] + + fp_dtypes = standard_fp_dtypes + non_standard_fp_dtypes - num_dtypes = int_dtypes + fp_ftypes + num_dtypes = int_dtypes + fp_dtypes 
all_dtypes = num_dtypes + [torch.bool] size = 1000 - all_devices = ['cpu', 'cuda'] if csprng.supports_cuda() else ['cpu'] + all_devices = ( + ["cpu", "cuda"] + if (torch.cuda.is_available() and csprng.supports_cuda()) + else ["cpu"] + ) def test_random_kstest(self): for device in self.all_devices: @@ -40,20 +75,38 @@ def test_random_kstest(self): to_inc = 2**24 elif dtype == torch.double: to_inc = 2**53 + elif dtype == torch.half: + to_inc = 2**11 + elif dtype == torch.bfloat16: + to_inc = 2**8 else: to_inc = torch.iinfo(dtype).max - t = torch.empty(self.size, dtype=dtype, device=device).random_(generator=gen) - res = stats.kstest(t.cpu(), stats.randint.cdf, args=(0, to_inc)) + t = torch.empty(self.size, dtype=dtype, device=device).random_( + generator=gen + ) + res = stats.kstest( + to_numpy(t.cpu()), stats.randint.cdf, args=(0, to_inc) + ) self.assertTrue(res.statistic < 0.1) - @unittest.skipIf(not csprng.supports_cuda(), "csprng was not compiled with CUDA support") + no_cuda = not torch.cuda.is_available() or not csprng.supports_cuda() + + no_cuda_message = ( + "CUDA is not available or csprng was not compiled with CUDA support" + ) + + @unittest.skipIf(no_cuda, no_cuda_message) def test_random_cpu_vs_cuda(self): for dtype in self.num_dtypes: gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=dtype, device='cpu').random_(generator=gen) + cpu_t = torch.empty(self.size, dtype=dtype, device="cpu").random_( + generator=gen + ) gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty(self.size, dtype=dtype, device='cuda').random_(generator=gen) + cuda_t = torch.empty(self.size, dtype=dtype, device="cuda").random_( + generator=gen + ) self.assertTrue((cpu_t == cuda_t.cpu()).all()) def test_random_to_kstest(self): @@ -61,18 +114,26 @@ def test_random_to_kstest(self): for device in self.all_devices: for gen in self.all_generators: for dtype in self.num_dtypes: - t = torch.zeros(self.size, dtype=dtype, device=device).random_(to_, 
generator=gen) - res = stats.kstest(t.cpu(), stats.randint.cdf, args=(0, to_)) + t = torch.zeros(self.size, dtype=dtype, device=device).random_( + to_, generator=gen + ) + res = stats.kstest( + to_numpy(t.cpu()), stats.randint.cdf, args=(0, to_) + ) self.assertTrue(res.statistic < 0.1) - @unittest.skipIf(not csprng.supports_cuda(), "csprng was not compiled with CUDA support") + @unittest.skipIf(no_cuda, no_cuda_message) def test_random_to_cpu_vs_cuda(self): to_ = 42 for dtype in self.num_dtypes: gen = csprng.create_mt19937_generator(42) - cpu_t = torch.zeros(self.size, dtype=dtype, device='cpu').random_(to_, generator=gen) + cpu_t = torch.zeros(self.size, dtype=dtype, device="cpu").random_( + to_, generator=gen + ) gen = csprng.create_mt19937_generator(42) - cuda_t = torch.zeros(self.size, dtype=dtype, device='cuda').random_(to_, generator=gen) + cuda_t = torch.zeros(self.size, dtype=dtype, device="cuda").random_( + to_, generator=gen + ) self.assertTrue((cpu_t == cuda_t.cpu()).all()) def test_random_from_to_kstest(self): @@ -82,20 +143,30 @@ def test_random_from_to_kstest(self): for from_ in [0, 24, 42]: for to_ in [42, 99, 123]: if from_ < to_: - t = torch.zeros(self.size, dtype=dtype, device=device).random_(from_, to_, generator=gen) - res = stats.kstest(t.cpu(), stats.randint.cdf, args=(from_, to_)) + t = torch.zeros( + self.size, dtype=dtype, device=device + ).random_(from_, to_, generator=gen) + res = stats.kstest( + to_numpy(t.cpu()), + stats.randint.cdf, + args=(from_, to_), + ) self.assertTrue(res.statistic < 0.2) - @unittest.skipIf(not csprng.supports_cuda(), "csprng was not compiled with CUDA support") + @unittest.skipIf(no_cuda, no_cuda_message) def test_random_from_to_cpu_vs_cuda(self): for dtype in self.num_dtypes: for from_ in [0, 24, 42]: for to_ in [42, 99, 123]: if from_ < to_: gen = csprng.create_mt19937_generator(42) - cpu_t = torch.zeros(self.size, dtype=dtype, device='cpu').random_(from_, to_, generator=gen) + cpu_t = torch.zeros( + self.size, 
dtype=dtype, device="cpu" + ).random_(from_, to_, generator=gen) gen = csprng.create_mt19937_generator(42) - cuda_t = torch.zeros(self.size, dtype=dtype, device='cuda').random_(from_, to_, generator=gen) + cuda_t = torch.zeros( + self.size, dtype=dtype, device="cuda" + ).random_(from_, to_, generator=gen) self.assertTrue((cpu_t == cuda_t.cpu()).all()) def test_random_bool(self): @@ -107,147 +178,225 @@ def test_random_bool(self): t.random_(generator=gen) self.assertEqual(t.min(), False) self.assertEqual(t.max(), True) - self.assertTrue(0.4 < (t.eq(True)).to(torch.int).sum().item() / self.size < 0.6) + self.assertTrue( + 0.4 < (t.eq(True)).to(torch.int).sum().item() / self.size < 0.6 + ) t.fill_(True) t.random_(generator=gen) self.assertEqual(t.min(), False) self.assertEqual(t.max(), True) - self.assertTrue(0.4 < (t.eq(True)).to(torch.int).sum().item() / self.size < 0.6) + self.assertTrue( + 0.4 < (t.eq(True)).to(torch.int).sum().item() / self.size < 0.6 + ) - @unittest.skipIf(not csprng.supports_cuda(), "csprng was not compiled with CUDA support") + @unittest.skipIf(no_cuda, no_cuda_message) def test_random_bool_cpu_vs_cuda(self): gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=torch.bool, device='cpu').random_(generator=gen) + cpu_t = torch.empty(self.size, dtype=torch.bool, device="cpu").random_( + generator=gen + ) gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty(self.size, dtype=torch.bool, device='cuda').random_(generator=gen) + cuda_t = torch.empty(self.size, dtype=torch.bool, device="cuda").random_( + generator=gen + ) self.assertTrue((cpu_t == cuda_t.cpu()).all()) def test_uniform_kstest(self): for device in self.all_devices: for gen in self.all_generators: - for dtype in self.fp_ftypes: + for dtype in self.fp_dtypes: for from_ in [-42, 0, 4.2]: for to_ in [-4.2, 0, 42]: if to_ > from_: - t = torch.empty(self.size, dtype=dtype, device=device).uniform_(from_, to_, generator=gen) - res = 
stats.kstest(t.cpu().to(torch.double), 'uniform', args=(from_, (to_ - from_))) + t = torch.empty( + self.size, dtype=dtype, device=device + ).uniform_(from_, to_, generator=gen) + res = stats.kstest( + to_numpy(t.cpu(), torch.double), + "uniform", + args=(from_, (to_ - from_)), + ) self.assertTrue(res.statistic < 0.1) - @unittest.skipIf(not csprng.supports_cuda(), "csprng was not compiled with CUDA support") + @unittest.skipIf(no_cuda, no_cuda_message) def test_uniform_cpu_vs_cuda(self): - for dtype in self.fp_ftypes: + for dtype in self.fp_dtypes: for from_ in [-42, 0, 4.2]: for to_ in [-4.2, 0, 42]: if to_ > from_: gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=dtype, device='cpu').uniform_(from_, to_, generator=gen) + cpu_t = torch.empty( + self.size, dtype=dtype, device="cpu" + ).uniform_(from_, to_, generator=gen) gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty(self.size, dtype=dtype, device='cuda').uniform_(from_, to_, generator=gen) - self.assertTrue((cpu_t - cuda_t.cpu()).abs().max() < 1e-9) + cuda_t = torch.empty( + self.size, dtype=dtype, device="cuda" + ).uniform_(from_, to_, generator=gen) + self.assertTrue(torch.allclose(cpu_t, cuda_t.cpu(), 1e-9)) def test_normal_kstest(self): for device in self.all_devices: for gen in self.all_generators: - for dtype in self.fp_ftypes: + for dtype in self.fp_dtypes: for mean in [-3, 0, 7]: for std in [1, 5, 7]: - t = torch.empty(self.size, dtype=dtype, device=device).normal_(mean=mean, std=std, generator=gen) - res = stats.kstest(t.cpu().to(torch.double), 'norm', args=(mean, std)) + t = torch.empty( + self.size, dtype=dtype, device=device + ).normal_(mean=mean, std=std, generator=gen) + res = stats.kstest( + to_numpy(t.cpu(), torch.double), + "norm", + args=(mean, std), + ) self.assertTrue(res.statistic < 0.1) - @unittest.skipIf(not csprng.supports_cuda(), "csprng was not compiled with CUDA support") + @unittest.skipIf(no_cuda, no_cuda_message) def 
test_normal_cpu_vs_cuda(self): - for dtype in self.fp_ftypes: + for dtype in self.fp_dtypes: for mean in [-3, 0, 7]: for std in [1, 5, 7]: gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=dtype, device='cpu').normal_(mean=mean, std=std, generator=gen) + cpu_t = torch.empty(self.size, dtype=dtype, device="cpu").normal_( + mean=mean, std=std, generator=gen + ) gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty(self.size, dtype=dtype, device='cuda').normal_(mean=mean, std=std, generator=gen) - self.assertTrue((cpu_t - cuda_t.cpu()).abs().max() < 1e-9) + cuda_t = torch.empty(self.size, dtype=dtype, device="cuda").normal_( + mean=mean, std=std, generator=gen + ) + self.assertTrue(torch.allclose(cpu_t, cuda_t.cpu(), 1e-9)) def test_log_normal_kstest(self): for device in self.all_devices: for gen in self.all_generators: - for dtype in self.fp_ftypes: + for dtype in self.fp_dtypes: for mean in [-3, 0, 7]: for std in [1, 5, 7]: - t = torch.empty(self.size, dtype=dtype, device=device).log_normal_(mean=mean, std=std, generator=gen) - res = stats.kstest(t.cpu().to(torch.double), 'lognorm', args=(std, 0, math.exp(mean))) - self.assertTrue(res.statistic < 0.1) + t = torch.empty( + self.size, dtype=dtype, device=device + ).log_normal_(mean=mean, std=std, generator=gen) + res = stats.kstest( + to_numpy(t.cpu(), torch.double), + "lognorm", + args=(std, 0, math.exp(mean)), + ) + if dtype in [torch.half, torch.bfloat16]: + self.assertTrue(res.statistic < 0.4) + else: + self.assertTrue(res.statistic < 0.1) - @unittest.skipIf(not csprng.supports_cuda(), "csprng was not compiled with CUDA support") + @unittest.skipIf(no_cuda, no_cuda_message) def test_log_normal_cpu_vs_cuda(self): - for dtype in self.fp_ftypes: + for dtype in self.fp_dtypes: for mean in [-3, 0, 7]: for std in [1, 5, 7]: gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=dtype, device='cpu').log_normal_(mean=mean, std=std, generator=gen) + cpu_t 
= torch.empty( + self.size, dtype=dtype, device="cpu" + ).log_normal_(mean=mean, std=std, generator=gen) gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty(self.size, dtype=dtype, device='cuda').log_normal_(mean=mean, std=std, generator=gen) - self.assertTrue((cpu_t - cuda_t.cpu()).abs().max() < 1e-4) + cuda_t = torch.empty( + self.size, dtype=dtype, device="cuda" + ).log_normal_(mean=mean, std=std, generator=gen) + self.assertTrue( + torch.allclose(cpu_t, cuda_t.cpu(), 1e-4, equal_nan=True) + ) def test_exponential_kstest(self): for device in self.all_devices: for gen in self.all_generators: - for dtype in self.fp_ftypes: + for dtype in self.fp_dtypes: for lambd in [0.5, 1.0, 5.0]: - t = torch.empty(self.size, dtype=dtype, device=device).exponential_(lambd=lambd, generator=gen) - res = stats.kstest(t.cpu().to(torch.double), 'expon', args=(0, 1 / lambd,)) + t = torch.empty( + self.size, dtype=dtype, device=device + ).exponential_(lambd=lambd, generator=gen) + res = stats.kstest( + to_numpy(t.cpu(), torch.double), + "expon", + args=( + 0, + 1 / lambd, + ), + ) self.assertTrue(res.statistic < 0.1) - @unittest.skipIf(not csprng.supports_cuda(), "csprng was not compiled with CUDA support") + @unittest.skipIf(no_cuda, no_cuda_message) + @unittest.skip("https://github.com/pytorch/pytorch/issues/38662") def test_exponential_cpu_vs_cuda(self): - for dtype in self.fp_ftypes: + for dtype in self.fp_dtypes: for lambd in [0.5, 1.0, 5.0]: gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=dtype, device='cpu').exponential_(lambd=lambd, generator=gen) + cpu_t = torch.empty(self.size, dtype=dtype, device="cpu").exponential_( + lambd=lambd, generator=gen + ) gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty(self.size, dtype=dtype, device='cuda').exponential_(lambd=lambd, generator=gen) - self.assertTrue((cpu_t - cuda_t.cpu()).abs().max() < 1e-9) + cuda_t = torch.empty( + self.size, dtype=dtype, device="cuda" + 
).exponential_(lambd=lambd, generator=gen) + self.assertTrue(torch.allclose(cpu_t, cuda_t.cpu(), 1e-9)) def test_cauchy_kstest(self): for device in self.all_devices: for gen in self.all_generators: - for dtype in self.fp_ftypes: + for dtype in self.fp_dtypes: for median in [-10, 0, 50]: for sigma in [0.5, 1.0, 10.0]: - t = torch.empty(self.size, dtype=dtype, device=device).cauchy_(median=median, sigma=sigma, generator=gen) - res = stats.kstest(t.cpu().to(torch.double), 'cauchy', args=(median, sigma)) - self.assertTrue(res.statistic < 0.1) + t = torch.empty( + self.size, dtype=dtype, device=device + ).cauchy_(median=median, sigma=sigma, generator=gen) + res = stats.kstest( + to_numpy(t.cpu(), torch.double), + "cauchy", + args=(median, sigma), + ) + if dtype in [torch.half, torch.bfloat16]: + self.assertTrue(res.statistic < 0.4) + else: + self.assertTrue(res.statistic < 0.1) - @unittest.skipIf(not csprng.supports_cuda(), "csprng was not compiled with CUDA support") + @unittest.skipIf(no_cuda, no_cuda_message) def test_cauchy_cpu_vs_cuda(self): - for dtype in self.fp_ftypes: + for dtype in self.fp_dtypes: for median in [-10, 0, 50]: for sigma in [0.5, 1.0, 10.0]: gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=dtype, device='cpu').cauchy_(median=median, sigma=sigma, generator=gen) + cpu_t = torch.empty(self.size, dtype=dtype, device="cpu").cauchy_( + median=median, sigma=sigma, generator=gen + ) gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty(self.size, dtype=dtype, device='cuda').cauchy_(median=median, sigma=sigma, generator=gen) - self.assertTrue((cpu_t - cuda_t.cpu()).abs().max() < 1e-9) + cuda_t = torch.empty(self.size, dtype=dtype, device="cuda").cauchy_( + median=median, sigma=sigma, generator=gen + ) + self.assertTrue(torch.allclose(cpu_t, cuda_t.cpu(), 1e-9)) def test_geometric(self): for device in self.all_devices: for gen in self.all_generators: - for dtype in self.fp_ftypes: + for dtype in self.fp_dtypes: 
for p in [0.2, 0.5, 0.8]: - t = torch.empty(self.size, dtype=dtype, device=device).geometric_(p=p, generator=gen) + t = torch.empty( + self.size, dtype=dtype, device=device + ).geometric_(p=p, generator=gen) # actual = np.histogram(t.cpu().to(torch.double), np.arange(1, 100))[0] # expected = stats.geom(p).pmf(np.arange(1, 99)) * self.size # res = stats.chisquare(actual, expected) # self.assertAlmostEqual(res.pvalue, 1.0, delta=0.5) TODO https://github.com/pytorch/csprng/issues/7 - @unittest.skipIf(not csprng.supports_cuda(), "csprng was not compiled with CUDA support") + @unittest.skipIf(no_cuda, no_cuda_message) def test_geometric_cpu_vs_cuda(self): - for dtype in self.fp_ftypes: + for dtype in self.fp_dtypes: for p in [0.2, 0.5, 0.8]: gen = csprng.create_mt19937_generator(42) - cpu_t = torch.empty(self.size, dtype=dtype, device='cpu').geometric_(p=p, generator=gen) + cpu_t = torch.empty(self.size, dtype=dtype, device="cpu").geometric_( + p=p, generator=gen + ) gen = csprng.create_mt19937_generator(42) - cuda_t = torch.empty(self.size, dtype=dtype, device='cuda').geometric_(p=p, generator=gen) - self.assertTrue((cpu_t - cuda_t.cpu()).abs().max() < 1e-9) + cuda_t = torch.empty(self.size, dtype=dtype, device="cuda").geometric_( + p=p, generator=gen + ) + self.assertTrue( + torch.allclose(cpu_t, cuda_t.cpu(), 1e-9, equal_nan=True) + ) def test_non_contiguous_vs_contiguous(self): size = 10 @@ -262,7 +411,7 @@ def test_non_contiguous_vs_contiguous(self): y2 = random.randrange(y1 + 1, max(y1 + 2, size)) z2 = random.randrange(z1 + 1, max(z1 + 2, size)) maybe_non_contiguous = t[x1:x2, y1:y2, z1:z2] - assert(maybe_non_contiguous.numel() > 0) + assert maybe_non_contiguous.numel() > 0 if not maybe_non_contiguous.is_contiguous(): seed = random.randrange(1000) @@ -275,23 +424,28 @@ def test_non_contiguous_vs_contiguous(self): gen = csprng.create_mt19937_generator(seed) contiguous.random_(generator=gen) - assert(contiguous.is_contiguous()) + assert contiguous.is_contiguous() 
self.assertTrue((non_contiguous == contiguous).all()) for x in range(0, size): for y in range(0, size): for z in range(0, size): - if not x1 <= x < x2 and not y1 <= y < y2 and not z1 <= z < z2: + if ( + not x1 <= x < x2 + and not y1 <= y < y2 + and not z1 <= z < z2 + ): self.assertTrue(t[x, y, z] == 0) + @unittest.skipIf(IS_SANDCASTLE or IS_FBCODE, "Does not work on Sandcastle") @unittest.skipIf(torch.get_num_threads() < 2, "requires multithreading CPU") def test_cpu_parallel(self): - urandom_gen = csprng.create_random_device_generator('/dev/urandom') + urandom_gen = csprng.create_random_device_generator("/dev/urandom") def measure(size): - t = torch.empty(size, dtype=torch.float32, device='cpu') + t = torch.empty(size, dtype=torch.float32, device="cpu") start = time.time() - for i in range(10): + for i in range(20): t.normal_(generator=urandom_gen) finish = time.time() return finish - start @@ -299,7 +453,202 @@ def measure(size): time_for_1K = measure(1000) time_for_1M = measure(1000000) # Pessimistic check that parallel execution gives >= 1.5 performance boost - self.assertTrue(time_for_1M/time_for_1K < 1000 / min(1.5, torch.get_num_threads())) + self.assertTrue(time_for_1M / time_for_1K < 1000 / 1.5) -if __name__ == '__main__': + @unittest.skipIf(IS_SANDCASTLE or IS_FBCODE, "Does not work on Sandcastle") + def test_version(self): + self.assertTrue(csprng.__version__) + self.assertTrue(csprng.git_version) + + def test_randperm(self): + for device in self.all_devices: + for gen in self.all_generators: + for dtype in self.int_dtypes: + for size in range(0, 20): + expected = torch.arange(size, dtype=dtype, device=device) + + actual = torch.randperm( + size, dtype=dtype, device=device, generator=gen + ) + + actual_out = torch.empty(1, dtype=dtype, device=device) + torch.randperm(size, out=actual_out, generator=gen) + + if size >= 10: + self.assertTrue(not torch.allclose(expected, actual)) + self.assertTrue(not torch.allclose(expected, actual_out)) + + actual = 
actual.sort()[0] + actual_out = actual.sort()[0] + + self.assertTrue(torch.allclose(expected, actual)) + self.assertTrue(torch.allclose(expected, actual_out)) + + def test_encrypt_decrypt(self): + key_size_bytes = 16 + block_size_bytes = 16 + + def sizeof(dtype): + if dtype == torch.bool: + return 1 + elif dtype.is_floating_point: + return torch.finfo(dtype).bits // 8 + else: + return torch.iinfo(dtype).bits // 8 + + def pad(data, pad_size): + if len(data) % pad_size == 0: + return data + length = pad_size - (len(data) % pad_size) + return data + bytes([0]) * length + + def create_aes(m, k): + if m == "ecb": + return AES.new(k.tobytes(), AES.MODE_ECB) + elif m == "ctr": + ctr = Counter.new( + AES.block_size * 8, initial_value=0, little_endian=True + ) + return AES.new(k.tobytes(), AES.MODE_CTR, counter=ctr) + else: + return None + + for key_dtype in self.all_dtypes: + key_size = key_size_bytes // sizeof(key_dtype) + key = torch.empty(key_size, dtype=key_dtype).random_() + key_np = to_bytes(key) + for initial_dtype in self.all_dtypes: + for initial_size in [0, 4, 8, 15, 16, 23, 42]: + initial = torch.empty(initial_size, dtype=initial_dtype).random_() + initial_np = to_bytes(initial) + initial_size_bytes = initial_size * sizeof(initial_dtype) + for encrypted_dtype in self.all_dtypes: + encrypted_size = ( + (initial_size_bytes + block_size_bytes - 1) + // block_size_bytes + * block_size_bytes + // sizeof(encrypted_dtype) + ) + encrypted = torch.zeros(encrypted_size, dtype=encrypted_dtype) + for decrypted_dtype in self.all_dtypes: + decrypted_size = ( + initial_size_bytes + sizeof(decrypted_dtype) - 1 + ) // sizeof(decrypted_dtype) + decrypted = torch.zeros( + decrypted_size, dtype=decrypted_dtype + ) + for mode in ["ecb", "ctr"]: + for device in self.all_devices: + key = key.to(device) + initial = initial.to(device) + encrypted = encrypted.to(device) + decrypted = decrypted.to(device) + + csprng.encrypt( + initial, encrypted, key, "aes128", mode + ) + encrypted_np = 
to_bytes(encrypted) + + aes = create_aes(mode, key_np) + + encrypted_expected = np.frombuffer( + aes.encrypt( + pad(initial_np.tobytes(), block_size_bytes) + ), + dtype=np.int8, + ) + self.assertTrue( + np.array_equal(encrypted_np, encrypted_expected) + ) + + csprng.decrypt( + encrypted, decrypted, key, "aes128", mode + ) + decrypted_np = to_bytes(decrypted)[ + :initial_size_bytes + ] + + aes = create_aes(mode, key_np) + + decrypted_expected = np.frombuffer( + aes.decrypt( + pad( + encrypted_np.tobytes(), block_size_bytes + ) + ), + dtype=np.int8, + )[:initial_size_bytes] + self.assertTrue( + np.array_equal(decrypted_np, decrypted_expected) + ) + + self.assertTrue( + np.array_equal(initial_np, decrypted_np) + ) + + def test_encrypt_decrypt_inplace(self): + key_size_bytes = 16 + + def sizeof(dtype): + if dtype == torch.bool: + return 1 + elif dtype.is_floating_point: + return torch.finfo(dtype).bits // 8 + else: + return torch.iinfo(dtype).bits // 8 + + def create_aes(m, k): + if m == "ecb": + return AES.new(k.tobytes(), AES.MODE_ECB) + elif m == "ctr": + ctr = Counter.new( + AES.block_size * 8, initial_value=0, little_endian=True + ) + return AES.new(k.tobytes(), AES.MODE_CTR, counter=ctr) + else: + return None + + for key_dtype in self.all_dtypes: + key_size = key_size_bytes // sizeof(key_dtype) + key = torch.empty(key_size, dtype=key_dtype).random_() + key_np = to_bytes(key) + for initial_dtype in self.all_dtypes: + for initial_size_bytes in [0, 16, 256]: + initial_size = initial_size_bytes // sizeof(initial_dtype) + initial = torch.empty(initial_size, dtype=initial_dtype).random_() + initial_np = to_bytes(initial) + initial_np_copy = np.copy(initial_np) + for mode in ["ecb", "ctr"]: + for device in self.all_devices: + key = key.to(device) + initial = initial.to(device) + + csprng.encrypt(initial, initial, key, "aes128", mode) + encrypted_np = to_bytes(initial) + aes = create_aes(mode, key_np) + encrypted_expected = np.frombuffer( + 
aes.encrypt(initial_np_copy.tobytes()), dtype=np.int8 + ) + self.assertTrue( + np.array_equal(encrypted_np, encrypted_expected) + ) + + encrypted_np_copy = np.copy(encrypted_np) + + csprng.decrypt(initial, initial, key, "aes128", mode) + decrypted_np = to_bytes(initial) + aes = create_aes(mode, key_np) + decrypted_expected = np.frombuffer( + aes.decrypt(encrypted_np_copy.tobytes()), dtype=np.int8 + ) + self.assertTrue( + np.array_equal(decrypted_np, decrypted_expected) + ) + + self.assertTrue( + np.array_equal(initial_np_copy, decrypted_np) + ) + + +if __name__ == "__main__": unittest.main() diff --git a/torch_csprng/csrc/block_cipher.h b/torch_csprng/csrc/block_cipher.h deleted file mode 100644 index ddde94c..0000000 --- a/torch_csprng/csrc/block_cipher.h +++ /dev/null @@ -1,156 +0,0 @@ -#pragma once - -#include "macros.h" -#include -#include -#include "OffsetCalculator.cuh" -#include -#include -#include - -#if defined(__CUDACC__) || defined(__HIPCC__) -#include -#include -#endif - -#if defined(__CUDACC__) || defined(__HIPCC__) -#define UNROLL_IF_CUDA #pragma unroll -#else -#define UNROLL_IF_CUDA -#endif - -namespace torch { -namespace custom_prng { - -// Generates `block_t_size`-bytes random key Tensor on CPU -// using `generator`, which must be an instance of `at::CPUGeneratorImpl` -// and passes it to the `device`. 
-template -at::Tensor key_tensor(c10::optional generator, size_t block_t_size, at::Device device) { - std::lock_guard lock(generator->mutex()); - auto gen = at::check_generator(generator); - auto t = torch::empty({static_cast(block_t_size)}, torch::kUInt8); - for (size_t i = 0; i < block_t_size; i++) { - t[i] = static_cast(gen->random()); - } - return t.to(device); -} - -// A simple container for random state sub-blocks that implements RNG interface -// with random() and random64() methods, that are used by transformation function -template -struct RNGValues { - TORCH_CSPRNG_HOST_DEVICE RNGValues(uint64_t* vals) { - memcpy(&vals_, vals, size * sizeof(uint64_t)); - } - uint32_t TORCH_CSPRNG_HOST_DEVICE random() { auto res = static_cast(vals_[index]); index++; return res; } - uint64_t TORCH_CSPRNG_HOST_DEVICE random64() { auto res = vals_[index]; index++; return res; } -private: - uint64_t vals_[size]; - int index = 0; -}; - -// Runs a block cipher in a counter mode in approximately `numel / (block_t_size / sizeof(uint_t) / N)` CUDA threads, -// without any assumption about target tensor layout. It uses `index_calc` to find memory locations of -// the tensor elements. -// `scalar_t` is a scalar type equivalent of target tensor dtype -// `uint_t` is an unsigned integral type of sub-blocks that random state is divided to -// (e.g, 16 bytes random state block can be divided into 16 uint8_t sub-blocks -// or 8 uint16_t sub-block or 4 uint32_t sub-block or 2 uint64_t sub-blocks) -// `N` is a number of sub-block which is used by `transform_func` -// to generate a random value of specific distribution (e.g. `normal` uses 2) -// `numel` is a number of elements in target tensor -// `block_t_size` is a number of bytes in cipher's block (e.g. 
16 for AES128) -// `cipher` is a callable that receives a counter `idx` and returns an encrypted block -// `transform_func` is a callable that converts N `uint_t` random state sub-blocks passed in RNGValues into target dtype `scalar_t` -template -TORCH_CSPRNG_HOST_DEVICE static void block_cipher_kernel_helper(int idx, scalar_t* data, int64_t numel, size_t block_t_size, cipher_t cipher, transform_t transform_func, index_calc_t index_calc) { - const int unroll_factor = block_t_size / sizeof(uint_t) / N; - if (unroll_factor * idx < numel) { - auto block = cipher(idx); - UNROLL_IF_CUDA - for (auto i = 0; i < unroll_factor; ++i) { - const auto li = unroll_factor * idx + i; - if (li < numel) { - uint64_t vals[N]; - UNROLL_IF_CUDA - for (size_t j = 0; j < N; j++) { - vals[j] = (reinterpret_cast(&block))[N * i + j]; - } - RNGValues rng(vals); - data[index_calc(li)] = transform_func(&rng); - } - } - } -} - -#if defined(__CUDACC__) || defined(__HIPCC__) -template -__global__ static void block_cipher_kernel_cuda(scalar_t* data, int64_t numel, int block_t_size, cipher_t cipher, transform_t transform_func, index_calc_t index_calc) { - const auto idx = blockIdx.x * blockDim.x + threadIdx.x; - block_cipher_kernel_helper(idx, data, numel, block_t_size, cipher, transform_func, index_calc); -} -#endif - -template -static void block_cipher_kernel_cpu_serial(int64_t begin, int64_t end, scalar_t* data, int64_t numel, int block_t_size, cipher_t cipher, transform_t transform_func, index_calc_t index_calc) { - for (auto idx = begin; idx < end; ++idx) { - block_cipher_kernel_helper(idx, data, numel, block_t_size, cipher, transform_func, index_calc); - } -} - -template -static void block_cipher_kernel_cpu(int64_t total, scalar_t* data, int64_t numel, int block_t_size, cipher_t cipher, transform_t transform_func, index_calc_t index_calc) { - if (total < at::internal::GRAIN_SIZE || at::get_num_threads() == 1) { - block_cipher_kernel_cpu_serial(0, total, data, numel, block_t_size, cipher, 
transform_func, index_calc); - } else { - at::parallel_for(0, total, at::internal::GRAIN_SIZE, [&](int64_t begin, int64_t end) { - block_cipher_kernel_cpu_serial(begin, end, data, numel, block_t_size, cipher, transform_func, index_calc); - }); - } -} - -// Runs a block cipher in a counter mode in approximately `numel / (block_t_size / sizeof(uint_t) / N)` CUDA threads. -// Each CUDA thread generates `block_t_size`-bytes random state and divides it into `block_t_size / sizeof(uint_t)` sub-blocks. -// Then `transform_func` transforms `N` random state sub-blocks passed in a `RNGValues` to final random values of type `scalar_t`. -template -void block_cipher_ctr_mode(at::TensorIterator& iter, int block_t_size, cipher_t cipher, transform_t transform_func) { - const auto numel = iter.numel(); - if (numel == 0) { - return; - } - const int unroll_factor = block_t_size / sizeof(uint_t) / N; - const auto block = 256; - const auto grid = (numel + (block * unroll_factor) - 1) / (block * unroll_factor); - scalar_t* data = (scalar_t*)iter.data_ptr(0); - auto offset_calc = make_offset_calculator<1>(iter); - auto index_calc_identity = [] TORCH_CSPRNG_HOST_DEVICE (int li) -> int { return li; }; - auto index_calc_offset = [offset_calc] TORCH_CSPRNG_HOST_DEVICE (int li) -> int { return offset_calc.get(li)[0] / sizeof(scalar_t); }; - if (iter.device_type() == at::kCPU) { - if (iter.output(0).is_contiguous()) { - block_cipher_kernel_cpu( - grid * block, data, numel, block_t_size, cipher, transform_func, index_calc_identity); - } else { - block_cipher_kernel_cpu( - grid * block, data, numel, block_t_size, cipher, transform_func, index_calc_offset); - } - } else if (iter.device_type() == at::kCUDA) { -#if defined(__CUDACC__) || defined(__HIPCC__) - auto stream = at::cuda::getCurrentCUDAStream(); - if (iter.output(0).is_contiguous()) { - block_cipher_kernel_cuda<<>>( - data, numel, block_t_size, cipher, transform_func, index_calc_identity); - } else { - block_cipher_kernel_cuda<<>>( - 
data, numel, block_t_size, cipher, transform_func, index_calc_offset); - } - AT_CUDA_CHECK(cudaGetLastError()); -#else - TORCH_CHECK(false, "csprng was compiled without CUDA support"); -#endif - } else { - TORCH_CHECK(false, "block_cipher_ctr_mode supports only CPU and CUDA devices"); - } -} - -}} diff --git a/torch_csprng/csrc/csprng.cpp b/torch_csprng/csrc/csprng.cpp deleted file mode 100644 index c526086..0000000 --- a/torch_csprng/csrc/csprng.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "csprng.h" diff --git a/torch_csprng/csrc/csprng.cu b/torch_csprng/csrc/csprng.cu deleted file mode 100644 index c526086..0000000 --- a/torch_csprng/csrc/csprng.cu +++ /dev/null @@ -1 +0,0 @@ -#include "csprng.h" diff --git a/torch_csprng/csrc/csprng.h b/torch_csprng/csrc/csprng.h deleted file mode 100644 index 39aa3e1..0000000 --- a/torch_csprng/csrc/csprng.h +++ /dev/null @@ -1,387 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "macros.h" -#include "block_cipher.h" -#include "aes.h" - -#if defined(__CUDACC__) || defined(__HIPCC__) -#include -#include -#endif - -using namespace at; -using namespace at::native::templates; -using namespace torch::custom_prng; - -inline uint64_t make64BitsFrom32Bits(uint32_t hi, uint32_t lo) { - return (static_cast(hi) << 32) | lo; -} - -// CUDA CSPRNG is actually CPU generator which is used only to generate a random key on CPU for AES running in a block mode on CUDA -struct CustomGeneratorImpl : public c10::GeneratorImpl { - CustomGeneratorImpl(bool use_rd) : c10::GeneratorImpl{Device(DeviceType::CPU), DispatchKeySet(DispatchKey::CustomRNGKeyId)}, use_rd_{use_rd} {} - CustomGeneratorImpl(const std::string& token) : c10::GeneratorImpl{Device(DeviceType::CPU), DispatchKeySet(DispatchKey::CustomRNGKeyId)}, use_rd_{true}, rd_{token} {} - CustomGeneratorImpl(uint64_t seed) : c10::GeneratorImpl{Device(DeviceType::CPU), DispatchKeySet(DispatchKey::CustomRNGKeyId)}, use_rd_{false}, 
mt_{static_cast(seed)} { } - ~CustomGeneratorImpl() = default; - uint32_t random() { return use_rd_ ? rd_() : mt_(); } - uint64_t random64() { return use_rd_ ? make64BitsFrom32Bits(rd_(), rd_()) : make64BitsFrom32Bits(mt_(), mt_()); } - - void set_current_seed(uint64_t seed) override { throw std::runtime_error("not implemented"); } - uint64_t current_seed() const override { throw std::runtime_error("not implemented"); } - uint64_t seed() override { throw std::runtime_error("not implemented"); } - CustomGeneratorImpl* clone_impl() const override { throw std::runtime_error("not implemented"); } - - static DeviceType device_type() { return DeviceType::CPU; } - - bool use_rd_; - std::random_device rd_; - std::mt19937 mt_; -}; - -// ==================================================================================================================== - -// Applies AES in CTR mode with the `key` for passed TensorIterator iter. -// `scalar_t` is a scalar type equivalent of target tensor dtype -// `uint_t` is an unsigned integral type of sub-blocks that random state is divided to -// (e.g, 16 bytes random state block can be divided into 16 uint8_t sub-blocks -// or 8 uint16_t sub-block or 4 uint32_t sub-block or 2 uint64_t sub-blocks) -// `N` is a number of sub-block which is used by `transform_func` -// to generate a random value of specific distribution (e.g. 
`normal` uses 2) -// `key` is a CUDA pointer to random key memory block -// `transform_func` is a callable that converts N `uint_t` random state sub-blocks passed in RNGValues into target dtype `scalar_t` -template -void aes_helper(TensorIterator& iter, const uint8_t* key, transform_t transform_func) { - block_cipher_ctr_mode(iter, aes::block_t_size, - [key] TORCH_CSPRNG_HOST_DEVICE (unsigned int idx) -> aes::block_t { - aes::block_t block; - memset(&block, 0, aes::block_t_size); - *(reinterpret_cast(&block)) = idx; - aes::encrypt(reinterpret_cast(&block), key); - return block; - }, - transform_func - ); -} - -// ==================================================================================================================== - -// A mapping between scalar type and corresponding unsigned integer type of random state sub-block. -// uint64_t for double and long, uint32_t for the rest -template -struct UIntType {}; - -template <> struct UIntType { using type = uint64_t; }; -template <> struct UIntType { using type = uint32_t; }; -template <> struct UIntType { using type = uint64_t; }; -template <> struct UIntType { using type = uint32_t; }; -template <> struct UIntType { using type = uint32_t; }; -template <> struct UIntType { using type = uint32_t; }; -template <> struct UIntType { using type = uint32_t; }; -template <> struct UIntType { using type = uint32_t; }; - -// ==================================================== Random ======================================================== - -template -struct RandomKernel { - void operator()(TensorIterator& iter, c10::optional generator) { - const Tensor key_t = key_tensor(generator, aes::block_t_size, iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_ALL_TYPES_AND(ScalarType::Bool, iter.dtype(), "random_kernel", [&] { - aes_helper::type>(iter, key, - [] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* generator) -> scalar_t { - uniform_int_distribution random; - return random(generator); - } - ); - }); - } -}; - 
-template -void random_from_to_kernel_helper(TensorIterator& iter, uint64_t range, int64_t base, const uint8_t* key) { - aes_helper(iter, key, - [range, base] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* generator) -> scalar_t { - uniform_int_from_to_distribution random(range, base); - return random(generator); - } - ); -} - -template -void random_full_range_kernel_helper(TensorIterator& iter, const uint8_t* key) { - aes_helper(iter, key, - [] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* generator) -> scalar_t { - uniform_int_full_range_distribution random; - return random(generator); - } - ); -} - -template -struct RandomFromToKernel { - void operator()(TensorIterator& iter, uint64_t range, int64_t base, c10::optional generator) { - const Tensor key_t = key_tensor(generator, aes::block_t_size, iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_ALL_TYPES_AND3(at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "random_from_to_kernel", [&] { - if (( - std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value) && range >= 1ULL << 32) - { - random_from_to_kernel_helper(iter, range, base, key); - } else { - random_from_to_kernel_helper(iter, range, base, key); - } - }); - } - void operator()(TensorIterator& iter, c10::optional generator) { - const Tensor key_t = key_tensor(generator, aes::block_t_size, iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::BFloat16, iter.dtype(), "random_full_64_bits_range_kernel", [&] { - if (std::is_same::value || - std::is_same::value || - std::is_same::value || - std::is_same::value) - { - random_full_range_kernel_helper(iter, key); - } else { - TORCH_CHECK(false, "random_full_64_bits_range_kernel_cuda handles only int64, double, float and bfloat16"); - } - }); - } -}; - -Tensor& random_(Tensor& self, c10::optional generator) { - return random_impl(self, generator); -} - -Tensor& random_from_to(Tensor& self, int64_t 
from, optional to, c10::optional generator) { - return random_from_to_impl(self, from, to, generator); -} - -Tensor& random_to(Tensor& self, int64_t to, c10::optional generator) { - return random_from_to(self, 0, to, generator); -} - -// ==================================================== Uniform ======================================================= - -template -struct UniformKernel { - void operator()(TensorIterator& iter, double from, double to, c10::optional generator) { - const Tensor key_t = key_tensor(generator, aes::block_t_size, iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES(iter.dtype(), "uniform_kernel", [&] { - aes_helper(iter, key, - [from, to] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* generator) -> scalar_t { - uniform_real_distribution uniform(from, to); - return static_cast(uniform(generator)); - } - ); - }); - } -}; - -Tensor& uniform_(Tensor& self, double from, double to, c10::optional generator) { - return uniform_impl_(self, from, to, generator); -} - -// ==================================================== Normal ======================================================== - -template -struct NormalKernel { - void operator()(Tensor& self, double mean, double std, c10::optional generator) { - auto iter = TensorIterator::nullary_op(self); - const Tensor key_t = key_tensor(generator, aes::block_t_size, iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES(iter.dtype(), "normal_kernel", [&] { - aes_helper(iter, key, - [mean, std] TORCH_CSPRNG_HOST_DEVICE (RNGValues<2>* gen) -> scalar_t { - normal_distribution normal(mean, std); - return static_cast(normal(gen)); - } - ); - }); - } -}; - -Tensor& normal_(Tensor& self, double mean, double std, c10::optional generator) { - return normal_impl_(self, mean, std, generator); -} - -Tensor& normal_Tensor_float_out(Tensor& output, const Tensor& mean, double std, c10::optional gen) { - return normal_out_impl(output, mean, std, gen); -} - -Tensor& 
normal_float_Tensor_out(Tensor& output, double mean, const Tensor& std, c10::optional gen) { - return normal_out_impl(output, mean, std, gen); -} - -Tensor& normal_Tensor_Tensor_out(Tensor& output, const Tensor& mean, const Tensor& std, c10::optional gen) { - return normal_out_impl(output, mean, std, gen); -} - -Tensor normal_Tensor_float(const Tensor& mean, double std, c10::optional gen) { - return normal_impl(mean, std, gen); -} - -Tensor normal_float_Tensor(double mean, const Tensor& std, c10::optional gen) { - return normal_impl(mean, std, gen); -} - -Tensor normal_Tensor_Tensor(const Tensor& mean, const Tensor& std, c10::optional gen) { - return normal_impl(mean, std, gen); -} - -// ==================================================== Cauchy ======================================================== - -template -struct CauchyKernel { - void operator()(TensorIterator& iter, double median, double sigma, c10::optional generator) { - const Tensor key_t = key_tensor(generator, aes::block_t_size, iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES(iter.dtype(), "cauchy_kernel", [&] { - aes_helper(iter, key, - [median, sigma] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* gen) -> scalar_t { - cauchy_distribution cauchy(median, sigma); - return static_cast(cauchy(gen)); - } - ); - }); - } -}; - -Tensor& cauchy_(Tensor& self, double median, double sigma, c10::optional generator) { - return cauchy_impl_(self, median, sigma, generator); -} - -// ================================================== LogNormal ======================================================= - -template -struct LogNormalKernel { - void operator()(TensorIterator& iter, double mean, double std, c10::optional generator) { - const Tensor key_t = key_tensor(generator, aes::block_t_size, iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES(iter.dtype(), "log_normal", [&] { - aes_helper(iter, key, - [mean, std] TORCH_CSPRNG_HOST_DEVICE (RNGValues<2>* gen) -> 
scalar_t { - lognormal_distribution logNormal(mean, std); - return static_cast(logNormal(gen)); - } - ); - }); - } -}; - -Tensor& log_normal_(Tensor& self, double mean, double std, c10::optional gen) { - return log_normal_impl_(self, mean, std, gen); -} - -// ================================================== Geometric ======================================================= - -template -struct GeometricKernel { - void operator()(TensorIterator& iter, double p, c10::optional generator) { - const Tensor key_t = key_tensor(generator, aes::block_t_size, iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES(iter.dtype(), "geometric_kernel", [&] { - aes_helper::type, 1>(iter, key, - [p] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* gen) -> scalar_t { - geometric_distribution geometric(p); - return geometric(gen); - } - ); - }); - } -}; - -Tensor& geometric_(Tensor& self, double p, c10::optional gen) { - return geometric_impl_(self, p, gen); -} - -// ================================================== Exponential ===================================================== - -template -struct ExponentialKernel { - void operator()(TensorIterator& iter, double lambda, c10::optional generator) { - const Tensor key_t = key_tensor(generator, aes::block_t_size, iter.device()); - const auto key = key_t.data_ptr(); - AT_DISPATCH_FLOATING_TYPES(iter.dtype(), "exponential_kernel", [&] { - aes_helper(iter, key, - [lambda] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* gen) -> scalar_t { - exponential_distribution exponential(lambda); - return static_cast(exponential(gen)); - } - ); - }); - } -}; - -Tensor& exponential_(Tensor& self, double lambda, c10::optional gen) { - return exponential_impl_(self, lambda, gen); -} - -// ==================================================================================================================== - -Generator create_random_device_generator(c10::optional token = c10::nullopt) { - if (token.has_value()) { - return 
make_generator(*token); - } else { - return make_generator(true); - } -} - -Generator create_mt19937_generator(c10::optional seed = c10::nullopt) { - if (seed.has_value()) { - return make_generator(*seed); - } else { - return make_generator(false); - } -} - -bool supports_cuda() { -#if defined(__CUDACC__) || defined(__HIPCC__) - return true; -#else - return false; -#endif -} - -TORCH_LIBRARY_IMPL(aten, CustomRNGKeyId, m) { - // Random - m.impl_UNBOXED("random_.from", random_from_to); - m.impl_UNBOXED("random_.to", random_to); - m.impl_UNBOXED("random_", random_); - // Uniform - m.impl_UNBOXED("uniform_", uniform_); - // Normal - m.impl_UNBOXED("normal_", normal_); - m.impl_UNBOXED("normal.Tensor_float_out", normal_Tensor_float_out); - m.impl_UNBOXED("normal.float_Tensor_out", normal_float_Tensor_out); - m.impl_UNBOXED("normal.Tensor_Tensor_out", normal_Tensor_Tensor_out); - m.impl_UNBOXED("normal.Tensor_float", normal_Tensor_float); - m.impl_UNBOXED("normal.float_Tensor", normal_float_Tensor); - m.impl_UNBOXED("normal.Tensor_Tensor", normal_Tensor_Tensor); - // Cauchy - m.impl_UNBOXED("cauchy_", cauchy_); - // LogNormal - m.impl_UNBOXED("log_normal_", log_normal_); - // Geometric - m.impl_UNBOXED("geometric_", geometric_); - // Exponential - m.impl_UNBOXED("exponential_", exponential_); -} - -PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { - m.def("supports_cuda", &supports_cuda); - m.def("create_random_device_generator", &create_random_device_generator, py::arg("token") = nullptr); - m.def("create_mt19937_generator", &create_mt19937_generator, py::arg("seed") = nullptr); -} diff --git a/torchcsprng/__init__.py b/torchcsprng/__init__.py new file mode 100644 index 0000000..60a98d6 --- /dev/null +++ b/torchcsprng/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) Meta Platforms, Inc. and its affiliates. All Rights Reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +import torch + +from torchcsprng._C import * + + +try: + from .version import __version__, git_version # noqa: F401 +except ImportError: + pass diff --git a/torchcsprng/__init__.pyi b/torchcsprng/__init__.pyi new file mode 100644 index 0000000..dcc28c2 --- /dev/null +++ b/torchcsprng/__init__.pyi @@ -0,0 +1,14 @@ +# Copyright (c) Meta Platforms, Inc. and its affiliates. All Rights Reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +from torch import Generator, Tensor + +def supports_cuda() -> bool: ... +def create_random_device_generator(token: str = "") -> Generator: ... +def create_mt19937_generator(seed: int = 0): ... +def encrypt(input: Tensor, output: Tensor, key: Tensor, cipher, mode): ... +def decrypt(input: Tensor, output: Tensor, key: Tensor, cipher, mode): ... +def __version__() -> str: ... +def git_version() -> str: ... diff --git a/torch_csprng/csrc/OffsetCalculator.cuh b/torchcsprng/csrc/OffsetCalculator.cuh similarity index 93% rename from torch_csprng/csrc/OffsetCalculator.cuh rename to torchcsprng/csrc/OffsetCalculator.cuh index 1c76d70..671e37d 100644 --- a/torch_csprng/csrc/OffsetCalculator.cuh +++ b/torchcsprng/csrc/OffsetCalculator.cuh @@ -1,3 +1,10 @@ +/* + * Copyright (c) Meta Platforms, Inc. and its affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + #pragma once #include diff --git a/torch_csprng/csrc/THCIntegerDivider.cuh b/torchcsprng/csrc/THCIntegerDivider.cuh similarity index 94% rename from torch_csprng/csrc/THCIntegerDivider.cuh rename to torchcsprng/csrc/THCIntegerDivider.cuh index 9d57ef9..bc124b2 100644 --- a/torch_csprng/csrc/THCIntegerDivider.cuh +++ b/torchcsprng/csrc/THCIntegerDivider.cuh @@ -1,3 +1,10 @@ +/* + * Copyright (c) Meta Platforms, Inc. and its affiliates. All Rights Reserved. 
+ * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + #ifndef THC_INTEGER_DIVIDER_INC #define THC_INTEGER_DIVIDER_INC diff --git a/torch_csprng/csrc/aes.h b/torchcsprng/csrc/aes.inc similarity index 66% rename from torch_csprng/csrc/aes.h rename to torchcsprng/csrc/aes.inc index 7a9a287..463dd5c 100644 --- a/torch_csprng/csrc/aes.h +++ b/torchcsprng/csrc/aes.inc @@ -1,10 +1,10 @@ -#pragma once +/* + * Copyright (c) Meta Platforms, Inc. and its affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ -#include "macros.h" -#include - -namespace torch { -namespace custom_prng { namespace aes { // This AES implementation is based on @@ -55,15 +55,7 @@ namespace aes { #define Nr 10 // The number of rounds in AES Cipher. #endif -#if !defined(__CUDACC__) && !defined(__HIPCC__) -struct ulonglong2 // TODO: should have something like `__builtin_align__(16)` -{ - unsigned long long int x, y; -}; -#endif - -typedef ulonglong2 block_t; -constexpr size_t block_t_size = sizeof(block_t); +constexpr size_t block_t_size = 16; typedef uint8_t state_t[4][4]; @@ -89,6 +81,24 @@ TORCH_CSPRNG_CONSTANT const uint8_t sbox[256] = { 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 }; +TORCH_CSPRNG_CONSTANT const uint8_t rsbox[256] = { + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 
0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d }; + // The round constant word array, Rcon[i], contains the values given by // x to the power (i-1) being powers of x (x is denoted as {02}) in the field GF(2^8) TORCH_CSPRNG_CONSTANT const uint8_t Rcon[11] = { @@ -96,6 +106,8 @@ TORCH_CSPRNG_CONSTANT const uint8_t Rcon[11] = { #define getSBoxValue(num) (sbox[(num)]) +#define getSBoxInvert(num) (rsbox[(num)]) + // This function produces Nb(Nr+1) round keys. The round keys are used in each round to decrypt the states. 
TORCH_CSPRNG_HOST_DEVICE void KeyExpansion(uint8_t* RoundKey, const uint8_t* Key){ unsigned int i, j, k; @@ -249,6 +261,78 @@ TORCH_CSPRNG_HOST_DEVICE void MixColumns(state_t* state) } } +TORCH_CSPRNG_HOST_DEVICE uint8_t Multiply(uint8_t x, uint8_t y) +{ + return (((y & 1) * x) ^ + ((y>>1 & 1) * xtime(x)) ^ + ((y>>2 & 1) * xtime(xtime(x))) ^ + ((y>>3 & 1) * xtime(xtime(xtime(x)))) ^ + ((y>>4 & 1) * xtime(xtime(xtime(xtime(x)))))); /* this last call to xtime() can be omitted */ +} + +// MixColumns function mixes the columns of the state matrix. +// The method used to multiply may be difficult to understand for the inexperienced. +// Please use the references to gain more information. +TORCH_CSPRNG_HOST_DEVICE void InvMixColumns(state_t* state) +{ + int i; + uint8_t a, b, c, d; + for (i = 0; i < 4; ++i) + { + a = (*state)[i][0]; + b = (*state)[i][1]; + c = (*state)[i][2]; + d = (*state)[i][3]; + + (*state)[i][0] = Multiply(a, 0x0e) ^ Multiply(b, 0x0b) ^ Multiply(c, 0x0d) ^ Multiply(d, 0x09); + (*state)[i][1] = Multiply(a, 0x09) ^ Multiply(b, 0x0e) ^ Multiply(c, 0x0b) ^ Multiply(d, 0x0d); + (*state)[i][2] = Multiply(a, 0x0d) ^ Multiply(b, 0x09) ^ Multiply(c, 0x0e) ^ Multiply(d, 0x0b); + (*state)[i][3] = Multiply(a, 0x0b) ^ Multiply(b, 0x0d) ^ Multiply(c, 0x09) ^ Multiply(d, 0x0e); + } +} + +// The SubBytes Function Substitutes the values in the +// state matrix with values in an S-box. 
+TORCH_CSPRNG_HOST_DEVICE void InvSubBytes(state_t* state) +{ + uint8_t i, j; + for (i = 0; i < 4; ++i) + { + for (j = 0; j < 4; ++j) + { + (*state)[j][i] = getSBoxInvert((*state)[j][i]); + } + } +} + +TORCH_CSPRNG_HOST_DEVICE void InvShiftRows(state_t* state) +{ + uint8_t temp; + + // Rotate first row 1 columns to right + temp = (*state)[3][1]; + (*state)[3][1] = (*state)[2][1]; + (*state)[2][1] = (*state)[1][1]; + (*state)[1][1] = (*state)[0][1]; + (*state)[0][1] = temp; + + // Rotate second row 2 columns to right + temp = (*state)[0][2]; + (*state)[0][2] = (*state)[2][2]; + (*state)[2][2] = temp; + + temp = (*state)[1][2]; + (*state)[1][2] = (*state)[3][2]; + (*state)[3][2] = temp; + + // Rotate third row 3 columns to right + temp = (*state)[0][3]; + (*state)[0][3] = (*state)[1][3]; + (*state)[1][3] = (*state)[2][3]; + (*state)[2][3] = (*state)[3][3]; + (*state)[3][3] = temp; +} + TORCH_CSPRNG_HOST_DEVICE void encrypt(uint8_t* state, const uint8_t* key) { uint8_t RoundKey[176]; KeyExpansion(RoundKey, key); @@ -276,4 +360,29 @@ TORCH_CSPRNG_HOST_DEVICE void encrypt(uint8_t* state, const uint8_t* key) { AddRoundKey(Nr, (state_t*)state, RoundKey); } -}}} +TORCH_CSPRNG_HOST_DEVICE void decrypt(uint8_t* state, const uint8_t* key) { + uint8_t RoundKey[176]; + KeyExpansion(RoundKey, key); + + uint8_t round = 0; + + // Add the First round key to the state before starting the rounds. + AddRoundKey(Nr, (state_t*)state, RoundKey); + + // There will be Nr rounds. + // The first Nr-1 rounds are identical. + // These Nr rounds are executed in the loop below. 
+ // Last one without InvMixColumn() + for (round = (Nr - 1); ; --round) + { + InvShiftRows((state_t*)state); + InvSubBytes((state_t*)state); + AddRoundKey(round, (state_t*)state, RoundKey); + if (round == 0) { + break; + } + InvMixColumns((state_t*)state); + } +} + +} diff --git a/torchcsprng/csrc/block_cipher.h b/torchcsprng/csrc/block_cipher.h new file mode 100644 index 0000000..a949d52 --- /dev/null +++ b/torchcsprng/csrc/block_cipher.h @@ -0,0 +1,201 @@ +/* + * Copyright (c) Meta Platforms, Inc. and its affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include "macros.h" +#include +#include +#include "OffsetCalculator.cuh" +#include +#include +#include + +#if defined(__CUDACC__) || defined(__HIPCC__) +#include +#include +#endif + +#if defined(__CUDACC__) || defined(__HIPCC__) +#define UNROLL_IF_CUDA #pragma unroll +#else +#define UNROLL_IF_CUDA +#endif + +namespace torch { +namespace csprng { + +template +TORCH_CSPRNG_HOST_DEVICE static void copy_input_to_block(int64_t idx, uint8_t* block, int block_size, + void* input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc) { + for (auto i = 0; i < block_size / input_type_size; ++i) { + const auto linear_index = idx * (block_size / input_type_size) + i; + if (linear_index < input_numel) { + std::memcpy( + block + i * input_type_size, + &(reinterpret_cast(input_ptr)[input_index_calc(linear_index)]), + input_type_size + ); + } + } +} + +template +TORCH_CSPRNG_HOST_DEVICE static void copy_block_to_output(int64_t idx, uint8_t* block, int output_elem_per_block, + void* output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc) { + for (auto i = 0; i < output_elem_per_block; ++i) { + const auto linear_index = idx * output_elem_per_block + i; + if (linear_index < output_numel) { + std::memcpy( + 
&(reinterpret_cast(output_ptr)[output_index_calc(linear_index)]), + block + i * output_type_size, + output_type_size + ); + } + } +} + +template +TORCH_CSPRNG_HOST_DEVICE static void block_cipher_kernel_helper( + int64_t idx, cipher_t cipher, int output_elem_per_block, + void* input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, + void* output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, + transform_t transform) { + uint8_t block[block_size]; + std::memset(&block, 0, block_size); // is it ok to use zeros as padding? + if (input_ptr != nullptr) { + copy_input_to_block(idx, block, block_size, input_ptr, input_numel, input_type_size, input_index_calc); + } + cipher(idx, block); + transform(block); + copy_block_to_output(idx, block, output_elem_per_block, output_ptr, output_numel, output_type_size, output_index_calc); +} + +#if defined(__CUDACC__) || defined(__HIPCC__) +template +__global__ static void block_cipher_kernel_cuda(cipher_t cipher, int output_elem_per_block, + void* input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, + void* output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, + transform_t transform) { + const auto idx = blockIdx.x * blockDim.x + threadIdx.x; + block_cipher_kernel_helper(idx, cipher, output_elem_per_block, + input_ptr, input_numel, input_type_size, input_index_calc, + output_ptr, output_numel, output_type_size, output_index_calc, + transform); +} +#endif + +template +static void block_cipher_kernel_cpu_serial(int64_t begin, int64_t end, cipher_t cipher, int output_elem_per_block, + void* input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, + void* output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, + transform_t transform) { + for (auto idx = begin; idx < end; ++idx) { + block_cipher_kernel_helper(idx, 
cipher, output_elem_per_block, + input_ptr, input_numel, input_type_size, input_index_calc, + output_ptr, output_numel, output_type_size, output_index_calc, + transform); + } +} + +template +static void block_cipher_kernel_cpu(int64_t total, cipher_t cipher, int output_elem_per_block, + void* input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, + void* output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, + transform_t transform_func) { + if (total < at::internal::GRAIN_SIZE || at::get_num_threads() == 1) { + block_cipher_kernel_cpu_serial(0, total, cipher, output_elem_per_block, + input_ptr, input_numel, input_type_size, input_index_calc, + output_ptr, output_numel, output_type_size, output_index_calc, + transform_func); + } else { + at::parallel_for(0, total, at::internal::GRAIN_SIZE, [&](int64_t begin, int64_t end) { + block_cipher_kernel_cpu_serial(begin, end, cipher, output_elem_per_block, + input_ptr, input_numel, input_type_size, input_index_calc, + output_ptr, output_numel, output_type_size, output_index_calc, + transform_func); + }); + } +} + +template +void block_cipher( + void* input_ptr, int64_t input_numel, int input_type_size, input_index_calc_t input_index_calc, + void* output_ptr, int64_t output_numel, int output_type_size, output_index_calc_t output_index_calc, + at::Device device, cipher_t cipher, int output_elem_per_block, transform_t transform_func) { + if (output_ptr == nullptr || output_numel == 0) { + return; + } + + if (device.type() == at::kCPU) { + const auto total = (output_numel + output_elem_per_block - 1) / output_elem_per_block; + block_cipher_kernel_cpu(total, + cipher, output_elem_per_block, + input_ptr, input_numel, input_type_size, input_index_calc, + output_ptr, output_numel, output_type_size, output_index_calc, + transform_func + ); + } else if (device.type() == at::kCUDA) { +#if defined(__CUDACC__) || defined(__HIPCC__) + const auto threads = 256; + 
const auto grid = (output_numel + (threads * output_elem_per_block) - 1) / (threads * output_elem_per_block); + auto stream = at::cuda::getCurrentCUDAStream(); + block_cipher_kernel_cuda<<>>( + cipher, output_elem_per_block, + input_ptr, input_numel, input_type_size, input_index_calc, + output_ptr, output_numel, output_type_size, output_index_calc, + transform_func + ); + AT_CUDA_CHECK(cudaGetLastError()); +#else + TORCH_CHECK(false, "torchcsprng was compiled without CUDA support"); +#endif + } else { + TORCH_CHECK(false, "block_cipher supports only CPU and CUDA devices"); + } +} + +template +void block_cipher(at::Tensor input, at::Tensor output, cipher_t cipher) { + const auto input_ptr = input.data_ptr(); + const auto input_numel = input.numel(); + + // Otherwise OffsetCalculator/IntDivider crashes with integer division by zero + if (input_ptr == nullptr || input_numel == 0) { + return; + } + + const auto input_type_size = input.element_size(); + const auto input_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(input)); + const auto input_index_calc = [input_offset_calc] TORCH_CSPRNG_HOST_DEVICE (uint32_t li) -> uint32_t { + return input_offset_calc.get(li)[0]; + }; + + const auto output_ptr = output.data_ptr(); + const auto output_numel = output.numel(); + + // Otherwise OffsetCalculator/IntDivider crashes with integer division by zero + if (output_ptr == nullptr || output_numel == 0) { + return; + } + + const auto output_type_size = output.element_size(); + const auto output_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(output)); + const auto output_index_calc = [output_offset_calc] TORCH_CSPRNG_HOST_DEVICE (uint32_t li) -> uint32_t { + return output_offset_calc.get(li)[0]; + }; + + const auto device = output.device(); + + torch::csprng::block_cipher( + input_ptr, input_numel, input_type_size, input_index_calc, + output_ptr, output_numel, output_type_size, output_index_calc, + device, cipher, block_size / 
output_type_size, + [] TORCH_CSPRNG_HOST_DEVICE (uint8_t* x) {}); +} + +}} diff --git a/torchcsprng/csrc/cpu/kernels.cpp b/torchcsprng/csrc/cpu/kernels.cpp new file mode 100644 index 0000000..5f86829 --- /dev/null +++ b/torchcsprng/csrc/cpu/kernels.cpp @@ -0,0 +1,16 @@ +/* + * Copyright (c) Meta Platforms, Inc. and its affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "../kernels_commons.h" + +namespace torch { +namespace csprng { +namespace cpu { + +#include "../kernels_body.inc" + +}}} diff --git a/torchcsprng/csrc/cpu/kernels.h b/torchcsprng/csrc/cpu/kernels.h new file mode 100644 index 0000000..f84af4a --- /dev/null +++ b/torchcsprng/csrc/cpu/kernels.h @@ -0,0 +1,19 @@ +/* + * Copyright (c) Meta Platforms, Inc. and its affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace torch { +namespace csprng { +namespace cpu { + +#include "../kernels_decls.inc" + +}}} diff --git a/torchcsprng/csrc/csprng.cpp b/torchcsprng/csrc/csprng.cpp new file mode 100644 index 0000000..8494253 --- /dev/null +++ b/torchcsprng/csrc/csprng.cpp @@ -0,0 +1,377 @@ +/* + * Copyright (c) Meta Platforms, Inc. and its affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include + +#include +#include +#include + +#include "kernels_commons.h" +#include "cpu/kernels.h" +#ifdef WITH_CUDA +#include "cuda/kernels.cuh" +#endif + +using namespace at; +using namespace torch::csprng; + +static const auto GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE = "generator does not support tensor device type"; +static const auto TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED = "tensor device type is not supported"; + +// ==================================================== Random ======================================================== + +Tensor& random_(Tensor& self, c10::optional gen) { + if (self.device().type() == DeviceType::CPU) { + return cpu::random_(self, gen); +#ifdef WITH_CUDA + } else if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::random_(self, gen); +#endif + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor& random_from_to(Tensor& self, int64_t from, optional to, + c10::optional gen) { + if (self.device().type() == DeviceType::CPU) { + return cpu::random_from_to(self, from, to, gen); +#ifdef WITH_CUDA + } else if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::random_from_to(self, from, to, gen); +#endif + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor& random_to(Tensor& self, int64_t to, + c10::optional gen) { + if (self.device().type() == DeviceType::CPU) { + return cpu::random_to(self, to, gen); +#ifdef WITH_CUDA + } else if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::random_to(self, to, gen); +#endif + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// ==================================================== Uniform ======================================================= + +Tensor& uniform_(Tensor& self, double from, double to, c10::optional gen) { + if (self.device().type() == DeviceType::CPU) { + return 
cpu::uniform_(self, from, to, gen); +#ifdef WITH_CUDA + } else if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::uniform_(self, from, to, gen); +#endif + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// ==================================================== Normal ======================================================== + +Tensor& normal_(Tensor& self, double mean, double std, c10::optional gen) { + if (self.device().type() == DeviceType::CPU) { + return cpu::normal_(self, mean, std, gen); +#ifdef WITH_CUDA + } else if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_(self, mean, std, gen); +#endif + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor& normal_Tensor_float_out(const Tensor& mean, double std, c10::optional gen, Tensor& output) { + if (output.device().type() == DeviceType::CPU) { + return cpu::normal_Tensor_float_out(output, mean, std, gen); +#ifdef WITH_CUDA + } else if (output.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_Tensor_float_out(output, mean, std, gen); +#endif + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor& normal_float_Tensor_out(double mean, const Tensor& std, c10::optional gen, Tensor& output) { + if (output.device().type() == DeviceType::CPU) { + return cpu::normal_float_Tensor_out(output, mean, std, gen); +#ifdef WITH_CUDA + } else if (output.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_float_Tensor_out(output, mean, std, gen); +#endif + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor& normal_Tensor_Tensor_out(const Tensor& mean, const Tensor& std, c10::optional gen, Tensor& output) { + if (output.device().type() == DeviceType::CPU) { + return cpu::normal_Tensor_Tensor_out(output, mean, std, gen); +#ifdef WITH_CUDA + } else if 
(output.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_Tensor_Tensor_out(output, mean, std, gen); +#endif + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor normal_Tensor_float(const Tensor& mean, double std, c10::optional gen) { + if (mean.device().type() == DeviceType::CPU) { + return cpu::normal_Tensor_float(mean, std, gen); +#ifdef WITH_CUDA + } else if (mean.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_Tensor_float(mean, std, gen); +#endif + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor normal_float_Tensor(double mean, const Tensor& std, c10::optional gen) { + if (std.device().type() == DeviceType::CPU) { + return cpu::normal_float_Tensor(mean, std, gen); +#ifdef WITH_CUDA + } else if (std.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_float_Tensor(mean, std, gen); +#endif + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +Tensor normal_Tensor_Tensor(const Tensor& mean, const Tensor& std, c10::optional gen) { + if (mean.device().type() == DeviceType::CPU) { + return cpu::normal_Tensor_Tensor(mean, std, gen); +#ifdef WITH_CUDA + } else if (mean.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::normal_Tensor_Tensor(mean, std, gen); +#endif + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// ==================================================== Cauchy ======================================================== + +Tensor& cauchy_(Tensor& self, double median, double sigma, c10::optional gen) { + if (self.device().type() == DeviceType::CPU) { + return cpu::cauchy_(self, median, sigma, gen); +#ifdef WITH_CUDA + } else if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::cauchy_(self, median, sigma, gen); +#endif + } else { + TORCH_CHECK(false, 
GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// ================================================== LogNormal ======================================================= + +Tensor& log_normal_(Tensor& self, double mean, double std, c10::optional gen) { + if (self.device().type() == DeviceType::CPU) { + return cpu::log_normal_(self, mean, std, gen); +#ifdef WITH_CUDA + } else if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::log_normal_(self, mean, std, gen); +#endif + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// ================================================== Geometric ======================================================= + +Tensor& geometric_(Tensor& self, double p, c10::optional gen) { + if (self.device().type() == DeviceType::CPU) { + return cpu::geometric_(self, p, gen); +#ifdef WITH_CUDA + } else if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::geometric_(self, p, gen); +#endif + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// ================================================== Exponential ===================================================== + +Tensor& exponential_(Tensor& self, double lambda, c10::optional gen) { + if (self.device().type() == DeviceType::CPU) { + return cpu::exponential_(self, lambda, gen); +#ifdef WITH_CUDA + } else if (self.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::exponential_(self, lambda, gen); +#endif + } else { + TORCH_CHECK(false, GENERATOR_DOES_NOT_SUPPORT_TENSOR_DEVICE_TYPE); + } +} + +// =============================================== Random permutation ================================================= + +// randperm implementation was copied from PyTorch to unblock CSPRNG users, but ultimately CSPRNG must reuse +// refactored randperm from PyTorch, see https://github.com/pytorch/pytorch/issues/43816 + +namespace { + + inline void 
check_supported_max_int_with_precision(int64_t n, const Tensor& tensor) { + TORCH_CHECK(at::scalar_tensor(n, tensor.options()).defined(), + "n is too large for result tensor type: '", tensor.toString(), "'"); + + // Ensure sufficient precision for floating point representation. + switch (tensor.scalar_type()) { + case at::ScalarType::Half: + TORCH_CHECK(n <= (int64_t(1) << 11) + 1, "n cannot be greater than 2049 for Half type."); + break; + case at::ScalarType::Float: + TORCH_CHECK(n <= (int64_t(1) << 24) + 1, "n cannot be greater than 2^24+1 for Float type."); + break; + case at::ScalarType::Double: // Unlikely to happen, but doesn't hurt to check + TORCH_CHECK(n <= (int64_t(1) << 53) + 1, "n cannot be greater than 2^53+1 for Double type."); + break; + default: + break; + } + } + + template + void randperm(Tensor& result, int64_t n, c10::optional generator) { + auto gen = at::check_generator(generator); + scalar_t *r__data = result.data_ptr(); + + result.resize_({n}); + int64_t r__stride_0 = result.stride(0); + + at::parallel_for(0, n, internal::GRAIN_SIZE, + [&r__data, &r__stride_0](int64_t p_begin, int64_t p_end) { + for(int64_t i = p_begin; i < p_end; i++) + r__data[i*r__stride_0] = static_cast(i); + }); + + for(int64_t i = 0; i < n - 1; i++) + { + int64_t z = gen->random() % (n-i); + scalar_t sav = r__data[i*r__stride_0]; + r__data[i*r__stride_0] = r__data[(z+i)*r__stride_0]; + r__data[(z+i)*r__stride_0] = sav; + } + } +} // namespace + +Tensor& randperm_generator_out(int64_t n, c10::optional generator, Tensor& result) { + TORCH_CHECK(n >= 0, "n must be non-negative, got", n); + check_supported_max_int_with_precision(n, result); + if (result.device().type() == at::kCUDA) { + auto result_cpu = at::empty({n}, result.options().device(kCPU)); + randperm_generator_out(n, generator, result_cpu); + result.resize_({n}); + return result.copy_(result_cpu); + } + result.resize_({n}); + // See Note [Acquire lock when using random generators] + std::lock_guard 
lock(generator->mutex()); + AT_DISPATCH_ALL_TYPES_AND(at::ScalarType::Half, result.scalar_type(), "randperm", [&]() -> void { + randperm(result, n, generator); + }); + return result; +} + +// ================================================Encrypt/Decrypt===================================================== + +Tensor encrypt_pybind(Tensor input, Tensor output, Tensor key, const std::string& cipher, const std::string& mode) { + if (input.device().type() == DeviceType::CPU) { + return cpu::encrypt(input, output, key, cipher, mode); +#ifdef WITH_CUDA + } else if (input.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::encrypt(input, output, key, cipher, mode); +#endif + } else { + TORCH_CHECK(false, TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED); + } +} + +Tensor decrypt_pybind(Tensor input, Tensor output, Tensor key, const std::string& cipher, const std::string& mode) { + if (input.device().type() == DeviceType::CPU) { + return cpu::decrypt(input, output, key, cipher, mode); +#ifdef WITH_CUDA + } else if (input.device().type() == DeviceType::CUDA) { + return torch::csprng::cuda::decrypt(input, output, key, cipher, mode); +#endif + } else { + TORCH_CHECK(false, TENSOR_DEVICE_TYPE_IS_NOT_SUPPORTED); + } +} + +// ==================================================================================================================== + +Generator create_random_device_generator(c10::optional token = c10::nullopt) { + if (token.has_value()) { + return make_generator(*token); + } else { + return make_generator(true); + } +} + +Generator create_mt19937_generator(c10::optional seed = c10::nullopt) { + if (seed.has_value()) { + return make_generator(*seed); + } else { + return make_generator(false); + } +} + +bool supports_cuda() { +#ifdef WITH_CUDA + return true; +#else + return false; +#endif +} + +TORCH_LIBRARY_IMPL(aten, CustomRNGKeyId, m) { + // Random + m.impl("random_.from", random_from_to); + m.impl("random_.to", random_to); + m.impl("random_", random_); + // 
Uniform + m.impl("uniform_", uniform_); + // Normal + m.impl("normal_", normal_); + m.impl("normal.Tensor_float_out", normal_Tensor_float_out); + m.impl("normal.float_Tensor_out", normal_float_Tensor_out); + m.impl("normal.Tensor_Tensor_out", normal_Tensor_Tensor_out); + m.impl("normal.Tensor_float", normal_Tensor_float); + m.impl("normal.float_Tensor", normal_float_Tensor); + m.impl("normal.Tensor_Tensor", normal_Tensor_Tensor); + // Cauchy + m.impl("cauchy_", cauchy_); + // LogNormal + m.impl("log_normal_", log_normal_); + // Geometric + m.impl("geometric_", geometric_); + // Exponential + m.impl("exponential_", exponential_); + // Random permutation + m.impl("randperm.generator_out", randperm_generator_out); +} + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("supports_cuda", &supports_cuda); + m.def("create_random_device_generator", &create_random_device_generator, py::arg("token") = nullptr); + m.def("create_mt19937_generator", &create_mt19937_generator, py::arg("seed") = nullptr); + m.def("encrypt", &encrypt_pybind); + m.def("decrypt", &decrypt_pybind); +} diff --git a/torchcsprng/csrc/cuda/kernels.cu b/torchcsprng/csrc/cuda/kernels.cu new file mode 100644 index 0000000..6842ffb --- /dev/null +++ b/torchcsprng/csrc/cuda/kernels.cu @@ -0,0 +1,16 @@ +/* + * Copyright (c) Meta Platforms, Inc. and its affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "../kernels_commons.h" + +namespace torch { +namespace csprng { +namespace cuda { + +#include "../kernels_body.inc" + +}}} diff --git a/torchcsprng/csrc/cuda/kernels.cuh b/torchcsprng/csrc/cuda/kernels.cuh new file mode 100644 index 0000000..b2a05d4 --- /dev/null +++ b/torchcsprng/csrc/cuda/kernels.cuh @@ -0,0 +1,19 @@ +/* + * Copyright (c) Meta Platforms, Inc. and its affiliates. All Rights Reserved. 
+ * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace torch { +namespace csprng { +namespace cuda { + +#include "../kernels_decls.inc" + +}}} diff --git a/torchcsprng/csrc/kernels_body.inc b/torchcsprng/csrc/kernels_body.inc new file mode 100644 index 0000000..a2be40d --- /dev/null +++ b/torchcsprng/csrc/kernels_body.inc @@ -0,0 +1,437 @@ +/* + * Copyright (c) Meta Platforms, Inc. and its affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include "aes.inc" + +// Generates `block_t_size`-bytes random key Tensor on CPU +// using `generator`, which must be an instance of `at::CPUGeneratorImpl` +// and passes it to the `device`. +template +at::Tensor key_tensor(size_t block_t_size, c10::optional generator) { + std::lock_guard lock(generator->mutex()); + auto gen = at::check_generator(generator); + auto key = torch::empty({static_cast(block_t_size)}, torch::kUInt8); + using random_t = typename std::result_of::type; + constexpr size_t random_t_size = sizeof(random_t); + for (size_t i = 0; i < block_t_size / random_t_size; i++) { + const auto rand = gen->random(); + for (size_t j = 0; j < random_t_size; j++) { + size_t k = i * random_t_size + j; + key[k] = static_cast((rand >> (j * 8)) & 0xff); + } + } + return key; +} + +template +at::Tensor aes128_key_tensor(at::Generator generator) { + return key_tensor(aes::block_t_size, generator); +} + +// ==================================================================================================================== + +// A simple container for random state sub-blocks that implements RNG interface +// with random() and random64() methods, that are used by transformation function +template +struct RNGValues { + TORCH_CSPRNG_HOST_DEVICE RNGValues(uint64_t* vals) { + 
memcpy(&vals_, vals, size * sizeof(uint64_t)); + } + uint32_t TORCH_CSPRNG_HOST_DEVICE random() { auto res = static_cast(vals_[index]); index++; return res; } + uint64_t TORCH_CSPRNG_HOST_DEVICE random64() { auto res = vals_[index]; index++; return res; } +private: + uint64_t vals_[size]; + int index = 0; +}; + +// Applies AES in CTR mode with the `key` for passed TensorIterator iter. +// `scalar_t` is a scalar type equivalent of target tensor dtype +// `uint_t` is an unsigned integral type of sub-blocks that random state is divided to +// (e.g, 16 bytes random state block can be divided into 16 uint8_t sub-blocks +// or 8 uint16_t sub-block or 4 uint32_t sub-block or 2 uint64_t sub-blocks) +// `N` is a number of sub-block which is used by `transform_func` +// to generate a random value of specific distribution (e.g. `normal` uses 2) +// `key` is a CUDA pointer to random key memory block +// `transform_func` is a callable that converts N `uint_t` random state sub-blocks passed in RNGValues into target dtype `scalar_t` +template +void aes_helper(at::TensorIterator& iter, const uint8_t* key_bytes, transform_t transform_func) { + auto output = iter.tensor(0); + const auto output_offset_calc = make_offset_calculator<1>(at::TensorIterator::nullary_op(output)); + const auto output_index_calc = [output_offset_calc] TORCH_CSPRNG_HOST_DEVICE (uint32_t li) -> uint32_t { + return output_offset_calc.get(li)[0]; + }; + torch::csprng::block_cipher( + nullptr, 0, 0, output_index_calc, + output.data_ptr(), output.numel(), output.element_size(), output_index_calc, + iter.device_type(), + [key_bytes] TORCH_CSPRNG_HOST_DEVICE (int64_t idx, uint8_t* block) -> void { + uint8_t idx_block[aes::block_t_size]; + std::memset(&idx_block, 0, aes::block_t_size); + *(reinterpret_cast(idx_block)) = idx; + aes::encrypt(idx_block, key_bytes); + for (size_t i = 0; i < aes::block_t_size; i++) { + block[i] ^= idx_block[i]; + } + }, + aes::block_t_size / (N * sizeof(uint_t)), + [transform_func] 
TORCH_CSPRNG_HOST_DEVICE (uint8_t* block) { + const auto n = aes::block_t_size / (N * sizeof(uint_t)); + for (size_t i = 0; i < n; ++i) { + uint64_t vals[N]; + for (size_t j = 0; j < N; ++j) { + vals[j] = (reinterpret_cast(block))[N * i + j]; + } + RNGValues rng(vals); + reinterpret_cast(block)[i] = transform_func(&rng); + } + } + ); +} + +// ==================================================================================================================== + +// A mapping between scalar type and corresponding unsigned integer type of random state sub-block. +// uint64_t for double and long, uint32_t for the rest +template +struct UIntType {}; + +template <> struct UIntType { using type = uint64_t; }; +template <> struct UIntType { using type = uint32_t; }; +template <> struct UIntType { using type = uint16_t; }; +template <> struct UIntType { using type = uint16_t; }; +template <> struct UIntType { using type = uint64_t; }; +template <> struct UIntType { using type = uint32_t; }; +template <> struct UIntType { using type = uint32_t; }; +template <> struct UIntType { using type = uint32_t; }; +template <> struct UIntType { using type = uint32_t; }; +template <> struct UIntType { using type = uint32_t; }; + +// ==================================================== Random ======================================================== + +template +struct RandomKernel { + void operator()(TensorIterator& iter, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_ALL_TYPES_AND3(at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "random_kernel", [&] { + aes_helper::type>(iter, key, + [] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* generator) -> scalar_t { + uniform_int_distribution random; + return random(generator); + } + ); + }); + } +}; + +template +void random_from_to_kernel_helper(TensorIterator& iter, uint64_t range, int64_t base, const uint8_t* key) 
{ + aes_helper(iter, key, + [range, base] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* generator) -> scalar_t { + uniform_int_from_to_distribution random(range, base); + return random(generator); + } + ); +} + +template +void random_full_range_kernel_helper(TensorIterator& iter, const uint8_t* key) { + aes_helper(iter, key, + [] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* generator) -> scalar_t { + uniform_int_full_range_distribution random; + return random(generator); + } + ); +} + +template +struct RandomFromToKernel { + void operator()(TensorIterator& iter, uint64_t range, int64_t base, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_ALL_TYPES_AND3(at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "random_from_to_kernel", [&] { + if (( + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value)/* TODO: && range >= 1ULL << 32*/) + { + random_from_to_kernel_helper(iter, range, base, key); + } else { + random_from_to_kernel_helper(iter, range, base, key); + } + }); + } + void operator()(TensorIterator& iter, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_ALL_TYPES_AND3(at::ScalarType::Bool, at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "random_full_64_bits_range_kernel", [&] { + if (std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value) + { + random_full_range_kernel_helper(iter, key); + } else { + TORCH_CHECK(false, "random_full_64_bits_range_kernel_cuda handles only int64, double, float and bfloat16"); + } + }); + } +}; + +at::Tensor& random_(at::Tensor& self, c10::optional generator) { + return at::native::templates::random_impl(self, generator); +} + +at::Tensor& random_from_to(at::Tensor& self, int64_t from, c10::optional to, c10::optional 
generator) { + return at::native::templates::random_from_to_impl(self, from, to, generator); +} + +at::Tensor& random_to(at::Tensor& self, int64_t to, c10::optional generator) { + return random_from_to(self, 0, to, generator); +} + +// ==================================================== Uniform ======================================================= + +template +struct UniformKernel { + void operator()(TensorIterator& iter, double from, double to, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "uniform_kernel", [&] { + aes_helper(iter, key, + [from, to] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* generator) -> scalar_t { + uniform_real_distribution uniform(from, to); + return static_cast(uniform(generator)); + } + ); + }); + } +}; + +at::Tensor& uniform_(at::Tensor& self, double from, double to, c10::optional generator) { + return at::native::templates::uniform_impl_(self, from, to, generator); +} + +// ==================================================== Normal ======================================================== + +template +struct NormalKernel { + void operator()(Tensor& self, double mean, double std, c10::optional generator) { + auto iter = TensorIterator::nullary_op(self); + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "normal_kernel", [&] { + aes_helper(iter, key, + [mean, std] TORCH_CSPRNG_HOST_DEVICE (RNGValues<2>* gen) -> scalar_t { + normal_distribution normal(mean, std); + return static_cast(normal(gen)); + } + ); + }); + } +}; + +at::Tensor& normal_(at::Tensor& self, double mean, double std, c10::optional generator) { + return at::native::templates::normal_impl_(self, mean, std, generator); +} + +at::Tensor& 
normal_Tensor_float_out(at::Tensor& output, const at::Tensor& mean, double std, c10::optional gen) { + return at::native::templates::normal_out_impl(output, mean, std, gen); +} + +at::Tensor& normal_float_Tensor_out(at::Tensor& output, double mean, const at::Tensor& std, c10::optional gen) { + return at::native::templates::normal_out_impl(output, mean, std, gen); +} + +at::Tensor& normal_Tensor_Tensor_out(at::Tensor& output, const at::Tensor& mean, const at::Tensor& std, c10::optional gen) { + return at::native::templates::normal_out_impl(output, mean, std, gen); +} + +at::Tensor normal_Tensor_float(const at::Tensor& mean, double std, c10::optional gen) { + return at::native::templates::normal_impl(mean, std, gen); +} + +at::Tensor normal_float_Tensor(double mean, const at::Tensor& std, c10::optional gen) { + return at::native::templates::normal_impl(mean, std, gen); +} + +at::Tensor normal_Tensor_Tensor(const at::Tensor& mean, const at::Tensor& std, c10::optional gen) { + return at::native::templates::normal_impl(mean, std, gen); +} + +// ==================================================== Cauchy ======================================================== + +template +struct CauchyKernel { + void operator()(TensorIterator& iter, double median, double sigma, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "cauchy_kernel", [&] { + aes_helper(iter, key, + [median, sigma] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* gen) -> scalar_t { + cauchy_distribution cauchy(median, sigma); + return static_cast(cauchy(gen)); + } + ); + }); + } +}; + +at::Tensor& cauchy_(at::Tensor& self, double median, double sigma, c10::optional generator) { + return at::native::templates::cauchy_impl_(self, median, sigma, generator); +} + +// ================================================== LogNormal 
======================================================= + +template +struct LogNormalKernel { + void operator()(TensorIterator& iter, double mean, double std, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "log_normal", [&] { + aes_helper(iter, key, + [mean, std] TORCH_CSPRNG_HOST_DEVICE (RNGValues<2>* gen) -> scalar_t { + lognormal_distribution logNormal(mean, std); + return static_cast(logNormal(gen)); + } + ); + }); + } +}; + +at::Tensor& log_normal_(at::Tensor& self, double mean, double std, c10::optional gen) { + return at::native::templates::log_normal_impl_(self, mean, std, gen); +} + +// ================================================== Geometric ======================================================= + +template +struct GeometricKernel { + void operator()(TensorIterator& iter, double p, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, iter.dtype(), "geometric_kernel", [&] { + aes_helper::type, 1>(iter, key, + [p] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* gen) -> scalar_t { + geometric_distribution geometric(p); + return geometric(gen); + } + ); + }); + } +}; + +at::Tensor& geometric_(at::Tensor& self, double p, c10::optional gen) { + return at::native::templates::geometric_impl_(self, p, gen); +} + +// ================================================== Exponential ===================================================== + +template +struct ExponentialKernel { + void operator()(TensorIterator& iter, double lambda, c10::optional generator) { + const Tensor key_t = aes128_key_tensor(*generator).to(iter.device()); + const auto key = key_t.data_ptr(); + AT_DISPATCH_FLOATING_TYPES_AND2(at::ScalarType::Half, 
at::ScalarType::BFloat16, iter.dtype(), "exponential_kernel", [&] { + aes_helper(iter, key, + [lambda] TORCH_CSPRNG_HOST_DEVICE (RNGValues<1>* gen) -> scalar_t { + exponential_distribution exponential(lambda); + return static_cast(exponential(gen)); + } + ); + }); + } +}; + +at::Tensor& exponential_(at::Tensor& self, double lambda, c10::optional gen) { + return at::native::templates::exponential_impl_(self, lambda, gen); +} + +// ================================================Encrypt/Decrypt===================================================== + +void check_cipher(const std::string& cipher, Tensor key) { + if (cipher == "aes128") { + TORCH_CHECK(key.element_size() * key.numel() == 16, "key tensor must have 16 bytes(128 bits)"); + } else { + TORCH_CHECK(false, "encrypt/decrypt supports \"aes128\" cipher, \"", cipher, "\" is not supported."); + } +} + +void aes_ecb_encrypt(Tensor input, Tensor output, uint8_t* key_bytes) { + block_cipher(input, output, + [key_bytes] TORCH_CSPRNG_HOST_DEVICE (int64_t idx, uint8_t* block) -> void { + aes::encrypt(block, key_bytes); + } + ); +} + +void aes_ecb_decrypt(Tensor input, Tensor output, uint8_t* key_bytes) { + block_cipher(input, output, + [key_bytes] TORCH_CSPRNG_HOST_DEVICE (int64_t idx, uint8_t* block) -> void { + aes::decrypt(block, key_bytes); + } + ); +} + +void aes_ctr_encrypt(Tensor input, Tensor output, uint8_t* key_bytes) { + block_cipher(input, output, + [key_bytes] TORCH_CSPRNG_HOST_DEVICE (int64_t idx, uint8_t* block) -> void { + uint8_t idx_block[aes::block_t_size]; + std::memset(&idx_block, 0, aes::block_t_size); + *(reinterpret_cast(idx_block)) = idx; + aes::encrypt(idx_block, key_bytes); + for (size_t i = 0; i < aes::block_t_size; i++) { + block[i] ^= idx_block[i]; + } + } + ); +} + +void aes_ctr_decrypt(Tensor input, Tensor output, uint8_t* key_bytes) { + aes_ctr_encrypt(input, output, key_bytes); +} + +Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string& cipher, const std::string& 
mode) { + TORCH_CHECK(input.device() == output.device() && input.device() == key.device(), "input, output and key tensors must have the same device"); + const auto output_size_bytes = output.numel() * output.itemsize(); + const auto input_size_bytes = input.numel() * input.itemsize(); + const auto input_size_bytes_rounded = (input_size_bytes + aes::block_t_size - 1) / aes::block_t_size * aes::block_t_size; + TORCH_CHECK(output_size_bytes == input_size_bytes_rounded, + "output size in bytes(", output_size_bytes, + ") is not equal to input size in bytes rounded to block size(", + input_size_bytes_rounded, ")"); + check_cipher(cipher, key); + const auto key_bytes = reinterpret_cast(key.contiguous().data_ptr()); + if (mode == "ecb") { + aes_ecb_encrypt(input, output, key_bytes); + } else if (mode == "ctr") { + aes_ctr_encrypt(input, output, key_bytes); + } else { + TORCH_CHECK(false, "encrypt/decrypt supports \"ecb\" and \"ctr\" modes, \"", mode, "\" is not supported."); + } + return output; +} + +Tensor decrypt(Tensor input, Tensor output, Tensor key, const std::string& cipher, const std::string& mode) { + TORCH_CHECK(input.device() == output.device() && input.device() == key.device(), "input, output and key tensors must have the same device"); + const auto output_size_bytes = output.numel() * output.itemsize(); + const auto input_size_bytes = input.numel() * input.itemsize(); + const auto diff = input_size_bytes - output_size_bytes; + TORCH_CHECK(0 <= diff && diff < aes::block_t_size, "output tensor size in bytes must be less then or equal to input tensor size in bytes, the difference must be less than block size"); + TORCH_CHECK(input_size_bytes % aes::block_t_size == 0, "input tensor size in bytes must divisible by cipher block size in bytes"); + check_cipher(cipher, key); + const auto key_bytes = reinterpret_cast(key.contiguous().data_ptr()); + if (mode == "ecb") { + aes_ecb_decrypt(input, output, key_bytes); + } else if (mode == "ctr") { + aes_ctr_decrypt(input, 
output, key_bytes); + } else { + TORCH_CHECK(false, "encrypt/decrypt supports \"ecb\" and \"ctr\" modes, \"", mode, "\" is not supported."); + } + return output; +} diff --git a/torchcsprng/csrc/kernels_commons.h b/torchcsprng/csrc/kernels_commons.h new file mode 100644 index 0000000..f4021a7 --- /dev/null +++ b/torchcsprng/csrc/kernels_commons.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) Meta Platforms, Inc. and its affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include "macros.h" +#include "block_cipher.h" + +inline uint64_t make64BitsFrom32Bits(uint32_t hi, uint32_t lo) { + return (static_cast(hi) << 32) | lo; +} + +// CUDA CSPRNG is actually CPU generator which is used only to generate a random key on CPU for AES running in a block mode on CUDA +struct CSPRNGGeneratorImpl : public c10::GeneratorImpl { + CSPRNGGeneratorImpl(bool use_rd) : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, use_rd_{use_rd} {} + CSPRNGGeneratorImpl(const std::string& token) : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, use_rd_{true}, rd_{token} {} + CSPRNGGeneratorImpl(uint64_t seed) : c10::GeneratorImpl{at::Device(at::DeviceType::CPU), at::DispatchKeySet(at::DispatchKey::CustomRNGKeyId)}, use_rd_{false}, mt_{static_cast(seed)} { } + ~CSPRNGGeneratorImpl() = default; + uint32_t random() { return use_rd_ ? rd_() : mt_(); } + uint64_t random64() { return use_rd_ ? 
make64BitsFrom32Bits(rd_(), rd_()) : make64BitsFrom32Bits(mt_(), mt_()); } + + void set_current_seed(uint64_t seed) override { throw std::runtime_error("not implemented"); } + uint64_t current_seed() const override { throw std::runtime_error("not implemented"); } + uint64_t seed() override { throw std::runtime_error("not implemented"); } + CSPRNGGeneratorImpl* clone_impl() const override { throw std::runtime_error("not implemented"); } + + static at::DeviceType device_type() { return at::DeviceType::CPU; } + + void set_state(const c10::TensorImpl& new_state) override { throw std::runtime_error("not implemented"); } + c10::intrusive_ptr get_state() const override { throw std::runtime_error("not implemented"); } + + bool use_rd_; + std::random_device rd_; + std::mt19937 mt_; +}; diff --git a/torchcsprng/csrc/kernels_decls.inc b/torchcsprng/csrc/kernels_decls.inc new file mode 100644 index 0000000..d07aa09 --- /dev/null +++ b/torchcsprng/csrc/kernels_decls.inc @@ -0,0 +1,56 @@ +/* + * Copyright (c) Meta Platforms, Inc. and its affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +// ==================================================== Random ======================================================== + +at::Tensor& random_(at::Tensor& self, c10::optional generator); + +at::Tensor& random_from_to(at::Tensor& self, int64_t from, optional to, c10::optional generator); + +at::Tensor& random_to(at::Tensor& self, int64_t to, c10::optional generator); + +// ==================================================== Uniform ======================================================= + +at::Tensor& uniform_(at::Tensor& self, double from, double to, c10::optional generator); + +// ==================================================== Normal ======================================================== + +at::Tensor& normal_(at::Tensor& self, double mean, double std, c10::optional generator); + +at::Tensor& normal_Tensor_float_out(at::Tensor& output, const at::Tensor& mean, double std, c10::optional gen); + +at::Tensor& normal_float_Tensor_out(at::Tensor& output, double mean, const at::Tensor& std, c10::optional gen); + +at::Tensor& normal_Tensor_Tensor_out(at::Tensor& output, const at::Tensor& mean, const at::Tensor& std, c10::optional gen); + +at::Tensor normal_Tensor_float(const at::Tensor& mean, double std, c10::optional gen); + +at::Tensor normal_float_Tensor(double mean, const at::Tensor& std, c10::optional gen); + +at::Tensor normal_Tensor_Tensor(const at::Tensor& mean, const at::Tensor& std, c10::optional gen); + +// ==================================================== Cauchy ======================================================== + +at::Tensor& cauchy_(at::Tensor& self, double median, double sigma, c10::optional generator); + +// ================================================== LogNormal ======================================================= + +at::Tensor& log_normal_(at::Tensor& self, double mean, double std, c10::optional gen); + +// ================================================== Geometric 
======================================================= + +at::Tensor& geometric_(at::Tensor& self, double p, c10::optional gen); + +// ================================================== Exponential ===================================================== + +at::Tensor& exponential_(at::Tensor& self, double lambda, c10::optional gen); + +// ================================================Encrypt/Decrypt===================================================== + +Tensor encrypt(Tensor input, Tensor output, Tensor key, const std::string& cipher, const std::string& mode); + +Tensor decrypt(Tensor input, Tensor output, Tensor key, const std::string& cipher, const std::string& mode); diff --git a/torch_csprng/csrc/macros.h b/torchcsprng/csrc/macros.h similarity index 51% rename from torch_csprng/csrc/macros.h rename to torchcsprng/csrc/macros.h index aaa6b67..d21b25c 100644 --- a/torch_csprng/csrc/macros.h +++ b/torchcsprng/csrc/macros.h @@ -1,3 +1,10 @@ +/* + * Copyright (c) Meta Platforms, Inc. and its affiliates. All Rights Reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + #pragma once #if defined(__CUDACC__) || defined(__HIPCC__) diff --git a/version.txt b/version.txt index 5d192a8..c181bf5 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.1.0a0 \ No newline at end of file +0.3.0a0