# .github/workflows/win-cuda-x64-build.yml

# CI workflow that builds and tests the project on Windows x64 with CUDA.
name: 'Windows CUDA x64 Build'

# Triggers: manual dispatch, pushes to main and rel-* branches, and pull requests.
on:
  workflow_dispatch:
  push:
    branches:
    - 'main'
    - 'rel-*'
  pull_request:

# Runs are grouped by workflow name plus the PR head ref; when head_ref is
# empty the run id is used instead, so such runs never share a group.
# A newer run in the same group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

# Workflow-wide environment variables, visible to every step.
env:
  # Managed-identity (MSI) auto-login settings read by azcopy.
  AZCOPY_AUTO_LOGIN_TYPE: MSI
  AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4
  # Directory the CUDA SDK is downloaded into, and the SDK version to fetch.
  # Single-quoted so that backslashes are taken literally.
  cuda_dir: '${{ github.workspace }}\cuda_sdk'
  cuda_version: "11.8"
  # Must be kept in sync with cuda_dir and cuda_version above.
  # Previously a plain scalar, where "\\" is NOT an escape and therefore
  # produced doubled backslashes in the resulting path.
  CUDA_PATH: '${{ github.workspace }}\cuda_sdk\v11.8'
  # CMake binary directory, relative to the workspace root.
  binaryDir: 'build/cuda'
  # Feed query used to look up the OnnxRuntime nightly package version.
  ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime.Gpu.Windows&api-version=6.0-preview.1"
  ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime.Gpu.Windows"

jobs:
  windows-cuda-x64-build:
    # Runs on a self-hosted runner selected by the 1ES pool label below.
    runs-on:
    - 'self-hosted'
    - '1ES.Pool=onnxruntime-genai-Win2022-GPU-A10'
    steps:
    # Check out the repository together with its submodules.
    - name: Checkout OnnxRuntime GenAI repo
      uses: actions/checkout@v4
      with:
        submodules: true

    # Python 3.11 (x64) is used for the wheel install and the Python tests.
    - uses: actions/setup-python@v5
      with:
        architecture: "x64"
        python-version: "3.11.x"

    # Copy the CUDA SDK for env.cuda_version from blob storage into env.cuda_dir.
    # The destination is quoted so a workspace path containing spaces stays
    # a single argument.
    - name: Download cuda
      run: |
        azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v${{ env.cuda_version }}" "${{ env.cuda_dir }}"

    # .NET 8 SDK, used by the C# test step.
    - uses: actions/setup-dotnet@v4
      with:
        dotnet-version: '8.0.x'

    # jq and curl are used by the step that resolves the OnnxRuntime nightly version.
    - name: Install jq and curl
      run: |
        choco install -y jq curl

    # Query the ORT-Nightly feed, take the first version listed for the first
    # matching package, export it as ORT_NIGHTLY_VERSION for later steps, and
    # install that package version with nuget.
    - name: Download OnnxRuntime Nightly
      shell: pwsh
      run: |
        $ORT_NIGHTLY_VERSION=$(curl -s "${{ env.ORT_NIGHTLY_REST_API }}" | jq -r '.value[0].versions[0].normalizedVersion')
        # Fail early with a clear message: the value is empty when the request
        # fails, and jq prints the literal string "null" when the field is absent.
        if ([string]::IsNullOrWhiteSpace($ORT_NIGHTLY_VERSION) -or $ORT_NIGHTLY_VERSION -eq 'null') {
          throw "Unable to resolve a nightly version for ${{ env.ORT_PACKAGE_NAME }}"
        }
        echo "$ORT_NIGHTLY_VERSION"
        "ORT_NIGHTLY_VERSION=$ORT_NIGHTLY_VERSION" | Out-File -FilePath $env:GITHUB_ENV -Append
        nuget install ${{ env.ORT_PACKAGE_NAME }} -version $ORT_NIGHTLY_VERSION -ExcludeVersion -NonInteractive

    # Diagnostic listing of the installed package; a failure here does not fail the job.
    - run: Get-ChildItem  ${{ env.ORT_PACKAGE_NAME }} -Recurse
      continue-on-error: true

    # Move the headers to ort/include and the win-x64 native binaries to ort/lib.
    - name: Extract OnnxRuntime library and header files
      run: |
        mkdir ort/lib
        move ${{ env.ORT_PACKAGE_NAME }}/buildTransitive/native/include ort/
        move ${{ env.ORT_PACKAGE_NAME }}/runtimes/win-x64/native/* ort/lib/

    # Configure with the CUDA toolset pointing at the downloaded SDK.
    # PowerShell does not treat "\\" as an escape sequence, so single
    # backslashes are used to avoid doubled path separators; the argument is
    # quoted so a path containing spaces stays a single argument.
    - name: Configure CMake
      run: |
        cmake --preset windows_x64_cuda_release -T "cuda=${{ env.cuda_dir }}\v${{ env.cuda_version }}" -DTEST_PHI2=False

    - name: Build with CMake
      run: |
        cmake --build --preset windows_x64_cuda_release --parallel

    # Appending to GITHUB_PATH makes the CUDA bin directory available on PATH
    # in all subsequent steps.
    - name: Add CUDA to PATH
      run: |
        echo "${{ env.cuda_dir }}\v${{ env.cuda_version }}\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append

    # Run the C# tests against the native binaries produced by the CMake build.
    - name: Build the C# API and Run the C# Tests
      run: |
        cd test\csharp
        dotnet test /p:Configuration=release /p:NativeBuildOutputDir="$env:GITHUB_WORKSPACE\$env:binaryDir\Release"

    # Install the wheel produced by the build, then the Python test requirements.
    # NOTE(review): this CUDA job installs the "-cpu" requirements file — confirm that is intended.
    - name: Install the Python Wheel and Test Dependencies
      run: |
        python -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl"))
        # Only the last command's exit code fails a pwsh step automatically,
        # so a failed wheel install has to be checked explicitly.
        if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
        python -m pip install -r test\python\requirements-nightly-cpu.txt

    # Log in with the managed identity, read the HuggingFace token from Key
    # Vault, mask it in the logs, and export it as HF_TOKEN for later steps.
    - name: Get HuggingFace Token
      run: |
        az login --identity --username 63b63039-6328-442f-954b-5a64d124e5b4
        if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
        # "--output tsv" yields the raw secret value. With the default JSON
        # output the value is wrapped in double quotes, which would end up in
        # HF_TOKEN and would make the mask match only the quoted form.
        $HF_TOKEN = (az keyvault secret show --vault-name anubissvcsecret --name ANUBIS-HUGGINGFACE-TOKEN --query value --output tsv)
        if ($LASTEXITCODE -ne 0 -or [string]::IsNullOrWhiteSpace($HF_TOKEN)) {
          throw "Failed to read the HuggingFace token from Key Vault"
        }
        Write-Output "::add-mask::$HF_TOKEN"
        Add-Content -Path $env:GITHUB_ENV -Value "HF_TOKEN=$HF_TOKEN"

    - name: Run the Python Tests
      run: |
        python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models"


    # Diagnostic listing of the build directory. Runs even when earlier steps
    # failed (if: always()) and never fails the job (continue-on-error).
    - name: Verify Build Artifacts
      if: always()
      continue-on-error: true
      run: |
        Get-ChildItem -Path $env:GITHUB_WORKSPACE\$env:binaryDir -Recurse

    # Put the CUDA bin directory first on PATH for this step, print the
    # resulting PATH for diagnostics, then run the native unit tests.
    # Single backslashes are used because PowerShell does not treat "\\" as
    # an escape sequence.
    - name: Prepend CUDA to PATH and Run tests
      run: |
        $env:PATH = "${{ env.cuda_dir }}\v${{ env.cuda_version }}\bin;" + $env:PATH
        echo "Current PATH variable is: $env:PATH"
        .\build\cuda\test\Release\unit_tests.exe