|
10 | 10 | schedule:
|
11 | 11 | - cron: '0 9 * * *'
|
12 | 12 |
|
| 13 | +# TODO: port this to integration tests in 0.31.0 and then delete this file |
13 | 14 | jobs:
|
14 |
| - create-runners: |
15 |
| - runs-on: [self-hosted, scheduler] |
| 15 | + fast-fail: |
| 16 | + runs-on: ubuntu-latest |
16 | 17 | steps:
|
17 |
| - - name: Create new G6 instance |
18 |
| - id: create_gpu1 |
| 18 | + - name: Fail if run on master branch |
| 19 | + id: fast_fail |
| 20 | + if: github.ref == 'refs/heads/master' |
19 | 21 | run: |
|
20 |
| - cd /home/ubuntu/djl_benchmark_script/scripts |
21 |
| - token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \ |
22 |
| - https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ |
23 |
| - --fail \ |
24 |
| - | jq '.token' | tr -d '"' ) |
25 |
| - ./start_instance.sh action_g6 $token djl-serving |
26 |
| - - name: Create new G6 instance |
27 |
| - id: create_gpu2 |
28 |
| - run: | |
29 |
| - cd /home/ubuntu/djl_benchmark_script/scripts |
30 |
| - token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \ |
31 |
| - https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ |
32 |
| - --fail \ |
33 |
| - | jq '.token' | tr -d '"' ) |
34 |
| - ./start_instance.sh action_g6 $token djl-serving |
35 |
| - - name: Create new Inf2.24xl instance |
36 |
| - id: create_inf2 |
37 |
| - run: | |
38 |
| - cd /home/ubuntu/djl_benchmark_script/scripts |
39 |
| - token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \ |
40 |
| - https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ |
41 |
| - --fail \ |
42 |
| - | jq '.token' | tr -d '"' ) |
43 |
| - ./start_instance.sh action_inf2 $token djl-serving |
44 |
| - outputs: |
45 |
| - gpu_instance_id_1: ${{ steps.create_gpu1.outputs.action_g6_instance_id }} |
46 |
| - gpu_instance_id_2: ${{ steps.create_gpu2.outputs.action_g6_instance_id }} |
47 |
| - inf2_instance_id: ${{ steps.create_inf2.outputs.action_inf2_instance_id }} |
48 |
| - |
49 |
| - test: |
50 |
| - runs-on: [ "${{ matrix.test.instance }}" ] |
51 |
| - timeout-minutes: 90 |
52 |
| - needs: create-runners |
53 |
| - strategy: |
54 |
| - fail-fast: false |
55 |
| - matrix: |
56 |
| - test: |
57 |
| - - test: TestCorrectnessTrtLlm |
58 |
| - instance: g6 |
59 |
| - - test: TestCorrectnessLmiDist |
60 |
| - instance: g6 |
61 |
| - - test: TestCorrectnessNeuronx |
62 |
| - instance: inf2 |
63 |
| - steps: |
64 |
| - - uses: actions/checkout@v4 |
65 |
| - - name: Clean env |
66 |
| - run: | |
67 |
| - yes | docker system prune -a --volumes |
68 |
| - sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/ |
69 |
| - echo "wait dpkg lock..." |
70 |
| - while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done |
71 |
| - - name: Set up JDK 17 |
72 |
| - uses: actions/setup-java@v4 |
73 |
| - with: |
74 |
| - distribution: 'corretto' |
75 |
| - java-version: 17 |
76 |
| - - name: Set up Python3 |
77 |
| - uses: actions/setup-python@v5 |
78 |
| - with: |
79 |
| - python-version: '3.10.x' |
80 |
| - - name: Install pip dependencies |
81 |
| - run: pip3 install pytest requests "numpy<2" pillow huggingface_hub |
82 |
| - - name: Install torch |
83 |
| - # Use torch to get cuda capability of current device to selectively run tests |
84 |
| - # Torch version doesn't really matter that much |
85 |
| - run: | |
86 |
| - pip3 install torch==2.3.0 |
87 |
| - - name: Install awscurl |
88 |
| - working-directory: tests/integration |
89 |
| - run: | |
90 |
| - curl -OL https://publish.djl.ai/awscurl/awscurl |
91 |
| - chmod +x awscurl |
92 |
| - mkdir outputs |
93 |
| - - name: Test |
94 |
| - working-directory: tests/integration |
95 |
| - env: |
96 |
| - TEST_DJL_VERSION: ${{ inputs.djl-version }} |
97 |
| - run: | |
98 |
| - python -m pytest -k ${{ matrix.test.test }} tests.py |
99 |
| - - name: Cleanup |
100 |
| - working-directory: tests/integration |
101 |
| - run: | |
102 |
| - rm -rf outputs |
103 |
| - rm awscurl |
104 |
| - - name: On Failure |
105 |
| - if: ${{ failure() }} |
106 |
| - working-directory: tests/integration |
107 |
| - run: | |
108 |
| - for file in outputs/*; do if [ -f "$file" ]; then echo "Contents of $file:"; cat "$file"; echo; fi; done |
109 |
| - sudo rm -rf outputs && sudo rm -rf models |
110 |
| - rm awscurl |
111 |
| - docker rm -f $(docker ps -aq) || true |
112 |
| - - name: Upload test logs |
113 |
| - if: ${{ always() }} |
114 |
| - uses: actions/upload-artifact@v4 |
115 |
| - with: |
116 |
| - name: test-${{ matrix.test.test }}-logs |
117 |
| - path: tests/integration/all_logs/ |
118 |
| - |
119 |
| - stop-runners: |
120 |
| - if: always() |
121 |
| - runs-on: [ self-hosted, scheduler ] |
122 |
| - needs: [ create-runners, test] |
123 |
| - steps: |
124 |
| - - name: Stop all instances |
125 |
| - run: | |
126 |
| - cd /home/ubuntu/djl_benchmark_script/scripts |
127 |
| - instance_id=${{ needs.create-runners.outputs.gpu_instance_id_1 }} |
128 |
| - ./stop_instance.sh $instance_id |
129 |
| - instance_id=${{ needs.create-runners.outputs.gpu_instance_id_2 }} |
130 |
| - ./stop_instance.sh $instance_id |
131 |
| - instance_id=${{ needs.create-runners.outputs.inf2_instance_id }} |
132 |
| - ./stop_instance.sh $instance_id |
| 22 | + echo "Fast fail" |
| 23 | + exit 1 |
0 commit comments