Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CI] Fix deadlock #395

Merged
merged 2 commits into from
Dec 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 6 additions & 11 deletions .github/scripts/run_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,15 @@
 def run_command(cmd):
     cmd = " ".join(cmd)
     print("Running command: " + cmd)
-    popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
-    outputs = []
-    for line in popen.stdout:
-        print(line, end='')
-        outputs.append(line)
-    popen.stdout.close()
-    ret = popen.wait()
+    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
+    stdout, stderr = process.communicate()
+    ret = process.returncode
     if ret:
         print('STDERR:')
-        for line in popen.stderr:
+        for line in stderr:
             print(line, end='')
-        print(f'Command {cmd} failed with return code {ret}.')
-        return None
-    return outputs
+        raise RuntimeError(f'Command {cmd} failed with return code {ret}.')
+    return stdout

def get_bench_cmd(run_type, run_id, run_name, run_param_name, dtype):
# Get the name of the benchmark script from DB
Expand Down
3 changes: 2 additions & 1 deletion .github/scripts/start_instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def run_command(cmd):

# e.g., ' 1, 2, ,3,,' -> ['1', '2', '3']
hw_config_ids = os.environ.get('HW_CONFIG').replace(' ', '')
+ repo_org = os.environ.get('REPO_NAME').split('/')[0]
if hw_config_ids == 'all':
query = (
'SELECT id FROM hardware_config'
Expand All @@ -34,7 +35,7 @@ def run_command(cmd):
query = (
'SELECT cloud_provider_id, instance_id, hardware_config.name as hw_config FROM cloud_instance '
'JOIN hardware_config ON cloud_instance.hardware_config_id = hardware_config.id '
- f'WHERE hardware_config_id = {hw_config_id} LIMIT 1'
+ f'WHERE hardware_config_id = {hw_config_id} AND cloud_instance.org = \'{repo_org}\' LIMIT 1'
)
cursor.execute(query)
rows = cursor.fetchall()
Expand Down
14 changes: 10 additions & 4 deletions .github/workflows/regression.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
description: 'Shut down GPU instances when finished.'
required: true
type: boolean
- default: false
+ default: true
issue_comment:
types: [created]

Expand All @@ -29,8 +29,8 @@ jobs:
start_instances:
if: |
github.event_name == 'workflow_dispatch' ||
- github.event_name == 'issue_comment' && github.event.issue.pull_request &&
- contains(fromJSON('["MEMBER", "OWNER"]'), github.event.comment.author_association) &&
+ github.event_name == 'issue_comment' && github.event.issue.pull_request != '' &&
+ contains(fromJSON('["MEMBER", "OWNER", "COLLABORATOR"]'), github.event.comment.author_association) &&
contains(github.event.comment.body, '$hidet-ci launch')
runs-on: ubuntu-latest
outputs:
Expand All @@ -48,6 +48,7 @@ jobs:
run: timeout 900 python ./.github/scripts/start_instances.py
env:
HW_CONFIG: all
+ REPO_NAME: ${{ github.repository }}
# TODO: Allow launching only specified GPU instances

- name: Upload run configs
Expand All @@ -59,6 +60,7 @@ jobs:

run_tests:
needs: start_instances
+ timeout-minutes: 2880
strategy:
matrix:
hw_configs: ${{ fromJSON(needs.start_instances.outputs.hw_configs) }}
Expand Down Expand Up @@ -110,6 +112,7 @@ jobs:
name: run_configs

- name: Run tests
+ timeout-minutes: 2880
run: |
python hidet/.github/scripts/run_tests.py
env:
Expand Down Expand Up @@ -165,7 +168,10 @@ jobs:
HW_CONFIGS: ${{ needs.start_instances.outputs.hw_configs }}

stop_instances:
- if: ${{ inputs.shutdown_instances }}
+ if: |
+ github.event_name == 'workflow_dispatch' && inputs.shutdown_instances ||
+ github.event_name == 'issue_comment' && github.event.issue.pull_request != '' &&
+ !contains(github.event.comment.body, '--keep')
runs-on: ubuntu-latest
needs: [start_instances, run_tests]
steps:
Expand Down