# Page residue from the GitHub web UI (not part of the workflow): "gpu1", "gpu1 #50".
# Workflow file for this run:

# Display name of the workflow.
name: Test branch or PR

# Events that start the workflow.
on:
  # Pull-request activity; only the listed activity types trigger a run.
  pull_request:
    types: [opened, review_requested, ready_for_review, synchronize, unlocked]
  # Merge-queue check requests.
  merge_group:
    types: [checks_requested]
  # Pushes to the workflow debugging branch only.
  push:
    branches:
      - 'dev_test_workflow'
  # Manual runs started from the Actions UI or the API.
  workflow_dispatch:
# Allow one run per git ref: a newer run for the same ref cancels the one
# that is still in progress.
concurrency:
  group: comodels-test-${{ github.ref }}
  cancel-in-progress: true
# Workflow-wide constants, exported to every step's environment.
env:
  # Directory (relative to the workspace) the repository is checked out into.
  CO_MODELS_SRC: CoModels
  # Directory the selected model is copied to and that is mounted in the container.
  TEST_DIR: test_dir
  # Docker image used for the test container.
  TEST_IMG_TAG: 'comodels_test:latest'
  # Name given to the test container.
  TEST_CONTAINER_NAME: co_models_test
jobs:
  # Single job: pick a model (MODEL_ARCH), copy its directory out of the
  # checkout, start a GPU container with that copy mounted, then run the
  # model's train.sh for one epoch inside the container.
  test_job:
    # NOTE(review): the display name does not describe the steps below; it is
    # left unchanged because status checks may reference it.
    name: Collect information about PR and source
    runs-on: [self-hosted]
    steps:
      # --- MODEL_ARCH selection -------------------------------------------
      # Exactly which of the next three steps runs depends on the event.
      # If none of them runs (e.g. draft PR, PR not targeting master,
      # merge_group), MODEL_ARCH stays unset and "Prepare test directory"
      # fails with an explicit error.
      - name: Set up MODEL_ARCH
        if: github.event_name == 'push'
        run: |
          # use resnet50 for debug
          echo "MODEL_ARCH=resnet50" >> "$GITHUB_ENV"
      - name: Set up MODEL_ARCH from PR
        if: github.event.pull_request.draft == false && github.base_ref == 'master'
        env:
          # The PR title is attacker-controlled text. Hand it to the script as
          # an environment variable instead of expanding it into the script
          # source, so it can never be interpreted as shell code.
          PR_TITLE: ${{ github.event.pull_request.title }}
        run: |
          # use pr title as the MODEL_ARCH
          echo "MODEL_ARCH=$PR_TITLE" >> "$GITHUB_ENV"
      - name: Set up MODEL_ARCH from workflow_dispatch branch
        if: github.event_name == 'workflow_dispatch'
        env:
          # Branch names may contain shell metacharacters; pass via env as well.
          REF_NAME: ${{ github.ref_name }}
        run: |
          # use branch name as the MODEL_ARCH
          echo "MODEL_ARCH=$REF_NAME" >> "$GITHUB_ENV"
      - name: Checkout CoModels branch
        uses: actions/checkout@v4
        with:
          # NOTE(review): BRANCH is not set anywhere in this workflow file, so
          # this expands to an empty string; presumably checkout then uses its
          # default ref for the triggering event - confirm this is intended.
          ref: ${{ env.BRANCH }}
          path: ${{ env.CO_MODELS_SRC }}
      - name: Prepare test directory
        run: |
          # Fail fast when no earlier step selected a model, and refuse values
          # that could climb out of cv/classification/.
          case "$MODEL_ARCH" in
            '' | *..*)
              echo "::error::MODEL_ARCH is empty or invalid: '$MODEL_ARCH'"
              exit 1
              ;;
          esac
          model_dir="$CO_MODELS_SRC/cv/classification/$MODEL_ARCH"
          if [ ! -d "$model_dir" ]; then
            echo "::error::model directory not found: $model_dir"
            exit 1
          fi
          # The workspace of a self-hosted runner can persist between runs.
          # Without this, cp would nest the new copy inside the old directory
          # and the container would keep testing the previous run's files.
          rm -rf "$TEST_DIR"
          cp -rL "$model_dir" "$TEST_DIR"
      - name: Prepare Container
        run: |
          # Remove a container left over from an earlier run, if any.
          docker rm -f "$TEST_CONTAINER_NAME" || true
          # Start a detached container that idles (sleep) so later steps can
          # "docker exec" into it; the test directory is mounted as its workdir.
          docker run --gpus=all -d --rm --privileged --shm-size=8g \
            --pids-limit 2000 \
            --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
            -v /share_nfs/dataset/ImageNet:/data/dataset/ImageNet \
            -v /share_nfs/model_zoo/flowvision_cache:/oneflow/flowvision_cache \
            -e FLOWVISION_CACHE=/oneflow/flowvision_cache \
            -v "$(pwd)/$TEST_DIR:/workspace/$TEST_DIR" \
            -w "/workspace/$TEST_DIR" \
            --name "$TEST_CONTAINER_NAME" \
            "$TEST_IMG_TAG" \
            sleep 5400
      - name: run inference
        run: |
          # Only checks GPU visibility for now; the inference script is disabled.
          docker exec "$TEST_CONTAINER_NAME" nvidia-smi
          #docker exec "$TEST_CONTAINER_NAME" bash ./infer.sh
      - name: run training
        run: |
          # Patch train.sh in the mounted copy: limit training to one epoch and
          # set GPU_NUMS to 2, then run it.
          docker exec "$TEST_CONTAINER_NAME" sed -i 's/main.py/main.py --epochs 1/g' train.sh
          docker exec "$TEST_CONTAINER_NAME" sed -i 's/GPU_NUMS=[0-9]*/GPU_NUMS=2/g' train.sh
          docker exec "$TEST_CONTAINER_NAME" bash ./train.sh
      - name: Remove test container
        # Runs on success, failure and cancellation so the container does not
        # keep sleeping (and holding the GPUs) after the job is over.
        if: always()
        run: docker rm -f "$TEST_CONTAINER_NAME" || true