From 6899263daf50a9848b47c561a6e08d72991f52dc Mon Sep 17 00:00:00 2001
From: Dmitry Rogozhkin
Date: Sun, 22 Dec 2024 21:14:13 -0800
Subject: [PATCH] ci/transformers: add pipeline and trainer tests (#1185)

Changes:
* Add testing of tests/pipelines
* Add testing of tests/trainer
* Add printing results summary (to workload summary page)
* Add printing failure lines (to workload summary page)

Summary page contains sections in this order:
* Results summary
* Failure lines
* Annotations

---------

Signed-off-by: Dmitry Rogozhkin
---
 .github/workflows/_linux_transformers.yml | 168 ++++++++++++++++------
 1 file changed, 123 insertions(+), 45 deletions(-)

diff --git a/.github/workflows/_linux_transformers.yml b/.github/workflows/_linux_transformers.yml
index 95aee8e7e..65dde1b6d 100644
--- a/.github/workflows/_linux_transformers.yml
+++ b/.github/workflows/_linux_transformers.yml
@@ -104,7 +104,6 @@ jobs:
           rm -rf reports
           cp ${{ github.workspace }}/torch-xpu-ops/.github/scripts/spec.py ./
       - name: Report installed versions
-        id: installed
         run: |
           source activate huggingface_transformers_test
           echo "pip installed packages:"
@@ -113,48 +112,6 @@ jobs:
           lspci -d ::0380 | tee ${{ github.workspace }}/transformers/tests_log/lspci_0380.txt
           echo "GPU render nodes:"
           cat /sys/class/drm/render*/device/device | tee ${{ github.workspace }}/transformers/tests_log/device_IDs.txt
-          # printing annotations for the key packages
-          echo "### Annotations" >> $GITHUB_STEP_SUMMARY
-          echo "| | |" >> $GITHUB_STEP_SUMMARY
-          echo "| --- | --- |" >> $GITHUB_STEP_SUMMARY
-          echo "| jobs.$GITHUB_JOB.versions.os | $(source /etc/os-release && echo $VERSION_ID) |" >> $GITHUB_STEP_SUMMARY
-          echo "| jobs.$GITHUB_JOB.versions.linux-kernel | $(uname -r) |" >> $GITHUB_STEP_SUMMARY
-          echo "| jobs.$GITHUB_JOB.versions.python | $(python --version | cut -f2 -d' ') |" >> $GITHUB_STEP_SUMMARY
-          packages=" \
-            level-zero \
-            libigc1 \
-            libigc2 \
-            libze1 \
-            libze-intel-gpu1 \
-            intel-i915-dkms \
-            intel-level-zero-gpu \
-            intel-opencl-icd"
-          for package in $packages; do
-            package_version=$(dpkg -l | grep $package | grep ii | head -1 | sed "s/  */ /g" | cut -f3 -d" ")
-            echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |" >> $GITHUB_STEP_SUMMARY
-          done
-          packages="accelerate \
-            numpy \
-            torch \
-            torchaudio \
-            torchvision \
-            transformers"
-          for package in $packages; do
-            package_version=$(python -c "import $package; print($package.__version__)")
-            echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |" >> $GITHUB_STEP_SUMMARY
-          done
-          # printing annotations for GPU cards
-          var="[$(cat /sys/class/drm/render*/device/vendor)]"
-          echo "| jobs.$GITHUB_JOB.drm.render_nodes_vendor_ids | $(echo $var | sed "s/ /,/g") |" >> $GITHUB_STEP_SUMMARY
-          var="[$(cat /sys/class/drm/render*/device/device)]"
-          echo "| jobs.$GITHUB_JOB.drm.render_nodes_device_ids | $(echo $var | sed "s/ /,/g") |" >> $GITHUB_STEP_SUMMARY
-          var=$(python -c "import torch; print(torch.version.xpu)")
-          echo "| jobs.$GITHUB_JOB.torch.version.xpu | $var |" >> $GITHUB_STEP_SUMMARY
-          var=$(python -c "import torch; print(torch.xpu.device_count())")
-          echo "| jobs.$GITHUB_JOB.torch.xpu.device_count | $var |" >> $GITHUB_STEP_SUMMARY
-          # printing annotations with key environment variables
-          echo "| jobs.$GITHUB_JOB.env.ZE_AFFINITY_MASK | $ZE_AFFINITY_MASK |" >> $GITHUB_STEP_SUMMARY
-          echo "| jobs.$GITHUB_JOB.env.NEOReadDebugKeys | $NEOReadDebugKeys |" >> $GITHUB_STEP_SUMMARY
       - name: Sanitry check installed packages
         run: |
           source activate huggingface_transformers_test
@@ -164,11 +121,132 @@ jobs:
           pip show torchaudio | grep Version | grep xpu
           pip show torchvision | grep Version | grep xpu
           python -c 'import torch; exit(not torch.xpu.is_available())'
-      - name: Run XPU backbone
+      - name: Run -k backbone tests
         run: |
           source activate huggingface_transformers_test
           cd transformers
-          python3 -m pytest -rsf --make-reports=tests_benchmark -k backbone tests
+          python3 -m pytest -rsf --make-reports=tests_backbone -k backbone tests
+      - name: Run tests/pipelines
+        run: |
+          source activate huggingface_transformers_test
+          cd transformers
+          # Some tests are known to fail w/o clear pattern
+          # TODO: drop ||true after triage and fixes
+          python3 -m pytest -rsf --make-reports=tests_pipelines tests/pipelines || true
+      - name: Run tests/trainer
+        run: |
+          source activate huggingface_transformers_test
+          cd transformers
+          # Excluding tests due to:
+          # * Some ray tests hang, reason unknown
+          # * torch.distributed.* not yet supported by XPU
+          pattern=" \
+            not ray and \
+            not TestTrainerDistributed and \
+            not TestTrainerDistributedXPU and \
+            not TestFSDPTrainer"
+          python3 -m pytest -rsf --make-reports=tests_trainer tests/trainer -k "$pattern"
+      - name: Print results table
+        if: ${{ ! cancelled() }}
+        run: |
+          # parse_stat FILE NOUN: print the number preceding NOUN in FILE, or 0 if absent.
+          # NOUN is matched as a prefix, so "error" counts both "1 error" and "2 errors":
+          #   === 25 failed, 11 warnings, 1 error in 12.3s ===
+          #   parse_stat stats.txt error   ->   1
+          parse_stat() {
+            local count; count=$(sed -n "s/.* \([0-9][0-9]*\) $2.*/\1/p" "$1" | head -1)
+            if [ -n "$count" ]; then echo "$count"; else echo "0"; fi
+          }
+          cd transformers
+          {
+            echo "### Results"
+            echo "| Test group | Errors | Failed | Passed | Skipped |"
+            echo "| --- | --- | --- | --- | --- |"
+            for stat in $(find reports -name stats.txt); do
+              # Each stats.txt is located in: reports/$test_group/stats.txt
+              test_group=$(echo "$stat" | cut -f 2 -d/)
+              # Get failed, passed, skipped, etc. counters
+              failed=$(parse_stat "$stat" failed)
+              passed=$(parse_stat "$stat" passed)
+              skipped=$(parse_stat "$stat" skipped)
+              warnings=$(parse_stat "$stat" warning)  # parsed, but not shown in the table
+              errors=$(parse_stat "$stat" error)
+              echo "| $test_group | $errors | $failed | $passed | $skipped |"
+            done
+          } >> "$GITHUB_STEP_SUMMARY"
+      - name: Print failure lines
+        if: ${{ ! cancelled() }}
+        run: |
+          cd transformers
+          {
+            echo "### Failure lines"
+            echo "| File | Error | Comment |"
+            echo "| --- | --- | --- |"
+            : > _failures.txt  # start from an empty file so sort works even with no reports
+            for failure in $(find reports -name failures_line.txt); do
+              tail -n +2 "$failure" >> _failures.txt
+            done
+            # failures_line.txt file does not have test case information,
+            # so we can just sort the output and report uniq values
+            sort _failures.txt | uniq > _failures_uniq.txt
+            while read -r file error comment; do  # -r keeps backslashes; no glob expansion
+              file=${file%:}
+              error=${error%:}
+              # Failure comments often contain special characters which complicate
+              # parsing failure lines. But fortunately we know for sure where comments
+              # start. So we just output all contents starting from this position and
+              # wrap everything in <pre></pre> to avoid collisions with Markdown formatting.
+              comment="<pre>${comment%:}</pre>"
+              echo "| $file | $error | $comment |"
+            done <_failures_uniq.txt
+          } >> "$GITHUB_STEP_SUMMARY"
+      - name: Print annotations
+        if: ${{ ! cancelled() }}
+        run: |
+          source activate huggingface_transformers_test
+          {
+            echo "### Annotations"
+            echo "| | |"
+            echo "| --- | --- |"
+            echo "| jobs.$GITHUB_JOB.versions.os | $(source /etc/os-release && echo $VERSION_ID) |"
+            echo "| jobs.$GITHUB_JOB.versions.linux-kernel | $(uname -r) |"
+            echo "| jobs.$GITHUB_JOB.versions.python | $(python --version | cut -f2 -d' ') |"
+            packages=" \
+              level-zero \
+              libigc1 \
+              libigc2 \
+              libze1 \
+              libze-intel-gpu1 \
+              intel-i915-dkms \
+              intel-level-zero-gpu \
+              intel-opencl-icd"
+            for package in $packages; do
+              package_version=$(dpkg -l | awk -v p="$package" '$1 == "ii" && ($2 == p || index($2, p ":") == 1) && !found++ { print $3 }')
+              echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |"
+            done
+            packages="accelerate \
+              numpy \
+              torch \
+              torchaudio \
+              torchvision \
+              transformers"
+            for package in $packages; do
+              package_version=$(python -c "import $package; print($package.__version__)" || true)
+              echo "| jobs.$GITHUB_JOB.versions.$package | $package_version |"
+            done
+            # printing annotations for GPU cards
+            var="[$(cat /sys/class/drm/render*/device/vendor || true)]"
+            echo "| jobs.$GITHUB_JOB.drm.render_nodes_vendor_ids | $(echo $var | sed 's/ /,/g') |"
+            var="[$(cat /sys/class/drm/render*/device/device || true)]"
+            echo "| jobs.$GITHUB_JOB.drm.render_nodes_device_ids | $(echo $var | sed 's/ /,/g') |"
+            var=$(python -c "import torch; print(torch.version.xpu)" || true)
+            echo "| jobs.$GITHUB_JOB.torch.version.xpu | $var |"
+            var=$(python -c "import torch; print(torch.xpu.device_count())" || true)
+            echo "| jobs.$GITHUB_JOB.torch.xpu.device_count | $var |"
+            # printing annotations with key environment variables
+            echo "| jobs.$GITHUB_JOB.env.ZE_AFFINITY_MASK | $ZE_AFFINITY_MASK |"
+            echo "| jobs.$GITHUB_JOB.env.NEOReadDebugKeys | $NEOReadDebugKeys |"
+          } >> "$GITHUB_STEP_SUMMARY"
       - name: Upload Test log
         if: ${{ ! cancelled() }}
         uses: actions/upload-artifact@v4