This repository was archived by the owner on Oct 11, 2024. It is now read-only.

Benchmarking separation #362

Closed · wants to merge 11 commits
2 changes: 2 additions & 0 deletions .github/actions/nm-benchmark/action.yml
@@ -27,7 +27,9 @@ runs:
VENV="${{ inputs.venv }}-${COMMIT:0:7}"
source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
fi
echo "::group::install requirements"
pip3 install -r neuralmagic/benchmarks/requirements-benchmark.txt
echo "::endgroup::"
SUCCESS=0
.github/scripts/nm-run-benchmarks.sh ${{ inputs.benchmark_config_list_file }} ${{ inputs.output_directory }} || SUCCESS=$?
echo "test=${SUCCESS}" >> "$GITHUB_OUTPUT"
34 changes: 12 additions & 22 deletions .github/actions/nm-github-action-benchmark/action.yml
@@ -5,38 +5,27 @@ inputs:
description: "Name of the benchmark. Metrics are grouped by benchmark names. github_action_benchmark alert-trigger looks for the previous benchmark value in the benchmark-name group on the previous commit"
gh_action_benchmark_json_file_path:
description: "Path to the benchmark json file to upload (Note that this JSON should be in a `github-action-benchmark` consumable format - This is typically the output of neuralmagic/benchmarks/scripts/logging/gha_benchmark_logging.py)"
type: string
required: true
gh_action_benchmark_tool:
description: "A string that is input to the `tool` argument of `github-action-benchmark`. This should be either `customBiggerIsBetter` or `customSmallerIsBetter`"
type: choice
options:
- 'customBiggerIsBetter'
- 'customSmallerIsBetter'
description: "A string that is input to the `tool` argument of `github-action-benchmark`. This should be either `customBiggerIsBetter` or `customSmallerIsBetter`"
required: true
gh_pages_branch:
description: "Github branch where the `github-action-benchmark` creates its index.html and data.js"
type: string
required: true
benchmark_data_dir_path:
description: "Path to a directory that contains benchmark files on the GitHub pages branch."
required: true
auto_push:
description: "When set to true, pushes the benchmark results to the `nm-gh-pages` branch."
type: choice
options:
- 'true'
- 'false'
description: "When set to true, pushes the benchmark results to the `nm-gh-pages` branch. This should be either 'true' or 'false'."
required: true
reporting_enabled:
description: "When set to true, if there is a regression, do 3 things. 1. Mark the workflow as failed. 2. Add commit comments"
type: choice
options:
- 'true'
- 'false'
description: "When set to true, if there is a regression, do 2 things: 1. Mark the workflow as failed, 2. Add commit comments. This should be either 'true' or 'false'."
required: true
github_token:
description: "secrets.GITHUB_TOKEN from the caller"
required: true
runs:

runs:
using: composite
steps:
# A previous invocation of this action may have left the github pages branch in an
@@ -53,17 +42,18 @@ runs:
output-file-path: ${{ inputs.gh_action_benchmark_json_file_path }}
tool: ${{ inputs.gh_action_benchmark_tool }}
gh-pages-branch: ${{ inputs.gh_pages_branch }}
# Token required for pushing to nm-gh-pages branch
benchmark-data-dir-path: ${{ inputs.benchmark_data_dir_path }}
# Token required for pushing to nm-gh-pages branch
github-token: ${{ inputs.github_token }}
# Push and deploy to Github pages automatically
auto-push: ${{ inputs.auto_push == 'true' }}
# Add a commit comment comparing the current benchmark with the previous.
comment-always: ${{ inputs.reporting_enabled == 'true' }}
# Create an alert when some value has regressed more than 10%
# Create an alert when some value has regressed more than 10%
alert-threshold: "110%"
# Mark the workflow as a failure when some alert is triggered
fail-on-alert: ${{ inputs.reporting_enabled == 'true' }}
# Add a commit comment describing what triggered the alert
comment-on-alert: ${{ inputs.reporting_enabled == 'true' }}
# TODO (varun): Is this a reasonable number ?
# TODO (varun): Is this a reasonable number ?
max-items-in-chart: 50
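
For reference, the file passed through `gh_action_benchmark_json_file_path` is expected to be in the custom-tool format that `github-action-benchmark` consumes: a JSON array of records with `name`, `unit`, and `value` fields (plus an optional `extra` string used for hover text). The sketch below is illustrative only; the metric name and values are made up and not taken from this PR.

```python
import json

# Hypothetical "customBiggerIsBetter" payload in the shape github-action-benchmark
# consumes; gha_benchmark_logging.py emits files with this structure.
records = [
    {
        "name": "benchmark_serving / request_throughput",  # metrics are grouped by name
        "unit": "requests/s",
        "value": 42.0,
        "extra": "optional hover text shown in the chart tooltip",
    },
]

with open("bigger_is_better.json", "w") as f:
    json.dump(records, f, indent=4)
```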
22 changes: 15 additions & 7 deletions .github/actions/nm-produce-gha-benchmark-json/action.yml
@@ -4,14 +4,17 @@ inputs:
vllm_benchmark_jsons_path:
description: 'Path to a directory containing a list of BenchmarkResult JSONs'
required: true
bigger_is_better_output_file_path:
description: 'Path to a file where the GHA CustomBiggerIsBetter JSON is to be stored'
output_directory:
description: 'Path to directory where JSON files will be stored'
required: true
smaller_is_better_output_file_path:
description: 'Path to a file where the GHA CustomSmallerIsBetter JSON is to be stored'
bigger_is_better_output_filename:
description: 'Name of file where the GHA CustomBiggerIsBetter JSON is to be stored'
required: true
observation_metrics_output_file_path:
description: 'Path to a file where metrics that we only want to observe are stored'
smaller_is_better_output_filename:
description: 'Name of file where the GHA CustomSmallerIsBetter JSON is to be stored'
required: true
observation_metrics_output_filename:
description: 'Name of file where metrics that we only want to observe are stored'
python:
description: 'python version, e.g. 3.10.12'
required: true
@@ -29,7 +32,12 @@ runs:
source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
fi
SUCCESS=0
python3 -m neuralmagic.benchmarks.scripts.logging.gha_benchmark_logging -i ${{inputs.vllm_benchmark_jsons_path}} --bigger-is-better-metrics-output-file-path ${{ inputs.bigger_is_better_output_file_path }} --smaller-is-better-metrics-output-file-path ${{ inputs.smaller_is_better_output_file_path }} --observation-metrics-output-file-path ${{ inputs.observation_metrics_output_file_path }} || SUCCESS=$?
python3 -m neuralmagic.benchmarks.scripts.logging.gha_benchmark_logging \
-i ${{inputs.vllm_benchmark_jsons_path}} \
--output-directory ${{ inputs.output_directory }} \
--bigger-is-better-metrics-output-filename ${{ inputs.bigger_is_better_output_filename }} \
--smaller-is-better-metrics-output-filename ${{ inputs.smaller_is_better_output_filename }} \
--observation-metrics-output-filename ${{ inputs.observation_metrics_output_filename }} || SUCCESS=$?
echo "test=${SUCCESS}" >> "$GITHUB_OUTPUT"
exit ${SUCCESS}
shell: bash
74 changes: 62 additions & 12 deletions .github/workflows/nm-benchmark.yml
@@ -164,12 +164,13 @@ jobs:
uses: ./.github/actions/nm-produce-gha-benchmark-json
with:
vllm_benchmark_jsons_path: benchmark-results
output_directory: gh-action-benchmark-jsons
# Metrics that are "better" when the value is greater are stored here
bigger_is_better_output_file_path: gh-action-benchmark-jsons/bigger_is_better.json
bigger_is_better_output_filename: bigger_is_better.json
# Metrics that are "better" when the value is smaller are stored here
smaller_is_better_output_file_path: gh-action-benchmark-jsons/smaller_is_better.json
smaller_is_better_output_filename: smaller_is_better.json
# Metrics that we only want to observe are stored here
observation_metrics_output_file_path: gh-action-benchmark-jsons/observation_metrics.json
observation_metrics_output_filename: observation_metrics.json
python: ${{ inputs.python }}
venv:

@@ -222,45 +223,94 @@ jobs:
- name: display structure of downloaded files
run: ls -R ./downloads

- name: nm-github-action-benchmark(bigger_is_better.json)
- name: nm-github-action-benchmark(serving/bigger_is_better.json)
# Absence of the file indicates that there were no "bigger_is_better" metrics
if: (success() || failure()) && (hashFiles('downloads/bigger_is_better.json') != '')
if: (success() || failure()) && (hashFiles('downloads/benchmark_serving/bigger_is_better.json') != '')
uses: ./.github/actions/nm-github-action-benchmark
with:
gh_action_benchmark_name: "bigger_is_better"
gh_action_benchmark_json_file_path: "downloads/bigger_is_better.json"
gh_action_benchmark_json_file_path: "downloads/benchmark_serving/bigger_is_better.json"
gh_action_benchmark_tool: "customBiggerIsBetter"
gh_pages_branch: "nm-gh-pages"
benchmark_data_dir_path: "dev/bench/serving"
auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
reporting_enabled: "true"
github_token: ${{ secrets.GITHUB_TOKEN }}

- name: nm-github-action-benchmark(smaller_is_better.json)
- name: nm-github-action-benchmark(serving/smaller_is_better.json)
# Absence of the file indicates that there were no "smaller_is_better" metrics
if: (success() || failure()) && (hashFiles('downloads/smaller_is_better.json') != '')
if: (success() || failure()) && (hashFiles('downloads/benchmark_serving/smaller_is_better.json') != '')
uses: ./.github/actions/nm-github-action-benchmark
with:
gh_action_benchmark_name: "smaller_is_better"
gh_action_benchmark_json_file_path: "downloads/smaller_is_better.json"
gh_action_benchmark_json_file_path: "downloads/benchmark_serving/smaller_is_better.json"
gh_action_benchmark_tool: "customSmallerIsBetter"
gh_pages_branch: "nm-gh-pages"
benchmark_data_dir_path: "dev/bench/serving"
auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
reporting_enabled: "true"
github_token: ${{ secrets.GITHUB_TOKEN }}

- name: nm-github-action-benchmark(observation_metrics.json)
- name: nm-github-action-benchmark(serving/observation_metrics.json)
# Absence of the file indicates that there were no "observation" metrics
if: (success() || failure()) && (hashFiles('downloads/observation_metrics.json') != '')
if: (success() || failure()) && (hashFiles('downloads/benchmark_serving/observation_metrics.json') != '')
uses: ./.github/actions/nm-github-action-benchmark
with:
gh_action_benchmark_name: "observation_metrics"
gh_action_benchmark_json_file_path: "downloads/observation_metrics.json"
gh_action_benchmark_json_file_path: "downloads/benchmark_serving/observation_metrics.json"
# `github-action-benchmark` expects a tool name that is either
# "customBiggerIsBetter" or "customSmallerIsBetter". This is a hack to
# work around that. Since we mark the action to not report failures, this
# is fine.
gh_action_benchmark_tool: "customBiggerIsBetter"
gh_pages_branch: "nm-gh-pages"
benchmark_data_dir_path: "dev/bench/serving"
auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
reporting_enabled: "false"
github_token: ${{ secrets.GITHUB_TOKEN }}

- name: nm-github-action-benchmark(throughput/bigger_is_better.json)
# Absence of the file indicates that there were no "bigger_is_better" metrics
if: (success() || failure()) && (hashFiles('downloads/benchmark_throughput/bigger_is_better.json') != '')
uses: ./.github/actions/nm-github-action-benchmark
with:
gh_action_benchmark_name: "bigger_is_better"
gh_action_benchmark_json_file_path: "downloads/benchmark_throughput/bigger_is_better.json"
gh_action_benchmark_tool: "customBiggerIsBetter"
gh_pages_branch: "nm-gh-pages"
benchmark_data_dir_path: "dev/bench/throughput"
auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
reporting_enabled: "true"
github_token: ${{ secrets.GITHUB_TOKEN }}

- name: nm-github-action-benchmark(throughput/smaller_is_better.json)
# Absence of the file indicates that there were no "smaller_is_better" metrics
if: (success() || failure()) && (hashFiles('downloads/benchmark_throughput/smaller_is_better.json') != '')
uses: ./.github/actions/nm-github-action-benchmark
with:
gh_action_benchmark_name: "smaller_is_better"
gh_action_benchmark_json_file_path: "downloads/benchmark_throughput/smaller_is_better.json"
gh_action_benchmark_tool: "customSmallerIsBetter"
gh_pages_branch: "nm-gh-pages"
benchmark_data_dir_path: "dev/bench/throughput"
auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
reporting_enabled: "true"
github_token: ${{ secrets.GITHUB_TOKEN }}

- name: nm-github-action-benchmark(throughput/observation_metrics.json)
# Absence of the file indicates that there were no "observation" metrics
if: (success() || failure()) && (hashFiles('downloads/benchmark_throughput/observation_metrics.json') != '')
uses: ./.github/actions/nm-github-action-benchmark
with:
gh_action_benchmark_name: "observation_metrics"
gh_action_benchmark_json_file_path: "downloads/benchmark_throughput/observation_metrics.json"
# `github-action-benchmark` expects a tool name that is either
# "customBiggerIsBetter" or "customSmallerIsBetter". This is a hack to
# work around that. Since we mark the action to not report failures, this
# is fine.
gh_action_benchmark_tool: "customBiggerIsBetter"
gh_pages_branch: "nm-gh-pages"
benchmark_data_dir_path: "dev/bench/throughput"
auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
reporting_enabled: "false"
github_token: ${{ secrets.GITHUB_TOKEN }}
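
The six steps above are keyed off `hashFiles(...)`, so the layout of the downloaded artifacts determines which charts get updated. Below is a minimal sketch of that layout check; the paths are taken from the conditions above, and whether a given file exists depends on which metric types the benchmarks actually produced.

```python
from pathlib import Path

# Paths mirrored from the hashFiles(...) guards above: one sub-directory per
# benchmark script, one JSON file per metric type.
expected = [
    Path("downloads") / group / name
    for group in ("benchmark_serving", "benchmark_throughput")
    for name in ("bigger_is_better.json", "smaller_is_better.json",
                 "observation_metrics.json")
]

for path in expected:
    # A missing file means that metric type was not produced, so the
    # corresponding workflow step is skipped.
    print(path, "->", "upload" if path.exists() else "skip")
```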
2 changes: 2 additions & 0 deletions neuralmagic/benchmarks/run_benchmarks.py
@@ -9,6 +9,8 @@
def run(config_file_path: Path, output_directory: Path) -> None:

for config in benchmark_configs(config_file_path):
output_directory = output_directory.joinpath(config.script_name)
output_directory.mkdir(parents=True, exist_ok=True)
if config.script_name == "benchmark_serving":
run_benchmark_serving_script(config, output_directory)
continue
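
Note that the added lines reassign `output_directory` inside the loop, so with more than one config the second script's directory would end up nested under the first. Below is a minimal sketch of the same per-script routing with the base directory kept fixed; the helper name and config list are illustrative, not part of this PR.

```python
from pathlib import Path
from typing import Iterable

def route_outputs(script_names: Iterable[str], output_directory: Path) -> None:
    """Per-script output routing, keeping the base directory fixed across configs."""
    for script_name in script_names:
        # Each script gets its own sub-directory, e.g. <base>/benchmark_serving.
        script_output_directory = output_directory / script_name
        script_output_directory.mkdir(parents=True, exist_ok=True)
        # ...invoke the matching benchmark script with script_output_directory...

# Illustrative call; the real config list comes from benchmark_configs(...).
route_outputs(["benchmark_serving", "benchmark_throughput"], Path("benchmark-results"))
```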
48 changes: 36 additions & 12 deletions neuralmagic/benchmarks/scripts/logging/gha_benchmark_logging.py
@@ -106,10 +106,12 @@ def process(json_file_path: Path) -> Iterable[Type_Record_T]:
GHARecord.from_metric_template(metric, extra=hover_data)), metrics)


def main(args: argparse.Namespace) -> None:
input_directory = Path(args.input_directory)
def process_folder(input_directory: Path):
print(f"processing folder : {input_directory}")

json_file_paths = input_directory.glob('*.json')
json_file_paths = list(input_directory.glob('*.json'))
if not json_file_paths:
return

type_records: List[Type_Record_T] = list(
reduce(lambda whole, part: whole + part,
@@ -142,15 +144,30 @@ def filter_and_dump_if_non_empty(type_records: List[Type_Record_T],
with open(output_path, 'w+') as f:
json.dump(gha_record_dicts, f, indent=4)

output_directory = Path(args.output_directory).joinpath(
input_directory.name)
filter_and_dump_if_non_empty(
type_records, BenchmarkMetricType.BiggerIsBetter,
Path(args.bigger_is_better_metrics_output_file_path))
output_directory.joinpath(
args.bigger_is_better_metrics_output_filename))
filter_and_dump_if_non_empty(
type_records, BenchmarkMetricType.SmallerIsBetter,
Path(args.smaller_is_better_metrics_output_file_path))
output_directory.joinpath(
args.smaller_is_better_metrics_output_filename))
filter_and_dump_if_non_empty(
type_records, BenchmarkMetricType.Observation,
Path(args.observation_metrics_output_file_path))
output_directory.joinpath(args.observation_metrics_output_filename))


def main(args: argparse.Namespace) -> None:
groups = ["benchmark_serving", "benchmark_throughput"]
input_base_directory = Path(args.input_directory)
input_directories = [
input_base_directory.joinpath(group) for group in groups
]

for input_directory in input_directories:
process_folder(input_directory)


if __name__ == '__main__':
@@ -167,28 +184,35 @@ def filter_and_dump_if_non_empty(type_records: List[Type_Record_T],
"--input-directory",
required=True,
type=str,
help="""Path to the directory containing BenchmarkResult
jsons. This is typically the output directory passed
to the benchmark runner scripts like
help="""Path to the directory containing BenchmarkResult
jsons. This is typically the output directory passed
to the benchmark runner scripts like
neuralmagic/benchmarks/run_benchmarks.py.""")

parser.add_argument("--bigger-is-better-metrics-output-file-path",
parser.add_argument(
"-o",
"--output-directory",
required=True,
type=str,
help="Path to directory where JSON files will be stored")

parser.add_argument("--bigger-is-better-metrics-output-filename",
required=True,
type=str,
help="""
An output file path, where the BenchmarkMetricType
BiggerIsBetter metrics are stored.
""")

parser.add_argument("--smaller-is-better-metrics-output-file-path",
parser.add_argument("--smaller-is-better-metrics-output-filename",
required=True,
type=str,
help="""
An output file path, where the BenchmarkMetricType
SmallerIsBetter metrics are stored.
""")

parser.add_argument("--observation-metrics-output-file-path",
parser.add_argument("--observation-metrics-output-filename",
required=True,
type=str,
help="""
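
Putting the new flags together, the script now mirrors each input sub-directory's name under `--output-directory` before appending the per-type filename. A small sketch of how the paths compose, using illustrative values that match the nm-benchmark.yml wiring above:

```python
from pathlib import Path

# Illustrative values; the workflow passes gh-action-benchmark-jsons as the output directory.
output_directory = Path("gh-action-benchmark-jsons")
input_directory = Path("benchmark-results") / "benchmark_serving"
filename = "bigger_is_better.json"

# gha_benchmark_logging.py joins the input folder's name under the output directory.
result_path = output_directory / input_directory.name / filename
print(result_path)  # gh-action-benchmark-jsons/benchmark_serving/bigger_is_better.json
```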