This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Benchmarking separation #362

Closed
wants to merge 11 commits into from
2 changes: 2 additions & 0 deletions .github/actions/nm-benchmark/action.yml
@@ -27,7 +27,9 @@ runs:
VENV="${{ inputs.venv }}-${COMMIT:0:7}"
source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
fi
echo "::group::install requirements"
pip3 install -r neuralmagic/benchmarks/requirements-benchmark.txt
echo "::endgroup::"
SUCCESS=0
.github/scripts/nm-run-benchmarks.sh ${{ inputs.benchmark_config_list_file }} ${{ inputs.output_directory }} || SUCCESS=$?
echo "test=${SUCCESS}" >> "$GITHUB_OUTPUT"
34 changes: 12 additions & 22 deletions .github/actions/nm-github-action-benchmark/action.yml
@@ -5,38 +5,27 @@ inputs:
description: "Name of the benchmark. Metrics are grouped by benchmark names. github_action_benchmark alert-trigger looks for the previous benchmark value in the benchmark-name group on the previous commit"
gh_action_benchmark_json_file_path:
description: "Path to the benchmark json file to upload (Note that this JSON should be in a `github-action-benchmark` consumable format - This is typically the output of neuralmagic/benchmarks/scripts/logging/gha_benchmark_logging.py)"
type: string
required: true
gh_action_benchmark_tool:
description: "A string that is input to the `tool` argument of `github-action-benchmark`. This should be either `customBiggerIsBetter` or `customSmallerIsBetter`"
type: choice
options:
- 'customBiggerIsBetter'
- 'customSmallerIsBetter'
description: "A string that is input to the `tool` argument of `github-action-benchmark`. This should be either `customBiggerIsBetter` or `customSmallerIsBetter`"
required: true
gh_pages_branch:
description: "Github branch where the `github-action-benchmark` creates its index.html and data.js"
type: string
required: true
benchmark_data_dir_path:
description: "Path to a directory that contains benchmark files on the GitHub pages branch."
required: true
auto_push:
description: "When set to true, pushes the benchmark results to the `nm-gh-pages` branch."
type: choice
options:
- 'true'
- 'false'
description: "When set to true, pushes the benchmark results to the `nm-gh-pages` branch. This should be either 'true' or 'false'."
required: true
reporting_enabled:
description: "When set to true, if there is a regression, do 3 things. 1. Mark the workflow as failed. 2. Add commit comments"
type: choice
options:
- 'true'
- 'false'
description: "When set to true, if there is a regression, do 2 things: 1. Mark the workflow as failed, 2. Add commit comments. This should be either 'true' or 'false'."
required: true
github_token:
description: "secrets.GITHUB_TOKEN from the caller"
required: true
runs:

runs:
using: composite
steps:
# A previous invocation of this action may have left the github pages branch in an
@@ -53,17 +42,18 @@ runs:
output-file-path: ${{ inputs.gh_action_benchmark_json_file_path }}
tool: ${{ inputs.gh_action_benchmark_tool }}
gh-pages-branch: ${{ inputs.gh_pages_branch }}
# Token required for pushing to nm-gh-pages branch
benchmark-data-dir-path: ${{ inputs.benchmark_data_dir_path }}
# Token required for pushing to nm-gh-pages branch
github-token: ${{ inputs.github_token }}
# Push and deploy to Github pages automatically
auto-push: ${{ inputs.auto_push == 'true' }}
# Add a commit comment comparing the current benchmark with the previous.
comment-always: ${{ inputs.reporting_enabled == 'true' }}
# Create an alert when some value has regressed more than 10%
# Create an alert when some value has regressed more than 10%
alert-threshold: "110%"
# Mark the workflow as a failure when some alert is triggered
fail-on-alert: ${{ inputs.reporting_enabled == 'true' }}
# Add a commit comment describing what triggered the alert
comment-on-alert: ${{ inputs.reporting_enabled == 'true' }}
# TODO (varun): Is this a reasonable number ?
# TODO (varun): Is this a reasonable number ?
max-items-in-chart: 50
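For context, the `customBiggerIsBetter` / `customSmallerIsBetter` tools of `github-action-benchmark` expect the uploaded file to be a JSON array of metric records with `name`, `unit`, `value`, and an optional `extra` string. A minimal sketch of producing such a file (the metric name and values here are hypothetical, not taken from this PR):

import json

# Hypothetical records in the custom*IsBetter format consumed by
# github-action-benchmark: a JSON array of {name, unit, value, extra} objects.
records = [
    {
        "name": "example_model / request_throughput",  # hypothetical metric name
        "unit": "prompts/s",
        "value": 12.3,
        "extra": "hover text shown in the benchmark chart tooltip",
    },
]

with open("bigger_is_better.json", "w") as f:
    json.dump(records, f, indent=4)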
22 changes: 15 additions & 7 deletions .github/actions/nm-produce-gha-benchmark-json/action.yml
@@ -4,14 +4,17 @@ inputs:
vllm_benchmark_jsons_path:
description: 'Path to a directory containing a list of BenchmarkResult JSONs'
required: true
bigger_is_better_output_file_path:
description: 'Path to a file where the GHA CustomBiggerIsBetter JSON is to be stored'
output_directory:
description: 'Path to directory where JSON files will be stored'
required: true
smaller_is_better_output_file_path:
description: 'Path to a file where the GHA CustomSmallerIsBetter JSON is to be stored'
bigger_is_better_output_filename:
description: 'Name of file where the GHA CustomBiggerIsBetter JSON is to be stored'
required: true
observation_metrics_output_file_path:
description: 'Path to a file where metrics that we only want to observe are stored'
smaller_is_better_output_filename:
description: 'Name of file where the GHA CustomSmallerIsBetter JSON is to be stored'
required: true
observation_metrics_output_filename:
description: 'Name of file where metrics that we only want to observe are stored'
python:
description: 'python version, e.g. 3.10.12'
required: true
@@ -29,7 +32,12 @@ runs:
source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
fi
SUCCESS=0
python3 -m neuralmagic.benchmarks.scripts.logging.gha_benchmark_logging -i ${{inputs.vllm_benchmark_jsons_path}} --bigger-is-better-metrics-output-file-path ${{ inputs.bigger_is_better_output_file_path }} --smaller-is-better-metrics-output-file-path ${{ inputs.smaller_is_better_output_file_path }} --observation-metrics-output-file-path ${{ inputs.observation_metrics_output_file_path }} || SUCCESS=$?
python3 -m neuralmagic.benchmarks.scripts.logging.gha_benchmark_logging \
-i ${{inputs.vllm_benchmark_jsons_path}} \
--output-directory ${{ inputs.output_directory }} \
--bigger-is-better-metrics-output-filename ${{ inputs.bigger_is_better_output_filename }} \
--smaller-is-better-metrics-output-filename ${{ inputs.smaller_is_better_output_filename }} \
--observation-metrics-output-filename ${{ inputs.observation_metrics_output_filename }} || SUCCESS=$?
echo "test=${SUCCESS}" >> "$GITHUB_OUTPUT"
exit ${SUCCESS}
shell: bash
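With the reworked inputs, the GHA JSONs land in per-benchmark subdirectories of `output_directory`, named after the input folder, as the `gha_benchmark_logging.py` changes below show. A minimal sketch of the resulting paths, assuming the `gh-action-benchmark-jsons` directory and the filenames used by the workflow below:

from pathlib import Path

output_directory = Path("gh-action-benchmark-jsons")  # inputs.output_directory
for group in ("benchmark_serving", "benchmark_throughput"):
    for filename in ("bigger_is_better.json",
                     "smaller_is_better.json",
                     "observation_metrics.json"):
        # e.g. gh-action-benchmark-jsons/benchmark_serving/bigger_is_better.json
        print(output_directory / group / filename)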
74 changes: 62 additions & 12 deletions .github/workflows/nm-benchmark.yml
@@ -164,12 +164,13 @@ jobs:
uses: ./.github/actions/nm-produce-gha-benchmark-json
with:
vllm_benchmark_jsons_path: benchmark-results
output_directory: gh-action-benchmark-jsons
# Metrics that are "better" when the value is greater are stored here
bigger_is_better_output_file_path: gh-action-benchmark-jsons/bigger_is_better.json
bigger_is_better_output_filename: bigger_is_better.json
# Metrics that are "better" when the value is smaller are stored here
smaller_is_better_output_file_path: gh-action-benchmark-jsons/smaller_is_better.json
smaller_is_better_output_filename: smaller_is_better.json
# Metrics that we only want to observe are stored here
observation_metrics_output_file_path: gh-action-benchmark-jsons/observation_metrics.json
observation_metrics_output_filename: observation_metrics.json
python: ${{ inputs.python }}
venv:

@@ -222,45 +223,94 @@ jobs:
- name: display structure of downloaded files
run: ls -R ./downloads

- name: nm-github-action-benchmark(bigger_is_better.json)
- name: nm-github-action-benchmark(serving/bigger_is_better.json)
# Absence of the file indicates that there were no "bigger_is_better" metrics
if: (success() || failure()) && (hashFiles('downloads/bigger_is_better.json') != '')
if: (success() || failure()) && (hashFiles('downloads/benchmark_serving/bigger_is_better.json') != '')
uses: ./.github/actions/nm-github-action-benchmark
with:
gh_action_benchmark_name: "bigger_is_better"
gh_action_benchmark_json_file_path: "downloads/bigger_is_better.json"
gh_action_benchmark_json_file_path: "downloads/benchmark_serving/bigger_is_better.json"
gh_action_benchmark_tool: "customBiggerIsBetter"
gh_pages_branch: "nm-gh-pages"
benchmark_data_dir_path: "dev/bench/serving"
auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
reporting_enabled: "true"
github_token: ${{ secrets.GITHUB_TOKEN }}

- name: nm-github-action-benchmark(smaller_is_better.json)
- name: nm-github-action-benchmark(serving/smaller_is_better.json)
# Absence of the file indicates that there were no "smaller_is_better" metrics
if: (success() || failure()) && (hashFiles('downloads/smaller_is_better.json') != '')
if: (success() || failure()) && (hashFiles('downloads/benchmark_serving/smaller_is_better.json') != '')
uses: ./.github/actions/nm-github-action-benchmark
with:
gh_action_benchmark_name: "smaller_is_better"
gh_action_benchmark_json_file_path: "downloads/smaller_is_better.json"
gh_action_benchmark_json_file_path: "downloads/benchmark_serving/smaller_is_better.json"
gh_action_benchmark_tool: "customSmallerIsBetter"
gh_pages_branch: "nm-gh-pages"
benchmark_data_dir_path: "dev/bench/serving"
auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
reporting_enabled: "true"
github_token: ${{ secrets.GITHUB_TOKEN }}

- name: nm-github-action-benchmark(observation_metrics.json)
- name: nm-github-action-benchmark(serving/observation_metrics.json)
# Absence of the file indicates that there were no "observation" metrics
if: (success() || failure()) && (hashFiles('downloads/observation_metrics.json') != '')
if: (success() || failure()) && (hashFiles('downloads/benchmark_serving/observation_metrics.json') != '')
uses: ./.github/actions/nm-github-action-benchmark
with:
gh_action_benchmark_name: "observation_metrics"
gh_action_benchmark_json_file_path: "downloads/observation_metrics.json"
gh_action_benchmark_json_file_path: "downloads/benchmark_serving/observation_metrics.json"
# `github-action-benchmark` expects a tool name that is either
# "customBiggerIsBetter" or "customSmallerIsBetter". This is a hack to
# work around that. Since we mark the action to not report failures, this
# is fine.
gh_action_benchmark_tool: "customBiggerIsBetter"
gh_pages_branch: "nm-gh-pages"
benchmark_data_dir_path: "dev/bench/serving"
auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
reporting_enabled: "false"
github_token: ${{ secrets.GITHUB_TOKEN }}

- name: nm-github-action-benchmark(throughput/bigger_is_better.json)
# Absence of the file indicates that there were no "bigger_is_better" metrics
if: (success() || failure()) && (hashFiles('downloads/benchmark_throughput/bigger_is_better.json') != '')
uses: ./.github/actions/nm-github-action-benchmark
with:
gh_action_benchmark_name: "bigger_is_better"
gh_action_benchmark_json_file_path: "downloads/benchmark_throughput/bigger_is_better.json"
gh_action_benchmark_tool: "customBiggerIsBetter"
gh_pages_branch: "nm-gh-pages"
benchmark_data_dir_path: "dev/bench/throughput"
auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
reporting_enabled: "true"
github_token: ${{ secrets.GITHUB_TOKEN }}

- name: nm-github-action-benchmark(throughput/smaller_is_better.json)
# Absence of the file indicates that there were no "smaller_is_better" metrics
if: (success() || failure()) && (hashFiles('downloads/benchmark_throughput/smaller_is_better.json') != '')
uses: ./.github/actions/nm-github-action-benchmark
with:
gh_action_benchmark_name: "smaller_is_better"
gh_action_benchmark_json_file_path: "downloads/benchmark_throughput/smaller_is_better.json"
gh_action_benchmark_tool: "customSmallerIsBetter"
gh_pages_branch: "nm-gh-pages"
benchmark_data_dir_path: "dev/bench/throughput"
auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
reporting_enabled: "true"
github_token: ${{ secrets.GITHUB_TOKEN }}

- name: nm-github-action-benchmark(throughput/observation_metrics.json)
# Absence of the file indicates that there were no "observation" metrics
if: (success() || failure()) && (hashFiles('downloads/benchmark_throughput/observation_metrics.json') != '')
uses: ./.github/actions/nm-github-action-benchmark
with:
gh_action_benchmark_name: "observation_metrics"
gh_action_benchmark_json_file_path: "downloads/benchmark_throughput/observation_metrics.json"
# `github-action-benchmark` expects a tool name that is either
# "customBiggerIsBetter" or "customSmallerIsBetter". This is a hack to
# work around that. Since we mark the action to not report failures, this
# is fine.
gh_action_benchmark_tool: "customBiggerIsBetter"
gh_pages_branch: "nm-gh-pages"
benchmark_data_dir_path: "dev/bench/throughput"
auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
reporting_enabled: "false"
github_token: ${{ secrets.GITHUB_TOKEN }}
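The six upload steps above differ only in which downloaded JSON they consume and where they publish it. As an illustrative summary (paths and settings copied from the steps above; the dict itself is not part of the PR):

# Artifact path checked with hashFiles() -> (tool, gh-pages data dir, reporting_enabled)
UPLOAD_MATRIX = {
    "downloads/benchmark_serving/bigger_is_better.json":
        ("customBiggerIsBetter", "dev/bench/serving", True),
    "downloads/benchmark_serving/smaller_is_better.json":
        ("customSmallerIsBetter", "dev/bench/serving", True),
    "downloads/benchmark_serving/observation_metrics.json":
        ("customBiggerIsBetter", "dev/bench/serving", False),
    "downloads/benchmark_throughput/bigger_is_better.json":
        ("customBiggerIsBetter", "dev/bench/throughput", True),
    "downloads/benchmark_throughput/smaller_is_better.json":
        ("customSmallerIsBetter", "dev/bench/throughput", True),
    "downloads/benchmark_throughput/observation_metrics.json":
        ("customBiggerIsBetter", "dev/bench/throughput", False),
}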
2 changes: 2 additions & 0 deletions neuralmagic/benchmarks/run_benchmarks.py
@@ -9,6 +9,8 @@
def run(config_file_path: Path, output_directory: Path) -> None:

for config in benchmark_configs(config_file_path):
# Use a per-script subdirectory without rebinding output_directory,
# so results from different benchmark scripts stay separated.
script_output_directory = output_directory.joinpath(config.script_name)
script_output_directory.mkdir(parents=True, exist_ok=True)
if config.script_name == "benchmark_serving":
run_benchmark_serving_script(config, script_output_directory)
continue
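The effect of this change is that each benchmark script writes its results into its own subdirectory of the run's output directory, which the logging and upload steps then key on. A minimal sketch of the intended layout (directory names as used elsewhere in this PR; the per-script grouping is inferred from the workflow's `downloads/benchmark_serving` and `downloads/benchmark_throughput` paths):

from pathlib import Path

output_directory = Path("benchmark-results")
# After a run covering both script types, results are grouped as:
#   benchmark-results/benchmark_serving/<result>.json
#   benchmark-results/benchmark_throughput/<result>.json
for script_name in ("benchmark_serving", "benchmark_throughput"):
    (output_directory / script_name).mkdir(parents=True, exist_ok=True)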
48 changes: 36 additions & 12 deletions neuralmagic/benchmarks/scripts/logging/gha_benchmark_logging.py
@@ -106,10 +106,12 @@ def process(json_file_path: Path) -> Iterable[Type_Record_T]:
GHARecord.from_metric_template(metric, extra=hover_data)), metrics)


def main(args: argparse.Namespace) -> None:
input_directory = Path(args.input_directory)
def process_folder(input_directory: Path):
print(f"processing folder : {input_directory}")

json_file_paths = input_directory.glob('*.json')
json_file_paths = list(input_directory.glob('*.json'))
if not json_file_paths:
return

type_records: List[Type_Record_T] = list(
reduce(lambda whole, part: whole + part,
@@ -142,15 +144,30 @@ def filter_and_dump_if_non_empty(type_records: List[Type_Record_T],
with open(output_path, 'w+') as f:
json.dump(gha_record_dicts, f, indent=4)

output_directory = Path(args.output_directory).joinpath(
input_directory.name)
filter_and_dump_if_non_empty(
type_records, BenchmarkMetricType.BiggerIsBetter,
Path(args.bigger_is_better_metrics_output_file_path))
output_directory.joinpath(
args.bigger_is_better_metrics_output_filename))
filter_and_dump_if_non_empty(
type_records, BenchmarkMetricType.SmallerIsBetter,
Path(args.smaller_is_better_metrics_output_file_path))
output_directory.joinpath(
args.smaller_is_better_metrics_output_filename))
filter_and_dump_if_non_empty(
type_records, BenchmarkMetricType.Observation,
Path(args.observation_metrics_output_file_path))
output_directory.joinpath(args.observation_metrics_output_filename))


def main(args: argparse.Namespace) -> None:
groups = ["benchmark_serving", "benchmark_throughput"]
input_base_directory = Path(args.input_directory)
input_directories = [
input_base_directory.joinpath(group) for group in groups
]

for input_directory in input_directories:
process_folder(input_directory)


if __name__ == '__main__':
@@ -167,28 +184,35 @@ def filter_and_dump_if_non_empty(type_records: List[Type_Record_T],
"--input-directory",
required=True,
type=str,
help="""Path to the directory containing BenchmarkResult
jsons. This is typically the output directory passed
to the benchmark runner scripts like
help="""Path to the directory containing BenchmarkResult
jsons. This is typically the output directory passed
to the benchmark runner scripts like
neuralmagic/benchmarks/run_benchmarks.py.""")

parser.add_argument("--bigger-is-better-metrics-output-file-path",
parser.add_argument(
"-o",
"--output-directory",
required=True,
type=str,
help="Path to directory where JSON files will be stored")

parser.add_argument("--bigger-is-better-metrics-output-filename",
required=True,
type=str,
help="""
Name of the output file, inside the output directory,
where the BenchmarkMetricType BiggerIsBetter metrics are stored.
""")

parser.add_argument("--smaller-is-better-metrics-output-file-path",
parser.add_argument("--smaller-is-better-metrics-output-filename",
required=True,
type=str,
help="""
Name of the output file, inside the output directory,
where the BenchmarkMetricType SmallerIsBetter metrics are stored.
""")

parser.add_argument("--observation-metrics-output-file-path",
parser.add_argument("--observation-metrics-output-filename",
required=True,
type=str,
help="""