This repository was archived by the owner on Oct 11, 2024. It is now read-only.

Benchmarking separation #362

Closed · wants to merge 11 commits
2 changes: 2 additions & 0 deletions .github/actions/nm-benchmark/action.yml
@@ -27,7 +27,9 @@ runs:
VENV="${{ inputs.venv }}-${COMMIT:0:7}"
source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
fi
echo "::group::install requirements"
pip3 install -r neuralmagic/benchmarks/requirements-benchmark.txt
echo "::endgroup::"
SUCCESS=0
.github/scripts/nm-run-benchmarks.sh ${{ inputs.benchmark_config_list_file }} ${{ inputs.output_directory }} || SUCCESS=$?
echo "test=${SUCCESS}" >> "$GITHUB_OUTPUT"
34 changes: 12 additions & 22 deletions .github/actions/nm-github-action-benchmark/action.yml
@@ -5,38 +5,27 @@ inputs:
description: "Name of the benchmark. Metrics are grouped by benchmark names. github_action_benchmark alert-trigger looks for the previous benchmark value in the benchmark-name group on the previous commit"
gh_action_benchmark_json_file_path:
description: "Path to the benchmark json file to upload (Note that this JSON should be in a `github-action-benchmark` consumable format - This is typically the output of neuralmagic/benchmarks/scripts/logging/gha_benchmark_logging.py)"
type: string
required: true
gh_action_benchmark_tool:
description: "A string that is input to the `tool` argument of `github-action-benchmark`. This should be either `customBiggerIsBetter` or `customSmallerIsBetter`"
type: choice
options:
- 'customBiggerIsBetter'
- 'customSmallerIsBetter'
description: "A string that is input to the `tool` argument of `github-action-benchmark`. This should be either `customBiggerIsBetter` or `customSmallerIsBetter`"
required: true
gh_pages_branch:
description: "Github branch where the `github-action-benchmark` creates its index.html and data.js"
type: string
required: true
benchmark_data_dir_path:
description: "Path to a directory that contains benchmark files on the GitHub pages branch."
required: true
auto_push:
description: "When set to true, pushes the benchmark results to the `nm-gh-pages` branch."
type: choice
options:
- 'true'
- 'false'
description: "When set to true, pushes the benchmark results to the `nm-gh-pages` branch. This should be either 'true' or 'false'."
required: true
reporting_enabled:
description: "When set to true, if there is a regression, do 3 things. 1. Mark the workflow as failed. 2. Add commit comments"
type: choice
options:
- 'true'
- 'false'
description: "When set to true, if there is a regression, do 2 things: 1. Mark the workflow as failed, 2. Add commit comments. This should be either 'true' or 'false'."
required: true
github_token:
description: "secrets.GITHUB_TOKEN from the caller"
required: true
runs:

runs:
using: composite
steps:
# A previous invocation of this action may have left the github pages branch in an
@@ -53,17 +42,18 @@ runs:
output-file-path: ${{ inputs.gh_action_benchmark_json_file_path }}
tool: ${{ inputs.gh_action_benchmark_tool }}
gh-pages-branch: ${{ inputs.gh_pages_branch }}
# Token required for pushing to nm-gh-pages branch
benchmark-data-dir-path: ${{ inputs.benchmark_data_dir_path }}
# Token required for pushing to nm-gh-pages branch
github-token: ${{ inputs.github_token }}
# Push and deploy to Github pages automatically
auto-push: ${{ inputs.auto_push == 'true' }}
# Add a commit comment comparing the current benchmark with the previous.
comment-always: ${{ inputs.reporting_enabled == 'true' }}
# Create an alert when some value has regressed more than 10%
# Create an alert when some value has regressed more than 10%
alert-threshold: "110%"
# Mark the workflow as a failure when some alert is triggered
fail-on-alert: ${{ inputs.reporting_enabled == 'true' }}
# Add a commit comment describing what triggered the alert
comment-on-alert: ${{ inputs.reporting_enabled == 'true' }}
# TODO (varun): Is this a reasonable number ?
# TODO (varun): Is this a reasonable number ?
max-items-in-chart: 50
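
For reference, the file passed through `gh_action_benchmark_json_file_path` is expected to be in the custom-tool format that `github-action-benchmark` consumes: a JSON array of records with `name`, `unit`, and `value` fields (plus an optional `extra` string used for hover text). The sketch below is illustrative only; the metric name and values are made up and not taken from this PR.

```python
import json

# Hypothetical "customBiggerIsBetter" payload in the shape github-action-benchmark
# consumes; gha_benchmark_logging.py emits files with this structure.
records = [
    {
        "name": "benchmark_serving / request_throughput",  # metrics are grouped by name
        "unit": "requests/s",
        "value": 42.0,
        "extra": "optional hover text shown in the chart tooltip",
    },
]

with open("bigger_is_better.json", "w") as f:
    json.dump(records, f, indent=4)
```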
22 changes: 15 additions & 7 deletions .github/actions/nm-produce-gha-benchmark-json/action.yml
@@ -4,14 +4,17 @@ inputs:
vllm_benchmark_jsons_path:
description: 'Path to a directory containing a list of BenchmarkResult JSONs'
required: true
bigger_is_better_output_file_path:
description: 'Path to a file where the GHA CustomBiggerIsBetter JSON is to be stored'
output_directory:
description: 'Path to directory where JSON files will be stored'
required: true
smaller_is_better_output_file_path:
description: 'Path to a file where the GHA CustomSmallerIsBetter JSON is to be stored'
bigger_is_better_output_filename:
description: 'Name of file where the GHA CustomBiggerIsBetter JSON is to be stored'
required: true
observation_metrics_output_file_path:
description: 'Path to a file where metrics that we only want to observe are stored'
smaller_is_better_output_filename:
description: 'Name of file where the GHA CustomSmallerIsBetter JSON is to be stored'
required: true
observation_metrics_output_filename:
description: 'Name of file where metrics that we only want to observe are stored'
python:
description: 'python version, e.g. 3.10.12'
required: true
@@ -29,7 +32,12 @@ runs:
source $(pyenv root)/versions/${{ inputs.python }}/envs/${VENV}/bin/activate
fi
SUCCESS=0
python3 -m neuralmagic.benchmarks.scripts.logging.gha_benchmark_logging -i ${{inputs.vllm_benchmark_jsons_path}} --bigger-is-better-metrics-output-file-path ${{ inputs.bigger_is_better_output_file_path }} --smaller-is-better-metrics-output-file-path ${{ inputs.smaller_is_better_output_file_path }} --observation-metrics-output-file-path ${{ inputs.observation_metrics_output_file_path }} || SUCCESS=$?
python3 -m neuralmagic.benchmarks.scripts.logging.gha_benchmark_logging \
-i ${{inputs.vllm_benchmark_jsons_path}} \
--output-directory ${{ inputs.output_directory }} \
--bigger-is-better-metrics-output-filename ${{ inputs.bigger_is_better_output_filename }} \
--smaller-is-better-metrics-output-filename ${{ inputs.smaller_is_better_output_filename }} \
--observation-metrics-output-filename ${{ inputs.observation_metrics_output_filename }} || SUCCESS=$?
echo "test=${SUCCESS}" >> "$GITHUB_OUTPUT"
exit ${SUCCESS}
shell: bash
74 changes: 62 additions & 12 deletions .github/workflows/nm-benchmark.yml
@@ -164,12 +164,13 @@ jobs:
uses: ./.github/actions/nm-produce-gha-benchmark-json
with:
vllm_benchmark_jsons_path: benchmark-results
output_directory: gh-action-benchmark-jsons
# Metrics that are "better" when the value is greater are stored here
bigger_is_better_output_file_path: gh-action-benchmark-jsons/bigger_is_better.json
bigger_is_better_output_filename: bigger_is_better.json
# Metrics that are "better" when the value is smaller are stored here
smaller_is_better_output_file_path: gh-action-benchmark-jsons/smaller_is_better.json
smaller_is_better_output_filename: smaller_is_better.json
# Metrics that we only want to observe are stored here
observation_metrics_output_file_path: gh-action-benchmark-jsons/observation_metrics.json
observation_metrics_output_filename: observation_metrics.json
python: ${{ inputs.python }}
venv:

@@ -222,45 +223,94 @@ jobs:
- name: display structure of downloaded files
run: ls -R ./downloads

- name: nm-github-action-benchmark(bigger_is_better.json)
- name: nm-github-action-benchmark(serving/bigger_is_better.json)
# Absence of the file indicates that there were no "bigger_is_better" metrics
if: (success() || failure()) && (hashFiles('downloads/bigger_is_better.json') != '')
if: (success() || failure()) && (hashFiles('downloads/benchmark_serving/bigger_is_better.json') != '')
uses: ./.github/actions/nm-github-action-benchmark
with:
gh_action_benchmark_name: "bigger_is_better"
gh_action_benchmark_json_file_path: "downloads/bigger_is_better.json"
gh_action_benchmark_json_file_path: "downloads/benchmark_serving/bigger_is_better.json"
gh_action_benchmark_tool: "customBiggerIsBetter"
gh_pages_branch: "nm-gh-pages"
benchmark_data_dir_path: "dev/bench/serving"
auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
reporting_enabled: "true"
github_token: ${{ secrets.GITHUB_TOKEN }}

- name: nm-github-action-benchmark(smaller_is_better.json)
- name: nm-github-action-benchmark(serving/smaller_is_better.json)
# Absence of the file indicates that there were no "smaller_is_better" metrics
if: (success() || failure()) && (hashFiles('downloads/smaller_is_better.json') != '')
if: (success() || failure()) && (hashFiles('downloads/benchmark_serving/smaller_is_better.json') != '')
uses: ./.github/actions/nm-github-action-benchmark
with:
gh_action_benchmark_name: "smaller_is_better"
gh_action_benchmark_json_file_path: "downloads/smaller_is_better.json"
gh_action_benchmark_json_file_path: "downloads/benchmark_serving/smaller_is_better.json"
gh_action_benchmark_tool: "customSmallerIsBetter"
gh_pages_branch: "nm-gh-pages"
benchmark_data_dir_path: "dev/bench/serving"
auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
reporting_enabled: "true"
github_token: ${{ secrets.GITHUB_TOKEN }}

- name: nm-github-action-benchmark(observation_metrics.json)
- name: nm-github-action-benchmark(serving/observation_metrics.json)
# Absence of the file indicates that there were no "observation" metrics
if: (success() || failure()) && (hashFiles('downloads/observation_metrics.json') != '')
if: (success() || failure()) && (hashFiles('downloads/benchmark_serving/observation_metrics.json') != '')
uses: ./.github/actions/nm-github-action-benchmark
with:
gh_action_benchmark_name: "observation_metrics"
gh_action_benchmark_json_file_path: "downloads/observation_metrics.json"
gh_action_benchmark_json_file_path: "downloads/benchmark_serving/observation_metrics.json"
# `github-action-benchmark` expects a tool name that is either
# "customBiggerIsBetter" or "customSmallerIsBetter". This is a hack to
# work around that. Since we mark the action to not report failures, this
# is fine.
gh_action_benchmark_tool: "customBiggerIsBetter"
gh_pages_branch: "nm-gh-pages"
benchmark_data_dir_path: "dev/bench/serving"
auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
reporting_enabled: "false"
github_token: ${{ secrets.GITHUB_TOKEN }}

- name: nm-github-action-benchmark(throughput/bigger_is_better.json)
# Absence of the file indicates that there were no "bigger_is_better" metrics
if: (success() || failure()) && (hashFiles('downloads/benchmark_throughput/bigger_is_better.json') != '')
uses: ./.github/actions/nm-github-action-benchmark
with:
gh_action_benchmark_name: "bigger_is_better"
gh_action_benchmark_json_file_path: "downloads/benchmark_throughput/bigger_is_better.json"
gh_action_benchmark_tool: "customBiggerIsBetter"
gh_pages_branch: "nm-gh-pages"
benchmark_data_dir_path: "dev/bench/throughput"
auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
reporting_enabled: "true"
github_token: ${{ secrets.GITHUB_TOKEN }}

- name: nm-github-action-benchmark(throughput/smaller_is_better.json)
# Absence of the file indicates that there were no "smaller_is_better" metrics
if: (success() || failure()) && (hashFiles('downloads/benchmark_throughput/smaller_is_better.json') != '')
uses: ./.github/actions/nm-github-action-benchmark
with:
gh_action_benchmark_name: "smaller_is_better"
gh_action_benchmark_json_file_path: "downloads/benchmark_throughput/smaller_is_better.json"
gh_action_benchmark_tool: "customSmallerIsBetter"
gh_pages_branch: "nm-gh-pages"
benchmark_data_dir_path: "dev/bench/throughput"
auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
reporting_enabled: "true"
github_token: ${{ secrets.GITHUB_TOKEN }}

- name: nm-github-action-benchmark(throughput/observation_metrics.json)
# Absence of the file indicates that there were no "observation" metrics
if: (success() || failure()) && (hashFiles('downloads/benchmark_throughput/observation_metrics.json') != '')
uses: ./.github/actions/nm-github-action-benchmark
with:
gh_action_benchmark_name: "observation_metrics"
gh_action_benchmark_json_file_path: "downloads/benchmark_throughput/observation_metrics.json"
# `github-action-benchmark` expects a tool name that is either
# "customBiggerIsBetter" or "customSmallerIsBetter". This is a hack to
# work around that. Since we mark the action to not report failures, this
# is fine.
gh_action_benchmark_tool: "customBiggerIsBetter"
gh_pages_branch: "nm-gh-pages"
benchmark_data_dir_path: "dev/bench/throughput"
auto_push: ${{ inputs.push_benchmark_results_to_gh_pages }}
reporting_enabled: "false"
github_token: ${{ secrets.GITHUB_TOKEN }}
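
The six steps above are keyed off `hashFiles(...)`, so the layout of the downloaded artifacts determines which charts get updated. Below is a minimal sketch of that layout check; the paths are taken from the conditions above, and whether a given file exists depends on which metric types the benchmarks actually produced.

```python
from pathlib import Path

# Paths mirrored from the hashFiles(...) guards above: one sub-directory per
# benchmark script, one JSON file per metric type.
expected = [
    Path("downloads") / group / name
    for group in ("benchmark_serving", "benchmark_throughput")
    for name in ("bigger_is_better.json", "smaller_is_better.json",
                 "observation_metrics.json")
]

for path in expected:
    # A missing file means that metric type was not produced, so the
    # corresponding workflow step is skipped.
    print(path, "->", "upload" if path.exists() else "skip")
```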
2 changes: 2 additions & 0 deletions neuralmagic/benchmarks/run_benchmarks.py
@@ -9,6 +9,8 @@
def run(config_file_path: Path, output_directory: Path) -> None:

for config in benchmark_configs(config_file_path):
output_directory = output_directory.joinpath(config.script_name)
output_directory.mkdir(parents=True, exist_ok=True)
if config.script_name == "benchmark_serving":
run_benchmark_serving_script(config, output_directory)
continue
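
Note that the added lines reassign `output_directory` inside the loop, so with more than one config the second script's directory would end up nested under the first. Below is a minimal sketch of the same per-script routing with the base directory kept fixed; the helper name and config list are illustrative, not part of this PR.

```python
from pathlib import Path
from typing import Iterable

def route_outputs(script_names: Iterable[str], output_directory: Path) -> None:
    """Per-script output routing, keeping the base directory fixed across configs."""
    for script_name in script_names:
        # Each script gets its own sub-directory, e.g. <base>/benchmark_serving.
        script_output_directory = output_directory / script_name
        script_output_directory.mkdir(parents=True, exist_ok=True)
        # ...invoke the matching benchmark script with script_output_directory...

# Illustrative call; the real config list comes from benchmark_configs(...).
route_outputs(["benchmark_serving", "benchmark_throughput"], Path("benchmark-results"))
```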
48 changes: 36 additions & 12 deletions neuralmagic/benchmarks/scripts/logging/gha_benchmark_logging.py
@@ -106,10 +106,12 @@ def process(json_file_path: Path) -> Iterable[Type_Record_T]:
GHARecord.from_metric_template(metric, extra=hover_data)), metrics)


def main(args: argparse.Namespace) -> None:
input_directory = Path(args.input_directory)
def process_folder(input_directory: Path):
print(f"processing folder : {input_directory}")

json_file_paths = input_directory.glob('*.json')
json_file_paths = list(input_directory.glob('*.json'))
if not json_file_paths:
return

type_records: List[Type_Record_T] = list(
reduce(lambda whole, part: whole + part,
@@ -142,15 +144,30 @@ def filter_and_dump_if_non_empty(type_records: List[Type_Record_T],
with open(output_path, 'w+') as f:
json.dump(gha_record_dicts, f, indent=4)

output_directory = Path(args.output_directory).joinpath(
input_directory.name)
filter_and_dump_if_non_empty(
type_records, BenchmarkMetricType.BiggerIsBetter,
Path(args.bigger_is_better_metrics_output_file_path))
output_directory.joinpath(
args.bigger_is_better_metrics_output_filename))
filter_and_dump_if_non_empty(
type_records, BenchmarkMetricType.SmallerIsBetter,
Path(args.smaller_is_better_metrics_output_file_path))
output_directory.joinpath(
args.smaller_is_better_metrics_output_filename))
filter_and_dump_if_non_empty(
type_records, BenchmarkMetricType.Observation,
Path(args.observation_metrics_output_file_path))
output_directory.joinpath(args.observation_metrics_output_filename))


def main(args: argparse.Namespace) -> None:
groups = ["benchmark_serving", "benchmark_throughput"]
input_base_directory = Path(args.input_directory)
input_directories = [
input_base_directory.joinpath(group) for group in groups
]

for input_directory in input_directories:
process_folder(input_directory)


if __name__ == '__main__':
@@ -167,28 +184,35 @@ def filter_and_dump_if_non_empty(type_records: List[Type_Record_T],
"--input-directory",
required=True,
type=str,
help="""Path to the directory containing BenchmarkResult
jsons. This is typically the output directory passed
to the benchmark runner scripts like
help="""Path to the directory containing BenchmarkResult
jsons. This is typically the output directory passed
to the benchmark runner scripts like
neuralmagic/benchmarks/run_benchmarks.py.""")

parser.add_argument("--bigger-is-better-metrics-output-file-path",
parser.add_argument(
"-o",
"--output-directory",
required=True,
type=str,
help="Path to directory where JSON files will be stored")

parser.add_argument("--bigger-is-better-metrics-output-filename",
required=True,
type=str,
help="""
An output file path, where the BenchmarkMetricType
BiggerIsBetter metrics are stored.
""")

parser.add_argument("--smaller-is-better-metrics-output-file-path",
parser.add_argument("--smaller-is-better-metrics-output-filename",
required=True,
type=str,
help="""
An output file path, where the BenchmarkMetricType
SmallerIsBetter metrics are stored.
""")

parser.add_argument("--observation-metrics-output-file-path",
parser.add_argument("--observation-metrics-output-filename",
required=True,
type=str,
help="""
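
Putting the new flags together, the script now mirrors each input sub-directory's name under `--output-directory` before appending the per-type filename. A small sketch of how the paths compose, using illustrative values that match the nm-benchmark.yml wiring above:

```python
from pathlib import Path

# Illustrative values; the workflow passes gh-action-benchmark-jsons as the output directory.
output_directory = Path("gh-action-benchmark-jsons")
input_directory = Path("benchmark-results") / "benchmark_serving"
filename = "bigger_is_better.json"

# gha_benchmark_logging.py joins the input folder's name under the output directory.
result_path = output_directory / input_directory.name / filename
print(result_path)  # gh-action-benchmark-jsons/benchmark_serving/bigger_is_better.json
```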