Commit

Improve profiling setup and documentation, sync benchmarks with main (#218)

* Automatically set rpd env var with profile flag

* Add readme

* Fix lint errors

---------

Co-authored-by: AdrianAbeyta <[email protected]>
AdrianAbeyta authored Oct 3, 2024
1 parent 47d6392 commit 4cb422f
Showing 4 changed files with 97 additions and 25 deletions.
59 changes: 59 additions & 0 deletions benchmarks/profiling/README.md
@@ -0,0 +1,59 @@
# vLLM Benchmark Profiling

This directory provides a way to profile the vLLM throughput and latency benchmarks using the ROCm profiling utilities.

## 1. Dependencies

Before using the profiling feature, you need to install the required dependencies:

### Install ROCm Profile Data

```bash
git clone -b nvtx_enabled https://github.com/ROCm/rocmProfileData.git
cd rocmProfileData && make && sudo make install
```

### Install hipMarker

```bash
cd rocmProfileData/hipMarker && python3 setup.py install
```

## 2. Profiling Benchmarks

Profiling lets you monitor the performance of the vLLM benchmarks under ROCm. The key flags are:

- `--profile-rpd`: Profiles the generation process of a single batch, as sketched below.
- `--profile-dir PROFILE_DIR`: Specifies the path where the profiler output is saved; the result can later be visualized with [ui.perfetto.dev](https://ui.perfetto.dev/) or [chrome://tracing](chrome://tracing/).
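
Under the hood, `--profile-rpd` drives the Python tracer that rocmProfileData installs. For orientation, here is a minimal standalone sketch of that tracer; the method names come from the rocmProfileData sources and the scripts in this directory, so treat the exact API as an assumption rather than a guarantee:

```python
from rpdTracerControl import rpdTracerControl

# The output file must be set before the first tracer instance is created.
rpdTracerControl.setFilename(name="trace.rpd", append=False)

rpd = rpdTracerControl()
rpd.start()
# ... run the GPU work you want traced, e.g. one generation batch ...
rpd.stop()
rpd.top_totals()  # print a summary of the hottest kernels/ops
```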

### Profiling Using Default Directory

By default, profiling results are saved to `./vllm_benchmark_latency_result` or `./vllm_benchmark_throughput_result`, depending on whether you run the latency or the throughput benchmark. To run a benchmark and profile it using the default directory, execute:

```bash
python3 benchmark_throughput.py --input-len {len} --output-len {len} --model {model} --profile-rpd
```

### Profiling With a Custom Directory

You can specify a custom directory for saving profiler outputs by using the `--profile-dir` flag:

```bash
python3 benchmark_throughput.py --input-len {len} --output-len {len} --model {model} --profile-rpd --profile-dir {/path/to/custom/dir}
```

After profiling is complete, an `.rpd` file containing the trace data will be saved to the specified directory.
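
The `.rpd` file is a SQLite database, so you can sanity-check a trace before converting it. A minimal sketch using only the Python standard library (the `rocpd_*` table names in the comment are an assumption about the rocmProfileData schema):

```python
import sqlite3

con = sqlite3.connect("trace.rpd")
# List the tables the tracer wrote; rocmProfileData typically uses
# names like rocpd_api and rocpd_op (assumed, schema-version dependent).
for (table,) in con.execute(
        "SELECT name FROM sqlite_master WHERE type='table'"):
    print(table)
con.close()
```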

## 3. Convert Trace Data to JSON Format

To view the trace data, convert it into a format compatible with Chrome tracing or Perfetto.

You can use the `rpd2tracing.py` script in rocmProfileData to convert the `.rpd` file into a JSON file:

```bash
python3 rocmProfileData/tools/rpd2tracing.py trace.rpd trace.json
```

Once the trace is converted, open the `.json` file in [Chrome](chrome://tracing/) or [Perfetto](https://ui.perfetto.dev/) for visualization.
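
If you want a quick textual summary without opening a UI, you can read the converted file directly. This sketch assumes `rpd2tracing.py` emits the standard Chrome trace event format, i.e. either a bare list of events or an object with a `traceEvents` key:

```python
import json
from collections import Counter

with open("trace.json") as f:
    trace = json.load(f)

events = trace["traceEvents"] if isinstance(trace, dict) else trace
# Count the ten most frequent event names in the trace.
counts = Counter(e.get("name", "<unnamed>") for e in events)
for event_name, n in counts.most_common(10):
    print(f"{n:8d}  {event_name}")
```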


36 changes: 21 additions & 15 deletions benchmarks/profiling/benchmark_latency.py
@@ -13,7 +13,7 @@
 
 from vllm import LLM, SamplingParams
 from vllm.engine.arg_utils import DEVICE_OPTIONS, EngineArgs
-from vllm.inputs import PromptType
+from vllm.inputs import PromptInputs
 from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
 from vllm.utils import FlexibleArgumentParser
 
@@ -30,8 +30,7 @@ def rpd_profiler_context():
         rpd.top_totals()
 
     @contextmanager
-    def torch_profiler_context(profile_dir: Optional[str] = None,
-                               trace_file_name=None):
+    def torch_profiler_context(profile_dir: Optional[str] = None):
         p = torch.profiler.profile(
             activities=[
                 torch.profiler.ProfilerActivity.CPU,
@@ -48,15 +47,27 @@ def torch_profiler_context(profile_dir: Optional[str] = None):
         print(p.key_averages().table(sort_by="self_cuda_time_total",
                                      row_limit=-1))
 
-    def get_profiling_context(profile_dir: Optional[str] = None,
-                              trace_file_name=None):
+    def get_profiling_context(profile_dir: Optional[str] = None):
         if args.profile_torch:
-            return torch_profiler_context(profile_dir, trace_file_name)
+            return torch_profiler_context(profile_dir)
         elif args.profile_rpd:
             return rpd_profiler_context()
         else:
             return nullcontext()
 
+    if args.profile_torch or args.profile_rpd:
+        profile_dir = Path(args.profile_dir
+                           or "./vllm_benchmark_latency_result")
+        profile_dir.mkdir(parents=True, exist_ok=True)
+        name = os.path.basename(os.path.normpath(args.model))
+        model_trace_name = (
+            f"{name}_in_{args.input_len}_out_{args.output_len}_"
+            f"batch_{args.batch_size}_tp_{args.tensor_parallel_size}")
+        print(f"Profiling (results will be saved to '{profile_dir}')...")
+        if args.profile_rpd:
+            profile_dir /= f"{model_trace_name}.rpd"
+            os.environ["VLLM_RPD_PROFILER_DIR"] = str(profile_dir)
+
     # NOTE(woosuk): If the request cannot be processed in a single batch,
     # the engine will automatically process the request in multiple batches.
     llm = LLM(
@@ -100,19 +111,19 @@ def get_profiling_context(profile_dir: Optional[str] = None,
     dummy_prompt_token_ids = np.random.randint(10000,
                                                size=(args.batch_size,
                                                      args.input_len))
-    dummy_prompts: List[PromptType] = [{
+    dummy_inputs: List[PromptInputs] = [{
         "prompt_token_ids": batch
     } for batch in dummy_prompt_token_ids.tolist()]
 
     def run_to_completion(profile_dir: Optional[str] = None):
         if profile_dir:
-            with get_profiling_context():
-                llm.generate(dummy_prompts,
+            with get_profiling_context(profile_dir):
+                llm.generate(dummy_inputs,
                              sampling_params=sampling_params,
                              use_tqdm=False)
         else:
             start_time = time.perf_counter()
-            llm.generate(dummy_prompts,
+            llm.generate(dummy_inputs,
                          sampling_params=sampling_params,
                          use_tqdm=False)
             end_time = time.perf_counter()
@@ -124,11 +135,6 @@ def run_to_completion(profile_dir: Optional[str] = None):
         run_to_completion(profile_dir=None)
 
     if args.profile_torch or args.profile_rpd:
-        profile_dir = args.profile_dir
-        if not profile_dir:
-            profile_dir = Path(".") / "vllm_benchmark_latency_result"
-        os.makedirs(profile_dir, exist_ok=True)
-        print(f"Profiling (results will be saved to '{profile_dir}')...")
         run_to_completion(profile_dir=profile_dir)
         return
 
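
The block added to `benchmark_latency.py` above (mirrored in `benchmark_throughput.py` below) derives a trace file name from the benchmark configuration and exports it through `VLLM_RPD_PROFILER_DIR`, so the rpd tracer inside vLLM writes to the path the benchmark chose. A condensed sketch of that pattern; the helper name and signature are hypothetical:

```python
import os
from pathlib import Path


def rpd_trace_path(profile_dir, model, input_len, output_len,
                   batch_size, tp_size):
    # Build a descriptive trace name from the benchmark configuration,
    # create the output directory, and export the path for the tracer.
    name = os.path.basename(os.path.normpath(model))
    trace_name = (f"{name}_in_{input_len}_out_{output_len}_"
                  f"batch_{batch_size}_tp_{tp_size}.rpd")
    out_dir = Path(profile_dir or "./vllm_benchmark_latency_result")
    out_dir.mkdir(parents=True, exist_ok=True)
    trace_path = out_dir / trace_name
    os.environ["VLLM_RPD_PROFILER_DIR"] = str(trace_path)
    return trace_path


# Example: /tmp/prof/Llama-2-7b-hf_in_128_out_128_batch_8_tp_1.rpd
print(rpd_trace_path("/tmp/prof", "meta-llama/Llama-2-7b-hf", 128, 128, 8, 1))
```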
24 changes: 17 additions & 7 deletions benchmarks/profiling/benchmark_throughput.py
@@ -5,6 +5,7 @@
 import random
 import time
 from contextlib import contextmanager, nullcontext
+from pathlib import Path
 from typing import List, Optional, Tuple
 
 import torch
@@ -121,15 +122,27 @@ def torch_profiler_context(profile_dir: Optional[str] = None):
         print(p.key_averages().table(sort_by="self_cuda_time_total",
                                      row_limit=-1))
 
-    def get_profiling_context(profile_dir: Optional[str] = None,
-                              trace_file_name=None):
+    def get_profiling_context(profile_dir: Optional[str] = None):
         if args.profile_torch:
-            return torch_profiler_context(profile_dir, trace_file_name)
+            return torch_profiler_context(profile_dir)
         elif args.profile_rpd:
             return rpd_profiler_context()
         else:
             return nullcontext()
 
+    if args.profile_torch or args.profile_rpd:
+        profile_dir = Path(args.profile_dir
+                           or "./vllm_benchmark_throughput_result")
+        profile_dir.mkdir(parents=True, exist_ok=True)
+        name = os.path.basename(os.path.normpath(args.model))
+        model_trace_name = (
+            f"{name}_in_{args.input_len}_out_{args.output_len}_"
+            f"tp_{args.tensor_parallel_size}")
+        print(f"Profiling (results will be saved to '{profile_dir}')...")
+        if args.profile_rpd:
+            profile_dir /= f"{model_trace_name}.rpd"
+            os.environ["VLLM_RPD_PROFILER_DIR"] = str(profile_dir)
+
     llm = LLM(
         model=model,
         tokenizer=tokenizer,
@@ -171,10 +184,7 @@ def get_profiling_context(profile_dir: Optional[str] = None,
         ))
 
     if args.profile_torch or args.profile_rpd:
-        profile_dir = args.profile_dir
-        name = os.path.basename(os.path.normpath(args.model))
-        model_trace_name = f"{name}_in_{args.input_len}_out_{args.output_len}"
-        with get_profiling_context(profile_dir, model_trace_name):
+        with get_profiling_context(profile_dir):
             llm.generate(prompts, sampling_params, use_tqdm=True)
         return
     else:
3 changes: 0 additions & 3 deletions vllm/utils.py
@@ -163,11 +163,8 @@ def __init__(self,
                  skip=False):
         self.skip = skip
         if not self.skip:
-            if 'RANK' in os.environ or int(os.getenv('WORLD_SIZE', 1)) > 1:
-                filename = f"{filename}_pid{os.getpid()}"
             self.name = name
             self.args = args if args else ""
-            print(f"filename type {type(filename)}")
             self.rpd = self.initialize_rpd_tracer(filename, nvtx)
 
     def _recreate_cm(self):
