Add command line options; add fwd/bwd
kiya00 committed Jan 21, 2025
1 parent 835b5c9 commit 3c5d713
Showing 4 changed files with 140 additions and 63 deletions.
2 changes: 1 addition & 1 deletion thunder/dynamo/compiler.py
@@ -121,7 +121,7 @@ def save_reproducer_to_folder(
reproducer_folder,
f"graph{graph_idx}_{cur_name}",
use_pytest_benchmark,
check_consistency=check_consistency,
# check_consistency=check_consistency,
save_input_tensor=save_input_tensor,
)

98 changes: 66 additions & 32 deletions thunder/dynamo/report.py
@@ -4,15 +4,45 @@
import sys
from pathlib import Path
import json
import argparse

import torch
from thunder.dynamo.compiler import thunderfx
from thunder.benchmarks.targets import ComputeType, backward_only
from thunder.dynamo.utils import run_backward

if TYPE_CHECKING:
from thunder.dynamo.utils import SubgraphInfo
from os import PathLike
from collections.abc import Callable


def run_repro(ex_dict, ex_name, model, compute_type, *inputs): # CLI options
if ex_name == "eager":
compiled_fn = model
elif ex_name == "torch_inductor":
compiled_fn = ex_dict[ex_name](model, inputs)
else:
compiled_fn = ex_dict[ex_name](model)

results = {}
match compute_type:
case "forward":
try:
result = compiled_fn(*inputs)
except Exception as e:
raise e
results["forward"] = result
case "forward+backward":
try:
forward_result, grads = run_backward(compiled_fn, *inputs)
except Exception as e:
raise e
results["forward"] = forward_result
results["backward"] = grads
return results
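
# Illustrative usage sketch (not part of this commit): how run_repro could be
# driven directly, mirroring what the generated reproducer scripts do. The
# executor dict, module, and input below are hypothetical placeholders, not
# names defined in this diff.
#
#     import torch
#     import thunder
#     from thunder.dynamo.report import run_repro
#
#     ex_dict = {"thunder": thunder.jit, "eager": None}
#     mod = torch.nn.Linear(4, 4)
#     x = torch.randn(2, 4, requires_grad=True)
#     out = run_repro(ex_dict, "thunder", mod, "forward+backward", x)
#     # out["forward"] holds the forward outputs, out["backward"] the input gradients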


def get_thunder_graph_names(subgraph_infos: list[SubgraphInfo]):
thunder_graph_names = []
for graph_idx, subgraph_info in enumerate(subgraph_infos):
@@ -30,6 +60,7 @@ def thunderfx_save_report(
folder_path: str | PathLike = "/tmp/thunderfx_report",
check_consistency: bool = True,
check_benchmark: bool = True,
save_benchmark_inputs: bool = True,
**kwargs,
):
try:
@@ -44,7 +75,7 @@
return
print(f"The reproducer file is saved in {folder_path}")
return

print("The input callable can be successfully executed by ThunderFX.")
if not check_benchmark and not check_consistency:
return

@@ -62,62 +93,65 @@
folder.mkdir(exist_ok=True)
# Checks consistency with Torch eager
if check_consistency:
print("Verifying consistency between Thunder and Torch eager ...")
consistency_folder = folder / "consistency"
consistency_folder.mkdir(exist_ok=True)
compiled._backend.save_reproducer_to_folder(consistency_folder, check_consistency=True)
compiled._backend.save_reproducer_to_folder(consistency_folder)
for file in consistency_folder.glob("*.py"):
# The consistency results generated by the script are passed here via stdout
consistency_result = eval(
subprocess.run([sys.executable, folder / file], capture_output=True, text=True).stdout
)
for g_name, consistency in consistency_result.items():
g_ex_name = f"{file.name.rstrip('.py')}[{g_name}]"
assert g_ex_name in report_result
report_result[g_ex_name] = ["yes" if consistency is None else str(consistency)]
g_name = file.name.rstrip(".py")
cmd = [sys.executable, folder / file, "--check_consistency=True", "--compute_type=forward+backward"]
consistency_result = subprocess.run(cmd, capture_output=True, text=True)
if consistency_result.returncode:
error = consistency_result.stderr
print(f"[{g_name}] Consistency check failed: {error}")
else:
print(f"[{g_name}] Consistency check succeeded")

# Benchmark
if check_benchmark:
print("Analyzing performance through benchmarking, this might take a moment...")
benchmark_folder = folder / "benchmark"
benchmark_folder.mkdir(exist_ok=True)
compiled._backend.save_reproducer_to_folder(benchmark_folder, save_input_tensor=True, use_pytest_benchmark=True)

benchmark_json_files = []
for file in benchmark_folder.glob("*.py"):
benchmark_json_files.append(str(benchmark_folder / f"{file.name.replace('.py', '.json')}"))
subprocess.run(
benchmark_result = subprocess.run(
[
sys.executable,
"-m",
"pytest",
benchmark_folder / file,
"--benchmark-timer=torch.utils.benchmark.utils.timer.timer",
"--benchmark-warmup=on",
"--benchmark-group-by=param:compute_type",
f"--benchmark-json={benchmark_json_files[-1]}",
"--disable-warnings",
"-q",
],
capture_output=True,
text=True,
)

print(benchmark_result.stdout)
print("Max allocated memory usage:")
for tmp_json in benchmark_json_files:
with open(tmp_json) as file:
data = json.load(file)
for bk in data["benchmarks"]:
cur_name = bk["name"].lstrip("test_")
if cur_name in report_result:
report_result[cur_name].append(bk["stats"]["mean"])
report_result[cur_name].append(bk["extra_info"]["max_allocated_memory_MB"])

list_data: list[dict] = []
for g_name, values in report_result.items():
list_data.append({})
list_data[-1]["name"] = g_name
if check_consistency:
list_data[-1]["consistency"] = values[0]
if check_benchmark:
base = check_benchmark + check_consistency - 1
list_data[-1]["performance_mean"] = values[base]
list_data[-1]["max_allocated_memory_MB"] = values[base + 1]
json_data = {"report": list_data}

with open(folder / "report.json", "w") as f:
json.dump(json_data, f, indent=4)
benchs = data["benchmarks"]
forward_benchs = [bench for bench in benchs if "forward" in bench["param"]]
backward_benchs = [bench for bench in benchs if "backward" in bench["param"]]

forward_benchs_sorted = sorted(
forward_benchs, key=lambda x: x["extra_info"]["max_allocated_memory_MB"], reverse=True
)
backward_benchs_sorted = sorted(
backward_benchs, key=lambda x: x["extra_info"]["max_allocated_memory_MB"], reverse=True
)

for bk in forward_benchs_sorted:
print(f"{bk['name'].lstrip('test_')}: {bk['extra_info']['max_allocated_memory_MB']/1000} GB")
print("\n")
for bk in backward_benchs_sorted:
print(f"{bk['name'].lstrip('test_')}: {bk['extra_info']['max_allocated_memory_MB']/1000} GB")
print("\n")
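
# Illustrative usage sketch (not part of this commit): thunderfx_save_report
# compiles the callable with thunderfx, writes reproducer scripts under
# folder_path, and optionally runs the consistency and benchmark passes above.
# It mirrors the call in thunder/tests/test_dynamo.py; the callable and input
# here are illustrative, and a CUDA device is assumed, as in that test.
import torch
from thunder.dynamo.report import thunderfx_save_report

def foo(x):
    return torch.sin(x) + x.cos()

x = torch.randn(4, 4, device="cuda", requires_grad=True)
thunderfx_save_report(foo, x, folder_path="/tmp/thunderfx_report")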
97 changes: 68 additions & 29 deletions thunder/dynamo/utils.py
@@ -15,7 +15,8 @@
from thunder.torch.default_torch_ops import torch_auto_registered_ops
from thunder.torch import _torch_to_thunder_function_map
from thunder.torch.langctx import torchctx
from thunder.core.utils import check
from thunder.core.utils import check, sequencify
from thunder.core.pytree import tree_flatten

if TYPE_CHECKING:
from thunder.core.symbol import Symbol
@@ -724,7 +725,6 @@ def reproducer(
folder: str | os.PathLike,
graph_name: str,
use_pytest_benchmark: bool = False,
check_consistency: bool = False,
save_input_tensor: bool = False,
):
folder = Path(folder)
@@ -753,6 +753,38 @@ def torch_inductor(fn, inputs):
bench_executors_dict["thunder"]=partial(thunder.jit, {thunder_options_str})
bench_executors_dict["torch_inductor"]=torch_inductor
bench_executors_dict["eager"]=None
"""

COMMAND_LINE_ARGS = f"""
import argparse
parser = argparse.ArgumentParser(description="Process some inputs for myscript.py.")
parser.add_argument(
"--check_consistency",
type=bool,
default=False,
help="Whether to check consistency (default: False)"
)
parser.add_argument(
"--compute_type",
type=str,
choices=["forward", "forward+backward"],
default="forward",
help="Type of computation to perform (forward, forward+backward)"
)
parser.add_argument(
"--executor",
type=str,
choices=["torch_inductor", "thunder", "eager"],
default="thunder",
help="Type of computation to perform (thunder, torch_inductor, or eager)"
)
args = parser.parse_args()
ex_name = args.executor
compute_type = args.compute_type
check_acc = args.check_consistency
"""

# split reason
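
# Illustrative invocation (not part of this commit): a generated, non-pytest
# reproducer script that embeds COMMAND_LINE_ARGS above could be run roughly as
#
#     python graph0_thunder_0.py --executor=thunder --compute_type=forward+backward --check_consistency=True
#
# where "graph0_thunder_0.py" is a hypothetical example of the
# f"graph{graph_idx}_{cur_name}" naming used by save_reproducer_to_folder.
# Note that argparse's type=bool treats any non-empty value as True, so
# --check_consistency=False would still enable the check.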
@@ -791,6 +823,7 @@ def torch_inductor(fn, inputs):
code_str += "from thunder.dev_utils.nvtx_profile_transform import NvtxProfileTransform\n"
if use_pytest_benchmark:
code_str += f"""import pytest
from thunder.benchmarks.targets import parametrize_compute_type_only_training, benchmark_for_compute_type
{EXECUTOR_DICT_CODE_STR}
"""
if has_cuda_args:
@@ -803,9 +836,11 @@ def torch_inductor(fn, inputs):
"executor,",
executors,
ids=executor_ids,
)"""
func_str = f"def test_{graph_name}(benchmark, executor):\n{readable}\n"
)
@parametrize_compute_type_only_training"""
func_str = f"def test_{graph_name}(benchmark, executor, compute_type):\n{readable}\n"
else:
code_str += f"\n{EXECUTOR_DICT_CODE_STR}{COMMAND_LINE_ARGS}"
func_str = f"def test_{graph_name}():\n{readable}\n"

if any(arg is None for arg in args):
@@ -820,24 +855,15 @@ def torch_inductor(fn, inputs):
func_str += f"{_addindent(input_str, 4)}\n]\n"

if not use_pytest_benchmark:
func_str += f"compiled = thunder.jit(DynamoModule(), {thunder_options_str})\n"
func_str += "thunder_result = compiled(*inputs)"
if check_consistency:
func_str += f"""
{TORCH_INDUCTOR_FUNCTION_STR}
eager_result = DynamoModule()(*inputs)\n
inductor_result = torch_inductor(DynamoModule(), inputs)(*inputs)
def check_assertion(expected, actual):
try:
torch.testing.assert_close(expected, actual)
return None # No exception, return None
except AssertionError as e:
return e # Return the caught exception
result = {{}}
result["thunder"] = check_assertion(eager_result, thunder_result)
result["torch_inductor"] = check_assertion(eager_result, inductor_result)
print(result)
func_str += f"""
mod = DynamoModule()
from thunder.dynamo.report import run_repro
result = run_repro(bench_executors_dict, ex_name, mod, compute_type, *inputs)
if check_acc:
eager_result = run_repro(bench_executors_dict, "eager", mod, compute_type, *inputs)
for (compute_t, eager_v), (_, cur_v) in zip(eager_result.items(), result.items()):
torch.testing.assert_close(eager_v, cur_v, msg=lambda e : f'{{compute_t}}: {{e}}')
"""
else:
func_str = f"""{func_str}
@@ -848,18 +874,31 @@ def check_assertion(expected, actual):
compiled = executor(mod, inputs)
else:
compiled = executor(mod)
"""
if not has_cuda_args:
func_str += f"""benchmark(compiled, *inputs)"""
else:
func_str += f"""from thunder.benchmarks import record_peak_allocated_memory
with record_peak_allocated_memory(benchmark):
benchmark(compiled, *inputs)
benchmark_for_compute_type(compute_type, benchmark, compiled, inputs, {{}})
"""
print(comment_str, file=f)
print(code_str, file=f)
print(_addindent(func_str, 4), file=f)

if not use_pytest_benchmark:
print(f"\ntest_{graph_name}()", file=f)


def run_backward(fn, *args, **kwargs):
result = fn(*args, **kwargs)
result = sequencify(result)

forward_inputs = tree_flatten((args, kwargs))[0]
forward_inputs = list(filter(lambda x: isinstance(x, torch.Tensor) and x.requires_grad, forward_inputs))
differentiable_tensor_result = list(filter(lambda x: isinstance(x, torch.Tensor) and x.requires_grad, result))

output_grads = []
for diff_result in differentiable_tensor_result:
output_grads.append(torch.ones_like(diff_result))

for i in forward_inputs:
i.grad = None

torch.autograd.backward(result, output_grads, inputs=forward_inputs)
return result, [t.grad for t in forward_inputs]
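
# Illustrative usage sketch (not part of this commit): run_backward executes the
# forward pass, backpropagates ones-like cotangents through every differentiable
# tensor output, and returns the (sequencified) outputs together with the
# gradients of the differentiable input tensors. The module and input below are
# illustrative placeholders.
import torch
from thunder.dynamo.utils import run_backward

mod = torch.nn.Linear(3, 3)
x = torch.randn(2, 3, requires_grad=True)
outs, grads = run_backward(mod, x)
assert grads[0].shape == x.shape  # one gradient per differentiable input tensor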
6 changes: 5 additions & 1 deletion thunder/tests/test_dynamo.py
@@ -1042,7 +1042,7 @@ def foo(x):
out = thfoo(t0)
assert out.device.type == "meta"


@requiresCUDA
def test_report(tmp_path):
def foo(x):
@@ -1051,6 +1051,10 @@ def foo(x):
return y + x.cos()

x = torch.randn(4, 4, device="cuda", requires_grad=True)
# cf = thunderfx(foo)
# cf(x)
# cf._backend.save_reproducer_to_folder(tmp_path)

thunderfx_save_report(foo, x, folder_path=tmp_path)

from unittest.mock import patch
