From 4409faad5862c74f3a96a16b9f92ced5fa1a31c5 Mon Sep 17 00:00:00 2001 From: Nicholas Sarkauskas Date: Tue, 14 Jan 2025 13:12:44 -0800 Subject: [PATCH] review feedback --- CMakeLists.txt | 8 +++-- csrc/host_ir/executor.cpp | 6 ++-- csrc/host_ir/host_ir.cpp | 14 ++++----- csrc/host_ir/host_ir.h | 14 ++++----- ...ation.cpp => test_host_ir_integration.cpp} | 29 +++++++++---------- 5 files changed, 32 insertions(+), 39 deletions(-) rename tests/cpp/{test_multidevice_host_ir_integration.cpp => test_host_ir_integration.cpp} (63%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6c70757128d..9f182892ff1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -661,7 +661,6 @@ if(BUILD_TEST) ${NVFUSER_ROOT}/tests/cpp/test_multidevice_communications.cpp ${NVFUSER_ROOT}/tests/cpp/test_multidevice_communicator.cpp ${NVFUSER_ROOT}/tests/cpp/test_multidevice_host_ir.cpp - ${NVFUSER_ROOT}/tests/cpp/test_multidevice_host_ir_integration.cpp ${NVFUSER_ROOT}/tests/cpp/test_multidevice_lower_communication.cpp ${NVFUSER_ROOT}/tests/cpp/test_multidevice_matmul.cpp ${NVFUSER_ROOT}/tests/cpp/test_multidevice_pipeline.cpp @@ -700,7 +699,12 @@ if(BUILD_TEST) add_test(tutorial "${NVFUSER_ROOT}/tests/cpp/test_tutorial.cpp" "") list(APPEND TEST_BINARIES tutorial) - add_test(test_host_ir "${NVFUSER_ROOT}/tests/cpp/test_host_irs.cpp" "") + set(HOSTIR_TEST_SRCS) + list(APPEND HOSTIR_TEST_SRCS + ${NVFUSER_ROOT}/tests/cpp/test_host_irs.cpp + ${NVFUSER_ROOT}/tests/cpp/test_host_ir_integration.cpp + ) + add_test(test_host_ir "${HOSTIR_TEST_SRCS}" "") list(APPEND TEST_BINARIES test_host_ir) if(BUILD_PYTHON) diff --git a/csrc/host_ir/executor.cpp b/csrc/host_ir/executor.cpp index baffa5cfbdf..0f9f3da6921 100644 --- a/csrc/host_ir/executor.cpp +++ b/csrc/host_ir/executor.cpp @@ -313,11 +313,9 @@ void HostIrEvaluator::handle(LaunchKernel* launch_kernel) { args.push(input_evaluation); } - // placeholder for storing the outputs - std::vector outputs; - // run the compiled kernel - outputs = container_->getKernelExecutor(launch_kernel->getIndex())->run(args); + std::vector outputs = + container_->getKernelExecutor(launch_kernel->getIndex())->run(args); // Store the outputs in the context for (auto output_idx : c10::irange(outputs.size())) { diff --git a/csrc/host_ir/host_ir.cpp b/csrc/host_ir/host_ir.cpp index afe3433cc4d..780da3ca190 100644 --- a/csrc/host_ir/host_ir.cpp +++ b/csrc/host_ir/host_ir.cpp @@ -121,7 +121,7 @@ bool PostOnStream::sameAs(const Statement* other) const { LaunchKernel::LaunchKernel( IrBuilderPasskey passkey, - int hic_executor_index, + int64_t hic_executor_index, std::vector inputs, std::vector outputs) : Expr(passkey, std::move(inputs), std::move(outputs), {}), @@ -131,12 +131,12 @@ NVFUSER_DEFINE_CLONE_AND_CREATE(LaunchKernel) std::string LaunchKernel::toString(int indent_size) const { std::stringstream ss; - indent(ss, indent_size) << "LaunchKernel (" - << "Inputs:{"; + indent(ss, indent_size) << "LaunchKernel(" + << "Inputs: {"; std::for_each(inputs().begin(), inputs().end(), [&ss](auto input) { ss << input->toString(0) << ", "; }); - ss << "}, Outputs:{"; + ss << "}, Outputs: {"; std::for_each(outputs().begin(), outputs().end(), [&ss](auto output) { ss << output->toString(0) << ", "; }); @@ -144,7 +144,7 @@ std::string LaunchKernel::toString(int indent_size) const { return ss.str(); } -int LaunchKernel::getIndex() const { +int64_t LaunchKernel::getIndex() const { return hic_executor_index_; } @@ -152,10 +152,6 @@ std::string LaunchKernel::toInlineString(int indent_size) const { NVF_CHECK(false, "Can not be printed inline"); } -bool LaunchKernel::sameAs(const Statement* other) const { - return false; -} - Stream::Stream(IrBuilderPasskey passkey, Val* index) : Val(passkey, ValType::Stream), index_(index) {} diff --git a/csrc/host_ir/host_ir.h b/csrc/host_ir/host_ir.h index 640183c38e5..479b7ac2ef6 100644 --- a/csrc/host_ir/host_ir.h +++ b/csrc/host_ir/host_ir.h @@ -120,7 +120,9 @@ class LaunchKernel : public Expr { using Expr::Expr; LaunchKernel( IrBuilderPasskey passkey, - int hic_executor_index, // TODO + int64_t hic_executor_index, // Index into the HostIrContainer's vector of + // KernelExecutors--i.e., the kernel this IR + // should launch std::vector inputs, std::vector outputs); @@ -137,15 +139,9 @@ class LaunchKernel : public Expr { return "hir::LaunchKernel"; } - int getIndex() const; + int64_t getIndex() const; - bool sameAs(const Statement* other) const override; - - Expr* hostOpToPost() const { - return attributes_.at(0)->as(); - } - - int hic_executor_index_; + int64_t hic_executor_index_; }; class Stream : public Val { diff --git a/tests/cpp/test_multidevice_host_ir_integration.cpp b/tests/cpp/test_host_ir_integration.cpp similarity index 63% rename from tests/cpp/test_multidevice_host_ir_integration.cpp rename to tests/cpp/test_host_ir_integration.cpp index 16c21b92461..d17a5d8f2b9 100644 --- a/tests/cpp/test_multidevice_host_ir_integration.cpp +++ b/tests/cpp/test_host_ir_integration.cpp @@ -1,6 +1,6 @@ // clang-format off /* -* SPDX-FileCopyrightText: Copyright (c) 2023-present NVIDIA CORPORATION & AFFILIATES. +* SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. * All rights reserved. * SPDX-License-Identifier: BSD-3-Clause */ @@ -10,13 +10,15 @@ #include #include #include -#include +#include namespace nvfuser { namespace hir { -TEST_F(MultiDeviceTest, LaunchKernel) { +using HostIrIntegrationTest = NVFuserTest; + +TEST_F(HostIrIntegrationTest, LaunchKernel) { Fusion fusion; FusionGuard fg(&fusion); TensorView* tv0 = makeSymbolicTensor(2); @@ -40,26 +42,23 @@ TEST_F(MultiDeviceTest, LaunchKernel) { auto tv2 = ir_cloner.clone(tv0); auto tv3 = ir_cloner.clone(tv1); - std::vector lk_inputs = {tv2}; - std::vector lk_outputs = {tv3}; + std::vector launch_kernel_inputs = {tv2}; + std::vector launch_kernel_outputs = {tv3}; - hic->addInput(lk_inputs.back()); - hic->addOutput(lk_outputs.back()); + hic->addInput(launch_kernel_inputs.back()); + hic->addOutput(launch_kernel_outputs.back()); - auto launch_kernel = - IrBuilder::create(0, lk_inputs, lk_outputs); + auto launch_kernel = IrBuilder::create( + 0, launch_kernel_inputs, launch_kernel_outputs); hic->pushBackTopLevelExprs(launch_kernel); - HostIrEvaluatorParams params; - params.use_fusion_executor_cache = false; - HostIrEvaluator hie(std::move(hic), communicator_, params); + HostIrEvaluator hie(std::move(hic)); at::Tensor output = at::empty({32, 32}, options); - auto outputs = - hie.runWithInput({{lk_inputs.back(), t0}, {lk_outputs.back(), output}}); + auto outputs = hie.runWithInput({{tv2, t0}, {tv3, output}}); - ASSERT_TRUE(outputs[0].equal(t0)); + EXPECT_TRUE(outputs[0].equal(t0)); } } // namespace hir