diff --git a/.bazelci/presubmit.yml b/.bazelci/presubmit.yml index c57d817954..a3ecd590ba 100644 --- a/.bazelci/presubmit.yml +++ b/.bazelci/presubmit.yml @@ -78,6 +78,18 @@ tasks: working_directory: examples test_targets: - //... + examples-worker: + name: Examples Persistent Worker + platform: ubuntu1804 + working_directory: examples + build_flags: + - "--@rules_rust//rust:experimental-use-worker" + - "--worker_verbose" + test_flags: + - "--@rules_rust//rust:experimental-use-worker" + - "--worker_verbose" + test_targets: + - //... docs_linux: name: Docs platform: ubuntu1804 diff --git a/docs/index.md b/docs/index.md index 8227e4ec44..1a7e770f2f 100644 --- a/docs/index.md +++ b/docs/index.md @@ -90,3 +90,18 @@ bazel build @examples//hello_world_wasm --platforms=@rules_rust//rust/platform:w `rust_wasm_bindgen` will automatically transition to the `wasm` platform and can be used when building WebAssembly code for the host target. + +## Persistent Worker support + +The rules ship with a persistent worker implementation that uses Rust's [incremental compilation](https://doc.rust-lang.org/edition-guide/rust-2018/the-compiler/incremental-compilation-for-faster-compiles.html) for faster build times when iterating on code. + +To enable this: + +Enable the protobuf rules, which the worker needs in order to speak Bazel's protobuf-based worker protocol, by adding this to your `WORKSPACE`: + +``` +load("@rules_rust//proto:repositories.bzl", "rust_proto_repositories") +rust_proto_repositories() +``` + +Then, in your build command, use the flag `--@rules_rust//rust:experimental-use-worker`. Optionally, you can add this flag to your `.bazelrc` to always use the persistent worker. 
diff --git a/rust/BUILD.bazel b/rust/BUILD.bazel index d5c8d38e30..3fc0bde42b 100644 --- a/rust/BUILD.bazel +++ b/rust/BUILD.bazel @@ -1,4 +1,5 @@ load("@bazel_skylib//:bzl_library.bzl", "bzl_library") +load("@bazel_skylib//rules:common_settings.bzl", "bool_flag") package(default_visibility = ["//visibility:public"]) @@ -22,3 +23,8 @@ bzl_library( "//rust/private:rules", ], ) + +bool_flag( + name = "experimental-use-worker", + build_setting_default = False, +) diff --git a/rust/private/rust.bzl b/rust/private/rust.bzl index fe92e54de3..a88be4edd1 100644 --- a/rust/private/rust.bzl +++ b/rust/private/rust.bzl @@ -647,12 +647,19 @@ _common_attrs = { default = "@bazel_tools//tools/cpp:current_cc_toolchain", ), "_error_format": attr.label(default = "//:error_format"), + "_persistent_worker": attr.label( + default = Label("//util/worker"), + executable = True, + allow_single_file = True, + cfg = "exec", + ), "_process_wrapper": attr.label( default = Label("//util/process_wrapper"), executable = True, allow_single_file = True, cfg = "exec", ), + "_use_worker": attr.label(default = Label("//rust:experimental-use-worker")), } _rust_test_attrs = { diff --git a/rust/private/rustc.bzl b/rust/private/rustc.bzl index f8a8f8acc7..f4d24a0f3d 100644 --- a/rust/private/rustc.bzl +++ b/rust/private/rustc.bzl @@ -13,6 +13,7 @@ # limitations under the License. 
# buildifier: disable=module-docstring +load("@bazel_skylib//rules:common_settings.bzl", "BuildSettingInfo") load( "@bazel_tools//tools/build_defs/cc:action_names.bzl", "CPP_LINK_EXECUTABLE_ACTION_NAME", @@ -55,6 +56,9 @@ ErrorFormatInfo = provider( fields = {"error_format": "(string) [" + ", ".join(_error_format_values) + "]"}, ) +def _use_worker(ctx): + return hasattr(ctx.attr, "_use_worker") and ctx.attr._use_worker[BuildSettingInfo].value + def _get_rustc_env(ctx, toolchain): """Gathers rustc environment variables @@ -355,6 +359,9 @@ def construct_arguments( # Wrapper args first args = ctx.actions.args() + if _use_worker(ctx): + args.set_param_file_format("multiline") + args.use_param_file("@%s", use_always = True) for build_env_file in build_env_files: args.add("--env-file", build_env_file) @@ -554,12 +561,30 @@ def rustc_compile_action( else: formatted_version = "" + executable = ctx.executable._process_wrapper + tools = [] + arguments = [args] + execution_requirements = {} + if _use_worker(ctx): + tools = [executable] + arguments = [ + "--compiler", executable.path, + "--compilation_mode", ctx.var["COMPILATION_MODE"], + args, + ] + executable = ctx.executable._persistent_worker + execution_requirements = { + "requires-worker-protocol": "proto", + "supports-workers": "1", + } + ctx.actions.run( - executable = ctx.executable._process_wrapper, + executable = executable, inputs = compile_inputs, outputs = [crate_info.output], env = env, - arguments = [args], + tools = tools, + arguments = arguments, mnemonic = "Rustc", progress_message = "Compiling Rust {} {}{} ({} files)".format( crate_info.type, @@ -567,6 +592,7 @@ def rustc_compile_action( formatted_version, len(crate_info.srcs.to_list()), ), + execution_requirements = execution_requirements, ) dylibs = [get_preferred_artifact(lib) for linker_input in dep_info.transitive_noncrates.to_list() for lib in linker_input.libraries if _is_dylib(lib)] diff --git a/util/process_wrapper/BUILD.bazel 
b/util/process_wrapper/BUILD.bazel
index 4b600369ee..a27603f26e 100644
--- a/util/process_wrapper/BUILD.bazel
+++ b/util/process_wrapper/BUILD.bazel
@@ -24,3 +24,11 @@ cc_binary(
     }),
     visibility = ["//visibility:public"],
 )
+
+exports_files([
+    "utils.h",
+    "utils.cc",
+    "system.h",
+    "system_windows.cc",
+    "system_posix.cc",
+])
diff --git a/util/worker/BUILD b/util/worker/BUILD
new file mode 100644
index 0000000000..72c66430b9
--- /dev/null
+++ b/util/worker/BUILD
@@ -0,0 +1,38 @@
+load("@rules_cc//cc:defs.bzl", "cc_binary")
+load("@rules_proto//proto:defs.bzl", "proto_library")
+
+cc_binary(
+    name = "worker",
+    srcs = [
+        "worker.cc",
+        "//util/process_wrapper:system.h",
+        "//util/process_wrapper:utils.h",
+        "//util/process_wrapper:utils.cc",
+    ] + select({
+        "@bazel_tools//src/conditions:host_windows": [
+            "//util/process_wrapper:system_windows.cc",
+        ],
+        "//conditions:default": [
+            "//util/process_wrapper:system_posix.cc",
+        ],
+    }),
+    deps = [":worker_cc_proto"],
+    defines = [] + select({
+        "@bazel_tools//src/conditions:host_windows": [
+            "UNICODE",
+            "_UNICODE",
+        ],
+        "//conditions:default": [],
+    }),
+    visibility = ["//visibility:public"],
+)
+
+cc_proto_library(
+    name = "worker_cc_proto",
+    deps = [":worker_protocol"],
+)
+
+proto_library(
+    name = "worker_protocol",
+    srcs = ["worker_protocol.proto"],
+)
diff --git a/util/worker/worker.cc b/util/worker/worker.cc
new file mode 100644
index 0000000000..1453f86f3d
--- /dev/null
+++ b/util/worker/worker.cc
@@ -0,0 +1,267 @@
+// Copyright 2020 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <cstdint>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <utility>
+
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+
+#include "util/process_wrapper/system.h"
+#include "util/process_wrapper/utils.h"
+#include "util/worker/worker_protocol.pb.h"
+
+using blaze::worker::WorkRequest;
+using blaze::worker::WorkResponse;
+using google::protobuf::io::CodedInputStream;
+using google::protobuf::io::CodedOutputStream;
+using google::protobuf::io::FileInputStream;
+using google::protobuf::io::FileOutputStream;
+
+using namespace process_wrapper;
+
+// Reads one varint-length-prefixed WorkRequest from `input` into `request`.
+// Returns false, after logging to stderr, if the length prefix or the message
+// body cannot be read.
+// NOTE(review): end of input looks to be reported the same way as a malformed
+// request, so the worker would exit with an error when stdin closes - confirm.
+bool ReadRequest(FileInputStream *input, WorkRequest &request)
+{
+  uint32_t req_len;
+  CodedInputStream stream(input);
+  if (!stream.ReadVarint32(&req_len)) {
+    std::cerr << "Unable to read message length\n";
+    return false;
+  }
+
+  CodedInputStream::Limit limit = stream.PushLimit(req_len);
+  if (!request.MergeFromCodedStream(&stream)) {
+    std::cerr << "Unable to merge from stream\n";
+    return false;
+  }
+
+  if (!stream.ConsumedEntireMessage()) {
+    std::cerr << "Did not consume entire message\n";
+    return false;
+  }
+
+  stream.PopLimit(limit);
+  return true;
+}
+
+// Runs the compiler at `exec_path` with the arguments of `request` plus an
+// incremental compilation directory, then fills `response` with the exit
+// code, the request id and the captured stderr. Always returns true.
+bool HandleRequest(
+    const WorkRequest &request,
+    WorkResponse &response,
+    const System::StrType& exec_path,
+    const System::StrType& compilation_mode,
+    const System::EnvironmentBlock& environment_block
+) {
+  System::Arguments arguments;
+  // Pre-allocate. +2 for the incremental argument.
+  arguments.reserve(request.arguments_size() + 2);
+
+  std::string target;
+  for (int i = 0; i < request.arguments_size(); i++) {
+    // Bind by reference so the argument is only copied by push_back below.
+    const auto &argument = request.arguments(i);
+    // rfind() anchored at position 0 can only match a leading "--target=".
+    if (argument.rfind("--target=", 0) != std::string::npos) {
+      target = argument.substr(9) + '/';
+    }
+    arguments.push_back(argument);
+  }
+
+  // Looked up once per request and reused for every path built below.
+  const auto working_directory = System::GetWorkingDirectory();
+
+  // Considering
+  // https://github.com/rust-lang/rust/blob/673d0db5e393e9c64897005b470bfeb6d5aec61b/compiler/rustc_incremental/src/persist/fs.rs#L145
+  // as the canonical description of how incremental compilation is affected by
+  // the choice of directory, it helps to segment based on compilation mode.
+  // That prevents the GC phase from clearing the cache of a debug build when running an opt build.
+  arguments.push_back("--codegen");
+  // TODO: Can be shared across requests to avoid concatenation.
+  arguments.push_back("incremental=" + working_directory + "/rustc-target/" + target + compilation_mode + "/incremental");
+
+  // Since the worker is not multiplexed, we can always log to the same file and overwrite on the next request.
+  System::StrType stdout_file = working_directory + "/stdout.log";
+  System::StrType stderr_file = working_directory + "/stderr.log";
+
+  int exit_code = System::Exec(exec_path, arguments, environment_block,
+                               stdout_file, stderr_file);
+  std::ifstream source(stderr_file, std::ios::binary);
+  std::string stderr_output;
+  if (source.fail()) {
+    stderr_output = "[worker] Error getting stderr\n";
+  } else {
+    std::stringstream stderr_stream;
+    stderr_stream << source.rdbuf();
+    stderr_output = stderr_stream.str();
+  }
+
+  response.set_exit_code(exit_code);
+  response.set_request_id(request.request_id());
+  response.set_output(std::move(stderr_output));
+  return true;
+}
+
+// Serves WorkRequests from stdin, writing one length-prefixed WorkResponse to
+// stdout per request, until a request cannot be read or a response cannot be
+// serialized; both cases return 1.
+int RunAsWorker(
+    const System::StrType& exec_path,
+    const System::StrType& compilation_mode,
+    const System::EnvironmentBlock& environment_block
+) {
+  // File descriptors 0 and 1 are stdin and stdout.
+  std::unique_ptr<FileInputStream> input(new FileInputStream(0));
+  std::unique_ptr<FileOutputStream> output(new FileOutputStream(1));
+
+  while (true) {
+    WorkRequest request;
+    if (!ReadRequest(input.get(), request)) {
+      return 1;
+    }
+
+    WorkResponse response;
+    if (!HandleRequest(request, response, exec_path, compilation_mode, environment_block)) {
+      return 1;
+    }
+
+    // A CodedOutputStream will try to move around the underlying buffer when destroyed.
+    // If we Flush stdout, that fails. So ensure it goes out of scope before we flush.
+    {
+      CodedOutputStream coded_out(output.get());
+      coded_out.WriteVarint32(response.ByteSize());
+      response.SerializeWithCachedSizes(&coded_out);
+      if (coded_out.HadError()) {
+        std::cerr << "Error serializing response\n";
+        return 1;
+      }
+    }
+    output->Flush();
+  }
+
+  return 0;
+}
+
+// Runs the compiler at `exec_path` once, passing one argument per line of the
+// param file named by `param_file_param` (which must look like "@<path>").
+// Returns the result of System::Exec, or -1 if the param file is unusable.
+int RunStandalone(
+    const System::StrType& exec_path,
+    const System::EnvironmentBlock& environment_block,
+    const System::StrType& param_file_param
+) {
+  if (param_file_param.empty() || param_file_param[0] != '@') {
+    std::cerr << "Param file must start with '@', got \"" << ToUtf8(param_file_param) << "\"\n";
+    return -1;
+  }
+
+  std::string param_file = ToUtf8(param_file_param).substr(1);
+  System::Arguments arguments;
+
+  std::ifstream source(param_file);
+  if (!source.is_open()) {
+    // Otherwise the compiler would silently be run without any arguments.
+    std::cerr << "Unable to open param file \"" << param_file << "\"\n";
+    return -1;
+  }
+
+  std::string line;
+  while (std::getline(source, line)) {
+    arguments.push_back(line);
+  }
+
+  std::string empty;
+
+  return System::Exec(exec_path, arguments, environment_block, empty, empty);
+}
+
+using CharType = process_wrapper::System::StrType::value_type;
+
+// Entry point. Recognized arguments:
+//   --compiler <path>          executable to run for each compilation
+//   --compilation_mode <mode>  segments the incremental compilation cache
+//   --persistent_worker        serve WorkRequests instead of running once
+//   @<param file>              arguments for a single standalone run
+int PW_MAIN(int argc, const CharType* argv[], const CharType* envp[]) {
+  System::StrType exec_path;
+  System::StrType compilation_mode;
+  System::StrType param_file;
+  System::EnvironmentBlock environment_block;
+  // Taking all environment variables from the current process
+  // and sending them down to the child process
+  for (int i = 0; envp[i] != nullptr; ++i) {
+    environment_block.push_back(envp[i]);
+  }
+
+  // Have the last values added take precedence over the first.
+  // This is simpler than needing to track duplicates and explicitly override them.
+  std::reverse(environment_block.begin(), environment_block.end());
+
+  bool as_worker = false;
+
+  // Every argument must be one of the flags above or a param file; anything
+  // else is rejected.
+  for (int i = 1; i < argc; ++i) {
+    System::StrType arg = argv[i];
+    if (arg == PW_SYS_STR("--persistent_worker")) {
+      as_worker = true;
+    } else if (arg == PW_SYS_STR("--compilation_mode")) {
+      if (++i == argc) {
+        std::cerr << "--compilation_mode flag missing argument\n";
+        return -1;
+      }
+      compilation_mode = argv[i];
+    } else if (arg == PW_SYS_STR("--compiler")) {
+      if (++i == argc) {
+        std::cerr << "--compiler flag missing argument\n";
+        return -1;
+      }
+      exec_path = argv[i];
+    } else if (arg[0] == '@') {
+      param_file = arg;
+    } else {
+      std::cerr << "worker wrapper error: unknown argument \"" << ToUtf8(arg)
+                << "\"." << '\n';
+      return -1;
+    }
+  }
+
+  if (exec_path.empty()) {
+    // Both run modes need a compiler to execute.
+    std::cerr << "--compiler flag is required\n";
+    return -1;
+  }
+
+  if (as_worker) {
+    if (!param_file.empty()) {
+      std::cerr << "Param file argument \"" << ToUtf8(param_file) << "\" not supported in worker mode\n";
+      return -1;
+    }
+    return RunAsWorker(exec_path, compilation_mode, environment_block);
+  } else {
+    return RunStandalone(exec_path, environment_block, param_file);
+  }
+}
diff --git a/util/worker/worker_protocol.proto b/util/worker/worker_protocol.proto
new file mode 100644
index 0000000000..c628b7eb7a
--- /dev/null
+++ b/util/worker/worker_protocol.proto
@@ -0,0 +1,62 @@
+// Copyright 2015 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package blaze.worker;
+
+option java_package = "com.google.devtools.build.lib.worker";
+
+// An input file.
+message Input {
+  // The path in the file system where to read this input artifact from. This is
+  // either a path relative to the execution root (the worker process is
+  // launched with the working directory set to the execution root), or an
+  // absolute path.
+  string path = 1;
+
+  // A hash-value of the contents. The format of the contents is unspecified and
+  // the digest should be treated as an opaque token.
+  bytes digest = 2;
+}
+
+// This represents a single work unit that Blaze sends to the worker.
+message WorkRequest {
+  repeated string arguments = 1;  // The arguments of this work unit.
+
+  // The inputs that the worker is allowed to read during execution of this
+  // request.
+  repeated Input inputs = 2;
+
+  // To support multiplex workers, each WorkRequest must have a unique ID. This
+  // ID should be attached unchanged to the WorkResponse.
+  int32 request_id = 3;
+}
+
+// The worker sends this message to Blaze when it has finished its work on the
+// WorkRequest message.
+message WorkResponse {
+  int32 exit_code = 1;  // Exit code of the work done for the request.
+
+  // This is printed to the user after the WorkResponse has been received and is
+  // supposed to contain compiler warnings / errors etc. - thus we'll use a
+  // string type here, which gives us UTF-8 encoding.
+  string output = 2;
+
+  // To support multiplex workers, each WorkResponse must have a unique ID.
+  // Since worker processes which support multiplex workers will handle multiple
+  // WorkRequests in parallel, this ID will be used to determine which
+  // WorkerProxy this WorkResponse belongs to.
+  int32 request_id = 3;
+}