From dc0d88dd9d3ba32b7018a6c75938b357e559403f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Saparelli?= Date: Thu, 26 Oct 2023 17:32:32 +1300 Subject: [PATCH] Use and pass jobserver instead of limiting cores directly (#1338) Currently cargo-pgrx uses both rayon and some manual "max(1, cores / 3)" logic to use multiple cores without overwhelming the system. This PR ditches these two mechanisms in favour of the jobserver concept, implemented by the jobslot crate. That is an implementation of the `make` jobserver protocol. Basically a jobserver controls access to a limited pool of job slots: to do some compute, you grab a slot, blocking until one becomes available if necessary. A process in the jobserver context passes down the jobserver handle to subprocesses, who, if they're aware (like `make` is), thus cooperate together. In this PR, cargo-pgrx: - passes the jobserver handle to all calls to `make` - grabs a slot for all *compute-bound* steps (ie untar and configure) - *doesn't* grab a slot for *network-bound* steps (ie downloads) - starts a thread per `pg_config` resolution task This naturally and granularly distributes tasks across all available cores, without restricting each `make` to a fixed amount of jobs. Notably, if eg building pg12 and pg13, and pg12 finishes completely, the `make` for pg13 may scale up to use the freed-up cores. --- Before, with 4 cores, it would not even start downloading/untarring pg16 until one of the compiles was done. After, 5 downloads happen and then it interleaves 4 compute tasks. --- Cargo.lock | 34 +++++----- cargo-pgrx/Cargo.toml | 3 +- cargo-pgrx/README.md | 86 ++++++++++++------------- cargo-pgrx/src/command/init.rs | 111 ++++++++++++++++++++------------- 4 files changed, 126 insertions(+), 108 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cf1cc7525..2ed7f4930 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -152,7 +152,7 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi 0.1.19", + "hermit-abi", "libc", "winapi", ] @@ -360,9 +360,9 @@ dependencies = [ "eyre", "flate2", "fork", + "jobslot", "libloading 0.8.1", "nix", - "num_cpus", "object", "once_cell", "owo-colors", @@ -371,7 +371,6 @@ dependencies = [ "prettyplease", "proc-macro2", "quote", - "rayon", "regex", "semver 1.0.20", "serde", @@ -1060,12 +1059,6 @@ dependencies = [ "libc", ] -[[package]] -name = "hermit-abi" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" - [[package]] name = "hmac" version = "0.12.1" @@ -1123,6 +1116,19 @@ version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +[[package]] +name = "jobslot" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d03a2fc28c7a19e689b7e8c3f209c77dac23133d235a80cd9c3f400ecd9a73f" +dependencies = [ + "cfg-if", + "getrandom", + "libc", + "scopeguard", + "windows-sys", +] + [[package]] name = "js-sys" version = "0.3.64" @@ -1380,16 +1386,6 @@ dependencies = [ "libm", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi 0.3.3", - "libc", -] - [[package]] name = "numeric" version = "0.0.0" diff --git a/cargo-pgrx/Cargo.toml b/cargo-pgrx/Cargo.toml index c4fe4323d..f20c29c8a 100644 --- a/cargo-pgrx/Cargo.toml +++ b/cargo-pgrx/Cargo.toml @@ -32,13 +32,11 @@ clap-cargo = { version = "0.11.0", features = [ "cargo_metadata" ] } semver = "1.0.20" owo-colors = { version = "3.5.0", features = [ "supports-colors" ] } env_proxy = "0.4.1" -num_cpus = "1.16.0" pgrx-pg-config = { path = "../pgrx-pg-config", version = "=0.11.0" } pgrx-sql-entity-graph = { path = "../pgrx-sql-entity-graph", version = "=0.11.0" } prettyplease = "0.2.15" proc-macro2 = { version = "1.0.69", features = [ "span-locations" ] } quote = "1.0.33" -rayon = "1.8.0" regex = "1.10.0" ureq = "2.8.0" url = "2.4.1" @@ -60,6 +58,7 @@ flate2 = { version = "1.0.27", default-features = false, features = ["rust_backe tempfile = "3.8.0" nix = { version = "0.27", default-features = false, features = ["user"] } toml = "0.8.2" +jobslot = "0.2.12" bzip2 = "0.4.4" tar = "0.4.40" diff --git a/cargo-pgrx/README.md b/cargo-pgrx/README.md index 22e008bf4..a53eadbb7 100644 --- a/cargo-pgrx/README.md +++ b/cargo-pgrx/README.md @@ -10,7 +10,7 @@ A video walkthrough of its abilities can be found here: https://www.twitch.tv/vi Install via crates.io: -```shell script +```console $ cargo install --locked cargo-pgrx ``` @@ -18,7 +18,7 @@ As new versions of `pgrx` are released, you'll want to make sure you run this co ## Usage -```shell script +```console $ cargo pgrx --help Cargo subcommand for 'pgrx' to make Postgres extension development easy @@ -58,7 +58,7 @@ Options: ## First Time Initialization -```shell script +```console $ cargo pgrx init Discovered Postgres v15.0, v14.5, v13.8, v12.12, v11.17 Downloading Postgres v15.0 from https://ftp.postgresql.org/pub/source/v15.0/postgresql-15.0.tar.bz2 @@ -100,7 +100,7 @@ $ cargo pgrx init `cargo pgrx init` is required to be run once to properly configure the `pgrx` development environment. -As shown by the screenshot above, it downloads the latest versions of Postgres v11, v12, v13, v14, v15, configures them, compiles them, and installs them to `~/.pgrx/`, including all [`contrib`](https://www.postgresql.org/docs/current/contrib.html) extensions and tools included with Postgres. Other `pgrx` commands such as `run` and `test` will fully manage and otherwise use these Postgres installations for you. +As shown by the screenshot above, it downloads the latest releases of supported Postgres versions, configures them, compiles them, and installs them to `~/.pgrx/`, including all [`contrib`](https://www.postgresql.org/docs/current/contrib.html) extensions and tools included with Postgres. Other `pgrx` commands such as `run` and `test` will fully manage and otherwise use these Postgres installations for you. `pgrx` is designed to support multiple Postgres versions in such a way that during development, you'll know if you're trying to use a Postgres API that isn't common across all versions. It's also designed to make testing your extension against these versions easy. This is why it requires you to have all fully compiled and installed versions of Postgres during development. @@ -125,34 +125,34 @@ Once complete, `cargo pgrx init` also creates a configuration file (`~/.pgrx/con If a new minor Postgres version is released in the future you can simply run `cargo pgrx init [args]` again, and your local version will be updated, preserving all existing databases and configuration. -```shell script -cargo-pgrx-init 0.5.0 -PgCentral Foundation, Inc. +```console +$ cargo pgrx init -h Initialize pgrx development environment for the first time -USAGE: - cargo pgrx init [OPTIONS] +Usage: cargo pgrx init [OPTIONS] -OPTIONS: - --base-port - Base port number - - --base-testing-port - Base testing port number - - -h, --help Print help information - --pg11 If installed locally, the path to PG11's `pgconfig` tool, or `download` to - have pgrx download/compile/install it [env: PG11_PG_CONFIG=] - --pg12 If installed locally, the path to PG12's `pgconfig` tool, or `download` to - have pgrx download/compile/install it [env: PG12_PG_CONFIG=] - --pg13 If installed locally, the path to PG13's `pgconfig` tool, or `download` to - have pgrx download/compile/install it [env: PG13_PG_CONFIG=] - --pg14 If installed locally, the path to PG14's `pgconfig` tool, or `download` to - have pgrx download/compile/install it [env: PG14_PG_CONFIG=] - --pg15 If installed locally, the path to PG15's `pgconfig` tool, or `download` to - have pgrx download/compile/install it [env: PG15_PG_CONFIG=] - -v, --verbose Enable info logs, -vv for debug, -vvv for trace - -V, --version Print version information +Options: + --pg11 If installed locally, the path to PG11's `pgconfig` tool, or `download` + to have pgrx download/compile/install it [env: PG11_PG_CONFIG=] + --pg12 If installed locally, the path to PG12's `pgconfig` tool, or `download` + to have pgrx download/compile/install it [env: PG12_PG_CONFIG=] + --pg13 If installed locally, the path to PG13's `pgconfig` tool, or `download` + to have pgrx download/compile/install it [env: PG13_PG_CONFIG=] + --pg14 If installed locally, the path to PG14's `pgconfig` tool, or `download` + to have pgrx download/compile/install it [env: PG14_PG_CONFIG=] + --pg15 If installed locally, the path to PG15's `pgconfig` tool, or `download` + to have pgrx download/compile/install it [env: PG15_PG_CONFIG=] + --pg16 If installed locally, the path to PG16's `pgconfig` tool, or `download` + to have pgrx download/compile/install it [env: PG16_PG_CONFIG=] + --base-port Base port number + --base-testing-port Base testing port number + --configure-flag Additional flags to pass to the configure script + --valgrind Compile PostgreSQL with the necessary flags to detect a good amount of + memory errors when run under Valgrind + -j, --jobs Allow N make jobs at once + -h, --help Print help (see more with '--help') + -v, --verbose... Enable info logs, -vv for debug, -vvv for trace + -V, --version Print version ``` ## Creating a new Extension @@ -173,7 +173,7 @@ If you'd like to create a "background worker" instead, specify the `--bgworker` > > If you don't, you may experience unnecessary rebuilds using tools like Rust-Analyzer, as it will use the wrong `rustflags` option. -```shell script +```console $ cargo pgrx new --help cargo-pgrx-new 0.5.0 PgCentral Foundation, Inc. @@ -194,7 +194,7 @@ OPTIONS: ## Managing Your Postgres Installations -```shell script +```console $ cargo pgrx status all Postgres v11 is stopped Postgres v12 is stopped @@ -234,7 +234,7 @@ Once started, you can connect to them using `psql` (if you have it on your $PATH ## Compiling and Running Your Extension -```shell script +```console $ cargo pgrx run pg13 building extension with features `` "cargo" "build" "--message-format=json-render-diagnostics" @@ -289,7 +289,7 @@ When you exit `psql`, the Postgres instance continues to run in the background. For Postgres installations which are already on your computer, `cargo pgrx run` will need write permissions to the directories described by `pg_config --pkglibdir` and `pg_config --sharedir`. It's up to you to decide how to make that happen. While a single Postgres installation can be started multiple times on different ports and different data directories, it does not support multiple "extension library directories". -```shell script +```console $ cargo pgrx run --help cargo-pgrx-run 0.5.0 PgCentral Foundation, Inc. @@ -341,7 +341,7 @@ OPTIONS: ## Connect to a Database -```shell script +```console $ cargo pgrx connect Re-using existing database strings psql (13.5) @@ -365,7 +365,7 @@ database name as the final argument. If the specified database doesn't exist, `cargo pgrx connect` will create it. Similarly, if the specified version of Postgres isn't running, it'll be automatically started. -```shell script +```console cargo-pgrx-connect 0.5. PgCentral Foundation, Inc. Connect, via psql, to a Postgres instance @@ -401,7 +401,7 @@ OPTIONS: ## Installing Your Extension Locally -```shell script +```console $ cargo pgrx install building extension with features `` "cargo" "build" "--message-format=json-render-diagnostics" @@ -427,7 +427,7 @@ files to their proper location using `sudo`, prompting you for your password. By default, `cargo pgrx install` builds your extension in debug mode. Specifying `--release` changes that. -```shell script +```console $ cargo pgrx install --help cargo-pgrx-install 0.5.0 PgCentral Foundation, Inc. @@ -480,7 +480,7 @@ OPTIONS: ## Testing Your Extension -```shell script +```console $ cargo pgrx test "cargo" "test" "--features" " pg_test" Finished test [unoptimized + debuginfo] target(s) in 0.07s @@ -519,7 +519,7 @@ Rust `#[test]` functions behave normally, while `#[pg_test]` functions are run * Additionally, a `#[pg_test]` function runs in a transaction that is aborted when the test is finished. As such, any changes it might make to the database are not preserved. -```shell script +```console cargo-pgrx-test 0.5.0 PgCentral Foundation, Inc. Run the test suite for this crate @@ -569,7 +569,7 @@ OPTIONS: ## Building an Installation Package -```shell script +```console $ cargo pgrx package building extension with features `` "cargo" "build" "--release" "--message-format=json-render-diagnostics" @@ -601,7 +601,7 @@ version of Postgres 12.) This command could be useful from Dockerfiles, for example, to automate building installation packages for various Linux distobutions or MacOS Postgres installations. -```shell script +```console $ cargo pgrx package --help cargo-pgrx-package 0.5.0 PgCentral Foundation, Inc. @@ -657,7 +657,7 @@ OPTIONS: If you just want to look at the full extension schema that pgrx will generate, use `cargo pgrx schema`. -```shell script +```console $ cargo pgrx schema --help cargo-pgrx-schema 0.5.0 PgCentral Foundation, Inc. @@ -844,7 +844,7 @@ Although this may be relaxed in the future, currently schema generation involves `#[repr(Rust)]` types. Generally, the appropriate way to fix this is reinstall `cargo-pgrx`, using a command like the following -```shell script +```console $ cargo install --force --locked cargo-pgrx ``` diff --git a/cargo-pgrx/src/command/init.rs b/cargo-pgrx/src/command/init.rs index 0640a366e..0f1bba6bb 100644 --- a/cargo-pgrx/src/command/init.rs +++ b/cargo-pgrx/src/command/init.rs @@ -16,16 +16,15 @@ use owo_colors::OwoColorize; use pgrx_pg_config::{ get_c_locale_flags, prefix_path, ConfigToml, PgConfig, PgConfigSelector, Pgrx, PgrxHomeError, }; -use rayon::prelude::*; use tar::Archive; use std::collections::HashMap; use std::fs::File; use std::io::{Read, Write}; +use std::num::NonZeroUsize; use std::path::PathBuf; use std::process::{Command, Stdio}; - -use std::sync::{Arc, Mutex}; +use std::sync::OnceLock; static PROCESS_ENV_DENYLIST: &'static [&'static str] = &[ "DEBUG", @@ -79,11 +78,28 @@ pub(crate) struct Init { /// installed, but the resulting build is usable without valgrind. #[clap(long)] valgrind: bool, + #[clap(long, short, help = "Allow N make jobs at once")] + jobs: Option, + #[clap(skip)] + jobserver: OnceLock, } impl CommandExecute for Init { #[tracing::instrument(level = "error", skip(self))] fn execute(self) -> eyre::Result<()> { + self.jobserver + .set( + jobslot::Client::new( + self.jobs + .or_else(|| { + std::thread::available_parallelism().map(NonZeroUsize::get).ok() + }) + .unwrap_or(1), + ) + .expect("failed to create jobserver"), + ) + .unwrap(); + let mut versions = HashMap::new(); if let Some(ref version) = self.pg11 { @@ -161,39 +177,36 @@ pub(crate) fn init_pgrx(pgrx: &Pgrx, init: &Init) -> eyre::Result<()> { }, }; - let output_configs = Arc::new(Mutex::new(Vec::new())); - - let mut pg_configs = Vec::new(); - for pg_config in pgrx.iter(PgConfigSelector::All) { - pg_configs.push(pg_config?); - } - - let span = tracing::Span::current(); - pg_configs - .into_par_iter() - .map(|pg_config| { - let _span = span.clone().entered(); - let mut pg_config = pg_config.clone(); - stop_postgres(&pg_config).ok(); // no need to fail on errors trying to stop postgres while initializing - if !pg_config.is_real() { - pg_config = match download_postgres(&pg_config, &pgrx_home, init) { - Ok(pg_config) => pg_config, - Err(e) => return Err(eyre!(e)), + let mut output_configs = std::thread::scope(|s| -> eyre::Result> { + let span = tracing::Span::current(); + let mut threads = Vec::new(); + for pg_config in pgrx.iter(PgConfigSelector::All) { + let pg_config = pg_config?; + let span = span.clone(); + let pgrx_home = pgrx_home.clone(); + threads.push(s.spawn(move || { + let _span = span.entered(); + let mut pg_config = pg_config.clone(); + stop_postgres(&pg_config).ok(); // no need to fail on errors trying to stop postgres while initializing + if !pg_config.is_real() { + pg_config = match download_postgres(&pg_config, &pgrx_home, init) { + Ok(pg_config) => pg_config, + Err(e) => return Err(eyre!(e)), + } } - } - let mut mutex = output_configs.lock(); - // PoisonError doesn't implement std::error::Error, can't `?` it. - let output_configs = mutex.as_mut().expect("failed to get output_configs lock"); + Ok(pg_config) + })); + } + let mut output_configs = Vec::with_capacity(threads.len()); + for thread in threads { + let pg_config = thread.join().map_err(|_| eyre!("thread panicked"))??; output_configs.push(pg_config); - Ok(()) - }) - .collect::>()?; + } - let mut mutex = output_configs.lock(); - // PoisonError doesn't implement std::error::Error, can't `?` it. - let output_configs = mutex.as_mut().unwrap(); + Ok(output_configs) + })?; output_configs.sort_by(|a, b| { a.major_version() @@ -214,7 +227,7 @@ pub(crate) fn init_pgrx(pgrx: &Pgrx, init: &Init) -> eyre::Result<()> { } } - write_config(output_configs, init)?; + write_config(&output_configs, init)?; Ok(()) } @@ -247,13 +260,20 @@ fn download_postgres( } let mut buf = Vec::new(); let _count = http_response.into_reader().read_to_end(&mut buf)?; - let pgdir = untar(&buf, pgrx_home, pg_config)?; - configure_postgres(pg_config, &pgdir, init)?; - make_postgres(pg_config, &pgdir)?; - make_install_postgres(pg_config, &pgdir) // returns a new PgConfig object + let pgdir = untar(&buf, pgrx_home, pg_config, &init)?; + configure_postgres(pg_config, &pgdir, &init)?; + make_postgres(pg_config, &pgdir, &init)?; + make_install_postgres(pg_config, &pgdir, init) // returns a new PgConfig object } -fn untar(bytes: &[u8], pgrxdir: &PathBuf, pg_config: &PgConfig) -> eyre::Result { +fn untar( + bytes: &[u8], + pgrxdir: &PathBuf, + pg_config: &PgConfig, + init: &Init, +) -> eyre::Result { + let _token = init.jobserver.get().unwrap().acquire().unwrap(); + let mut unpackdir = pgrxdir.clone(); unpackdir.push(&format!("{}_unpack", pg_config.version()?)); if unpackdir.exists() { @@ -382,6 +402,8 @@ fn fixup_homebrew_for_icu(configure_cmd: &mut Command) { } fn configure_postgres(pg_config: &PgConfig, pgdir: &PathBuf, init: &Init) -> eyre::Result<()> { + let _token = init.jobserver.get().unwrap().acquire().unwrap(); + println!("{} Postgres v{}", " Configuring".bold().green(), pg_config.version()?); let mut configure_path = pgdir.clone(); configure_path.push("configure"); @@ -447,14 +469,11 @@ fn configure_postgres(pg_config: &PgConfig, pgdir: &PathBuf, init: &Init) -> eyr } } -fn make_postgres(pg_config: &PgConfig, pgdir: &PathBuf) -> eyre::Result<()> { - let num_cpus = 1.max(num_cpus::get() / 3); +fn make_postgres(pg_config: &PgConfig, pgdir: &PathBuf, init: &Init) -> eyre::Result<()> { println!("{} Postgres v{}", " Compiling".bold().green(), pg_config.version()?); let mut command = std::process::Command::new("make"); command - .arg("-j") - .arg(num_cpus.to_string()) .arg("world-bin") .stdout(std::process::Stdio::piped()) .stderr(std::process::Stdio::piped()) @@ -467,7 +486,7 @@ fn make_postgres(pg_config: &PgConfig, pgdir: &PathBuf) -> eyre::Result<()> { let command_str = format!("{:?}", command); tracing::debug!(command = %command_str, "Running"); - let child = command.spawn()?; + let child = init.jobserver.get().unwrap().configure_and_run(&mut command, |cmd| cmd.spawn())?; let output = child.wait_with_output()?; tracing::trace!(status_code = %output.status, command = %command_str, "Finished"); @@ -483,7 +502,11 @@ fn make_postgres(pg_config: &PgConfig, pgdir: &PathBuf) -> eyre::Result<()> { } } -fn make_install_postgres(version: &PgConfig, pgdir: &PathBuf) -> eyre::Result { +fn make_install_postgres( + version: &PgConfig, + pgdir: &PathBuf, + init: &Init, +) -> eyre::Result { println!( "{} Postgres v{} to {}", " Installing".bold().green(), @@ -504,7 +527,7 @@ fn make_install_postgres(version: &PgConfig, pgdir: &PathBuf) -> eyre::Result