From 9a0bedea45d107316d4a14d9d3b2ab2486c56ffd Mon Sep 17 00:00:00 2001
From: Denis Cornehl
Date: Thu, 17 Oct 2024 23:05:53 +0200
Subject: [PATCH 1/3] queue rebuilds for old releases

---
 ...7d80cc4be3282ca1b9c37c1b2e8c28dff53d0.json |  22 +++
 ...57a4282b79c6c20859bb88a451afc3b1a404d.json |  35 ++++
 src/bin/cratesfyi.rs                          |   6 +
 src/build_queue.rs                            | 187 +++++++++++++++++-
 src/config.rs                                 |   7 +
 src/lib.rs                                    |   2 +-
 src/utils/daemon.rs                           |  32 ++-
 7 files changed, 284 insertions(+), 7 deletions(-)
 create mode 100644 .sqlx/query-007c5f49470ce1bc503f82003377d80cc4be3282ca1b9c37c1b2e8c28dff53d0.json
 create mode 100644 .sqlx/query-dd1b692e4dc6aaa210f53b1cf3f57a4282b79c6c20859bb88a451afc3b1a404d.json

diff --git a/.sqlx/query-007c5f49470ce1bc503f82003377d80cc4be3282ca1b9c37c1b2e8c28dff53d0.json b/.sqlx/query-007c5f49470ce1bc503f82003377d80cc4be3282ca1b9c37c1b2e8c28dff53d0.json
new file mode 100644
index 000000000..856d46f21
--- /dev/null
+++ b/.sqlx/query-007c5f49470ce1bc503f82003377d80cc4be3282ca1b9c37c1b2e8c28dff53d0.json
@@ -0,0 +1,22 @@
+{
+  "db_name": "PostgreSQL",
+  "query": "SELECT COUNT(*) as \"count!\" FROM queue WHERE priority >= $1",
+  "describe": {
+    "columns": [
+      {
+        "ordinal": 0,
+        "name": "count!",
+        "type_info": "Int8"
+      }
+    ],
+    "parameters": {
+      "Left": [
+        "Int4"
+      ]
+    },
+    "nullable": [
+      null
+    ]
+  },
+  "hash": "007c5f49470ce1bc503f82003377d80cc4be3282ca1b9c37c1b2e8c28dff53d0"
+}
diff --git a/.sqlx/query-dd1b692e4dc6aaa210f53b1cf3f57a4282b79c6c20859bb88a451afc3b1a404d.json b/.sqlx/query-dd1b692e4dc6aaa210f53b1cf3f57a4282b79c6c20859bb88a451afc3b1a404d.json
new file mode 100644
index 000000000..1eede2b5e
--- /dev/null
+++ b/.sqlx/query-dd1b692e4dc6aaa210f53b1cf3f57a4282b79c6c20859bb88a451afc3b1a404d.json
@@ -0,0 +1,35 @@
+{
+  "db_name": "PostgreSQL",
+  "query": "SELECT i.* FROM (\n SELECT\n c.name,\n r.version,\n max(b.rustc_nightly_date) as rustc_nightly_date\n\n FROM crates AS c\n INNER JOIN releases AS r ON c.latest_version_id = r.id\n INNER JOIN builds AS b ON r.id = b.rid\n\n WHERE\n r.rustdoc_status = TRUE\n\n GROUP BY c.name, r.version\n ) as i\n WHERE i.rustc_nightly_date < $1\n ORDER BY i.rustc_nightly_date ASC\n LIMIT $2",
+  "describe": {
+    "columns": [
+      {
+        "ordinal": 0,
+        "name": "name",
+        "type_info": "Text"
+      },
+      {
+        "ordinal": 1,
+        "name": "version",
+        "type_info": "Text"
+      },
+      {
+        "ordinal": 2,
+        "name": "rustc_nightly_date",
+        "type_info": "Date"
+      }
+    ],
+    "parameters": {
+      "Left": [
+        "Date",
+        "Int8"
+      ]
+    },
+    "nullable": [
+      false,
+      false,
+      null
+    ]
+  },
+  "hash": "dd1b692e4dc6aaa210f53b1cf3f57a4282b79c6c20859bb88a451afc3b1a404d"
+}
diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs
index ffceddafb..2e2bd56f3 100644
--- a/src/bin/cratesfyi.rs
+++ b/src/bin/cratesfyi.rs
@@ -155,6 +155,8 @@ enum CommandLine {
         repository_stats_updater: Toggle,
         #[arg(long = "cdn-invalidator", default_value = "enabled", value_enum)]
         cdn_invalidator: Toggle,
+        #[arg(long = "queue-rebuilds", default_value = "enabled", value_enum)]
+        queue_rebuilds: Toggle,
     },
 
     StartBuildServer {
@@ -192,6 +194,7 @@ impl CommandLine {
             metric_server_socket_addr,
             repository_stats_updater,
             cdn_invalidator,
+            queue_rebuilds,
         } => {
             if repository_stats_updater == Toggle::Enabled {
                 docs_rs::utils::daemon::start_background_repository_stats_updater(&ctx)?;
             }
             if cdn_invalidator == Toggle::Enabled {
                 docs_rs::utils::daemon::start_background_cdn_invalidator(&ctx)?;
             }
+            if queue_rebuilds == Toggle::Enabled {
+                docs_rs::utils::daemon::start_background_queue_rebuild(&ctx)?;
+            }
             start_background_metrics_webserver(Some(metric_server_socket_addr), &ctx)?;
diff --git a/src/build_queue.rs b/src/build_queue.rs
index 43e04ab3d..925e2348d 100644
--- a/src/build_queue.rs
+++ b/src/build_queue.rs
@@ -8,15 +8,13 @@ use crate::{cdn, BuildPackageSummary};
 use crate::{Config, Index, InstanceMetrics, RustwideBuilder};
 use anyhow::Context as _;
 use fn_error_context::context;
-use futures_util::stream::TryStreamExt;
+use futures_util::{stream::TryStreamExt, StreamExt};
 use sqlx::Connection as _;
 use std::collections::HashMap;
 use std::sync::Arc;
 use tokio::runtime::Runtime;
-use tracing::{debug, error, info};
+use tracing::{debug, error, info, instrument};
 
-// Threshold priority to decide whether a crate will in the rebuild-queue-list.
-// If crate is in the rebuild-queue-list it won't in the build-queue-list.
 pub(crate) const REBUILD_PRIORITY: i32 = 20;
 
 #[derive(Debug, Clone, Eq, PartialEq, serde::Serialize)]
@@ -656,12 +654,191 @@ impl BuildQueue {
     }
 }
 
+/// Queue rebuilds as configured.
+///
+/// The idea is to rebuild:
+/// * the latest release of each crate
+/// * when the nightly version is older than our configured threshold
+/// * and there was a successful build for that release, that included documentation.
+/// * starting with the oldest nightly versions.
+/// * also checking if there is already a build queued.
+///
+/// This might exclude releases from rebuilds that
+/// * previously failed but would succeed with a newer nightly version
+/// * previously failed but would succeed just with a retry.
+#[instrument(skip_all)]
+pub async fn queue_rebuilds(
+    conn: &mut sqlx::PgConnection,
+    config: &Config,
+    build_queue: &AsyncBuildQueue,
+) -> Result<()> {
+    let already_queued_rebuilds = sqlx::query_scalar!(
+        r#"SELECT COUNT(*) as "count!" FROM queue WHERE priority >= $1"#,
+        REBUILD_PRIORITY
+    )
+    .fetch_one(&mut *conn)
+    .await?;
+
+    let rebuilds_to_queue = config
+        .max_queued_rebuilds
+        .expect("config.max_queued_rebuilds not set") as i64
+        - already_queued_rebuilds;
+
+    if rebuilds_to_queue <= 0 {
+        info!("not queueing rebuilds; queue limit reached");
+        return Ok(());
+    }
+
+    let mut results = sqlx::query!(
+        "SELECT i.* FROM (
+            SELECT
+                c.name,
+                r.version,
+                max(b.rustc_nightly_date) as rustc_nightly_date
+
+            FROM crates AS c
+            INNER JOIN releases AS r ON c.latest_version_id = r.id
+            INNER JOIN builds AS b ON r.id = b.rid
+
+            WHERE
+                r.rustdoc_status = TRUE
+
+            GROUP BY c.name, r.version
+        ) as i
+        WHERE i.rustc_nightly_date < $1
+        ORDER BY i.rustc_nightly_date ASC
+        LIMIT $2",
+        config
+            .rebuild_up_to_date
+            .expect("config.rebuild_up_to_date not set"),
+        rebuilds_to_queue,
+    )
+    .fetch(&mut *conn);
+
+    while let Some(row) = results.next().await {
+        let row = row?;
+
+        if !build_queue
+            .has_build_queued(&row.name, &row.version)
+            .await?
+        {
+            info!("queueing rebuild for {} {}...", &row.name, &row.version);
+            build_queue
+                .add_crate(&row.name, &row.version, REBUILD_PRIORITY, None)
+                .await?;
+        }
+    }
+
+    Ok(())
+}
+
 #[cfg(test)]
 mod tests {
+    use crate::test::FakeBuild;
+
     use super::*;
-    use chrono::Utc;
+    use chrono::{NaiveDate, Utc};
     use std::time::Duration;
 
+    #[test]
+    fn test_dont_rebuild_when_new() {
+        crate::test::async_wrapper(|env| async move {
+            env.override_config(|config| {
+                config.max_queued_rebuilds = Some(100);
+                config.rebuild_up_to_date = Some(NaiveDate::from_ymd_opt(2020, 1, 1).unwrap());
+            });
+
+            env.async_fake_release()
+                .await
+                .name("foo")
+                .version("0.1.0")
+                .builds(vec![FakeBuild::default()
+                    .rustc_version("rustc 1.84.0-nightly (e7c0d2750 2020-10-15)")])
+                .create_async()
+                .await?;
+
+            let build_queue = env.async_build_queue().await;
+            assert!(build_queue.queued_crates().await?.is_empty());
+
+            let mut conn = env.async_db().await.async_conn().await;
+            queue_rebuilds(&mut conn, &env.config(), &build_queue).await?;
+
+            assert!(build_queue.queued_crates().await?.is_empty());
+
+            Ok(())
+        })
+    }
+
+    #[test]
+    fn test_rebuild_when_old() {
+        crate::test::async_wrapper(|env| async move {
+            env.override_config(|config| {
+                config.max_queued_rebuilds = Some(100);
+                config.rebuild_up_to_date = Some(NaiveDate::from_ymd_opt(2024, 1, 1).unwrap());
+            });
+
+            env.async_fake_release()
+                .await
+                .name("foo")
+                .version("0.1.0")
+                .builds(vec![FakeBuild::default()
+                    .rustc_version("rustc 1.84.0-nightly (e7c0d2750 2020-10-15)")])
+                .create_async()
+                .await?;
+
+            let build_queue = env.async_build_queue().await;
+            assert!(build_queue.queued_crates().await?.is_empty());
+
+            let mut conn = env.async_db().await.async_conn().await;
+            queue_rebuilds(&mut conn, &env.config(), &build_queue).await?;
+
+            let queue = build_queue.queued_crates().await?;
+            assert_eq!(queue.len(), 1);
+            assert_eq!(queue[0].name, "foo");
+            assert_eq!(queue[0].version, "0.1.0");
+            assert_eq!(queue[0].priority, REBUILD_PRIORITY);
+
+            Ok(())
+        })
+    }
+
+    #[test]
+    fn test_dont_rebuild_when_full() {
+        crate::test::async_wrapper(|env| async move {
+            env.override_config(|config| {
+                config.max_queued_rebuilds = Some(1);
+                config.rebuild_up_to_date = Some(NaiveDate::from_ymd_opt(2024, 1, 1).unwrap());
+            });
+
+            let build_queue = env.async_build_queue().await;
+            build_queue
+                .add_crate("foo1", "0.1.0", REBUILD_PRIORITY, None)
+                .await?;
+            build_queue
+                .add_crate("foo2", "0.1.0", REBUILD_PRIORITY, None)
+                .await?;
+
+            env.async_fake_release()
+                .await
+                .name("foo")
+                .version("0.1.0")
+                .builds(vec![FakeBuild::default()
+                    .rustc_version("rustc 1.84.0-nightly (e7c0d2750 2020-10-15)")])
+                .create_async()
+                .await?;
+
+            let build_queue = env.async_build_queue().await;
+            assert_eq!(build_queue.queued_crates().await?.len(), 2);
+
+            let mut conn = env.async_db().await.async_conn().await;
+            queue_rebuilds(&mut conn, &env.config(), &build_queue).await?;
+
+            assert_eq!(build_queue.queued_crates().await?.len(), 2);
+
+            Ok(())
+        })
+    }
+
     #[test]
     fn test_add_duplicate_doesnt_fail_last_priority_wins() {
         crate::test::async_wrapper(|env| async move {
diff --git a/src/config.rs b/src/config.rs
index 60be0b2cb..1ef9f64d3 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -1,5 +1,6 @@
 use crate::{cdn::CdnKind, storage::StorageKind};
 use anyhow::{anyhow, bail, Context, Result};
+use chrono::NaiveDate;
 use std::{env::VarError, error::Error, path::PathBuf, str::FromStr, time::Duration};
 use tracing::trace;
 use url::Url;
@@ -113,6 +114,10 @@ pub struct Config {
     pub(crate) build_default_memory_limit: Option<usize>,
     pub(crate) include_default_targets: bool,
     pub(crate) disable_memory_limit: bool,
+
+    // automatic rebuild configuration
+    pub(crate) max_queued_rebuilds: Option<usize>,
+    pub(crate) rebuild_up_to_date: Option<NaiveDate>,
 }
 
 impl Config {
@@ -230,6 +235,8 @@ impl Config {
                 "DOCSRS_BUILD_WORKSPACE_REINITIALIZATION_INTERVAL",
                 86400,
             )?),
+            max_queued_rebuilds: maybe_env("DOCSRS_MAX_QUEUED_REBUILDS")?,
+            rebuild_up_to_date: maybe_env("DOCSRS_REBUILD_UP_TO_DATE")?,
         })
     }
 }
diff --git a/src/lib.rs b/src/lib.rs
index 6eaae2746..ff2a039bb 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,7 +2,7 @@
 //! documentation of crates for the Rust Programming Language.
 #![allow(clippy::cognitive_complexity)]
 
-pub use self::build_queue::{AsyncBuildQueue, BuildQueue};
+pub use self::build_queue::{queue_rebuilds, AsyncBuildQueue, BuildQueue};
 pub use self::config::Config;
 pub use self::context::Context;
 pub use self::docbuilder::PackageKind;
diff --git a/src/utils/daemon.rs b/src/utils/daemon.rs
index 94789245c..23f84ba5d 100644
--- a/src/utils/daemon.rs
+++ b/src/utils/daemon.rs
@@ -3,7 +3,7 @@
 //! This daemon will start web server, track new packages and build them
 
 use crate::{
-    cdn,
+    cdn, queue_rebuilds,
     utils::{queue_builder, report_error},
     web::start_web_server,
     AsyncBuildQueue, Config, Context, Index, RustwideBuilder,
@@ -91,6 +91,35 @@ pub fn start_background_repository_stats_updater(context: &dyn Context) -> Resul
     Ok(())
 }
 
+pub fn start_background_queue_rebuild(context: &dyn Context) -> Result<(), Error> {
+    let runtime = context.runtime()?;
+    let pool = context.pool()?;
+    let config = context.config()?;
+    let build_queue = runtime.block_on(context.async_build_queue())?;
+
+    if config.max_queued_rebuilds.is_none() || config.rebuild_up_to_date.is_none() {
+        info!("rebuild config incomplete, skipping rebuild queueing");
+        return Ok(());
+    }
+
+    async_cron(
+        &runtime,
+        "background queue rebuilder",
+        Duration::from_secs(60 * 60),
+        move || {
+            let pool = pool.clone();
+            let build_queue = build_queue.clone();
+            let config = config.clone();
+            async move {
+                let mut conn = pool.get_async().await?;
+                queue_rebuilds(&mut conn, &config, &build_queue).await?;
+                Ok(())
+            }
+        },
+    );
+    Ok(())
+}
+
 pub fn start_background_cdn_invalidator(context: &dyn Context) -> Result<(), Error> {
     let metrics = context.instance_metrics()?;
     let config = context.config()?;
@@ -183,6 +212,7 @@ pub fn start_daemon(
 
     start_background_repository_stats_updater(&*context)?;
     start_background_cdn_invalidator(&*context)?;
+    start_background_queue_rebuild(&*context)?;
 
     // NOTE: if a error occurred earlier in `start_daemon`, the server will _not_ be joined -
     // instead it will get killed when the process exits.

From 60ef24d6d6af5d166f5429c866196182efcb5ecd Mon Sep 17 00:00:00 2001
From: Denis Cornehl
Date: Thu, 24 Oct 2024 04:35:59 +0200
Subject: [PATCH 2/3] add comment about static rebuild priority

---
 src/build_queue.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/build_queue.rs b/src/build_queue.rs
index 925e2348d..311055ece 100644
--- a/src/build_queue.rs
+++ b/src/build_queue.rs
@@ -15,6 +15,10 @@ use std::sync::Arc;
 use tokio::runtime::Runtime;
 use tracing::{debug, error, info, instrument};
 
+/// The static prioriry for background rebuilds.
+/// Used when queueing rebuilds, and when rendering them
+/// collapsed in the UI.
+/// For Normal build priorities we use smaller values.
 pub(crate) const REBUILD_PRIORITY: i32 = 20;
 
 #[derive(Debug, Clone, Eq, PartialEq, serde::Serialize)]

From 7676275bfd1ba0b22270c6db8549477c4ff960a4 Mon Sep 17 00:00:00 2001
From: Denis Cornehl
Date: Fri, 25 Oct 2024 05:25:57 +0200
Subject: [PATCH 3/3] fix typos in comments

---
 src/build_queue.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/build_queue.rs b/src/build_queue.rs
index 311055ece..4d2319a4e 100644
--- a/src/build_queue.rs
+++ b/src/build_queue.rs
@@ -15,10 +15,10 @@ use std::sync::Arc;
 use tokio::runtime::Runtime;
 use tracing::{debug, error, info, instrument};
 
-/// The static prioriry for background rebuilds.
+/// The static priority for background rebuilds.
 /// Used when queueing rebuilds, and when rendering them
 /// collapsed in the UI.
-/// For Normal build priorities we use smaller values.
+/// For normal build priorities we use smaller values.
 pub(crate) const REBUILD_PRIORITY: i32 = 20;
 
 #[derive(Debug, Clone, Eq, PartialEq, serde::Serialize)]
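
For anyone trying out these patches, a minimal sketch follows of how the new pieces could be wired up for a one-off run, outside the hourly async_cron in start_background_queue_rebuild. It only uses items introduced or shown above (docs_rs::queue_rebuilds, the Context trait, Pool::get_async, and the two new config fields, which map to the DOCSRS_MAX_QUEUED_REBUILDS and DOCSRS_REBUILD_UP_TO_DATE environment variables); the helper name queue_rebuilds_once and the surrounding error handling are illustrative assumptions, not part of the patches.

    use anyhow::Result;
    use docs_rs::{queue_rebuilds, Context};

    // Hypothetical one-off trigger, mirroring start_background_queue_rebuild
    // but calling queue_rebuilds() a single time instead of on an hourly cron.
    fn queue_rebuilds_once(context: &dyn Context) -> Result<()> {
        let runtime = context.runtime()?;
        let config = context.config()?;
        let pool = context.pool()?;
        let build_queue = runtime.block_on(context.async_build_queue())?;

        // queue_rebuilds() expects both settings to be present; bail out
        // early like the daemon function does when they are unset.
        if config.max_queued_rebuilds.is_none() || config.rebuild_up_to_date.is_none() {
            return Ok(());
        }

        runtime.block_on(async {
            // check out an async connection from the pool, exactly as the
            // daemon's cron closure does above, and queue pending rebuilds
            let mut conn = pool.get_async().await?;
            queue_rebuilds(&mut conn, &config, &build_queue).await
        })
    }

The is_none() guard mirrors start_background_queue_rebuild; without it, queue_rebuilds() would panic on its expect() calls whenever the two environment variables are not configured.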