From 659d8baf2918da4fd5096e40fe5b9cbc06c1db88 Mon Sep 17 00:00:00 2001 From: Wolf Vollprecht Date: Thu, 23 May 2024 11:34:44 +0200 Subject: [PATCH] feat: accept multiple urls for url source and use them as mirrors (#840) --- examples/cargo-edit/recipe.yaml | 5 +- examples/rich/recipe.yaml | 4 +- src/packaging/metadata.rs | 1 + src/recipe/parser/source.rs | 18 ++- ...recipe__parser__tests__recipe_windows.snap | 30 ++-- ...d__recipe__parser__tests__unix_recipe.snap | 30 ++-- src/source/mod.rs | 8 +- src/source/url_source.rs | 151 ++++++++++-------- test-data/recipes/correct-sha/recipe.yaml | 4 +- 9 files changed, 139 insertions(+), 112 deletions(-) diff --git a/examples/cargo-edit/recipe.yaml b/examples/cargo-edit/recipe.yaml index 89f646d58..fe14525bc 100644 --- a/examples/cargo-edit/recipe.yaml +++ b/examples/cargo-edit/recipe.yaml @@ -8,7 +8,10 @@ package: version: ${{ version }} source: - url: https://github.com/killercup/cargo-edit/archive/refs/tags/v${{ version }}.tar.gz + # url: + # - https://github.com/killercup/cargo-edit/archive/refs/tags/v${{ version }}.tar.gz + # - https://foo.com/bla.tar.gz + # url: https://github.com/killercup/cargo-edit/archive/refs/tags/v${{ version }}.tar.gz sha256: 46670295e2323fc2f826750cdcfb2692fbdbea87122fe530a07c50c8dba1d3d7 build: diff --git a/examples/rich/recipe.yaml b/examples/rich/recipe.yaml index f7d43f0d8..8262d6d1a 100644 --- a/examples/rich/recipe.yaml +++ b/examples/rich/recipe.yaml @@ -8,7 +8,9 @@ package: version: ${{ version }} source: - - url: https://pypi.io/packages/source/r/rich/rich-${{ version }}.tar.gz + - url: + - https://example.com/rich-${{ version }}.tar.gz # this will give a 404! + - https://pypi.io/packages/source/r/rich/rich-${{ version }}.tar.gz sha256: d653d6bccede5844304c605d5aac802c7cf9621efd700b46c7ec2b51ea914898 build: diff --git a/src/packaging/metadata.rs b/src/packaging/metadata.rs index 77463e6e5..6e6930327 100644 --- a/src/packaging/metadata.rs +++ b/src/packaging/metadata.rs @@ -81,6 +81,7 @@ fn contains_prefix_text( // will break either way as C:/ can't be converted // to something meaningful in unix either way let forward_slash: Cow<'_, str> = to_forward_slash_lossy(prefix); + let contains_prefix = memchr::memmem::find_iter(mmap.as_ref(), forward_slash.deref()) .next() .is_some(); diff --git a/src/recipe/parser/source.rs b/src/recipe/parser/source.rs index 5c0cc9702..82148268e 100644 --- a/src/recipe/parser/source.rs +++ b/src/recipe/parser/source.rs @@ -4,7 +4,7 @@ use std::{fmt, path::PathBuf, str::FromStr}; use rattler_digest::{serde::SerializableHash, Md5, Md5Hash, Sha256, Sha256Hash}; use serde::{Deserialize, Serialize}; -use serde_with::serde_as; +use serde_with::{formats::PreferOne, serde_as, OneOrMany}; use url::Url; use crate::{ @@ -71,7 +71,8 @@ impl TryConvertNode> for RenderedNode { return Err(vec![_partialerror!( *self.span(), ErrorKind::Other, - label = "unknown source type" + label = "unknown source type (no `url`, `path` or `git` found)", + help = "are you missing `url`, `path` or `git`?" )]); } } @@ -384,7 +385,8 @@ impl fmt::Display for GitUrl { #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct UrlSource { /// Url to the source code (usually a tar.gz or tar.bz2 etc. file) - url: Url, + #[serde_as(as = "OneOrMany<_, PreferOne>")] + url: Vec, /// Optionally a sha256 checksum to verify the downloaded file #[serde(skip_serializing_if = "Option::is_none")] @@ -409,8 +411,8 @@ pub struct UrlSource { impl UrlSource { /// Get the url. - pub const fn url(&self) -> &Url { - &self.url + pub fn urls(&self) -> &[Url] { + self.url.as_slice() } /// Get the SHA256 checksum of the URL source. @@ -441,7 +443,7 @@ impl UrlSource { impl TryConvertNode for RenderedMappingNode { fn try_convert(&self, _name: &str) -> Result> { - let mut url = None; + let mut urls = None; let mut sha256 = None; let mut md5 = None; let mut patches = Vec::new(); @@ -450,7 +452,7 @@ impl TryConvertNode for RenderedMappingNode { self.iter().map(|(key, value)| { match key.as_str() { - "url" => url = value.try_convert(key)?, + "url" => urls = value.try_convert(key)?, "sha256" => { let sha256_str: RenderedScalarNode = value.try_convert(key)?; let sha256_out = rattler_digest::parse_digest_from_hex::(sha256_str.as_str()).ok_or_else(|| vec![_partialerror!(*sha256_str.span(), ErrorKind::InvalidSha256)])?; @@ -475,7 +477,7 @@ impl TryConvertNode for RenderedMappingNode { Ok(()) }).flatten_errors()?; - let url = url.ok_or_else(|| { + let url = urls.ok_or_else(|| { vec![_partialerror!( *self.span(), ErrorKind::MissingField("url".into()), diff --git a/src/recipe/snapshots/rattler_build__recipe__parser__tests__recipe_windows.snap b/src/recipe/snapshots/rattler_build__recipe__parser__tests__recipe_windows.snap index dda65856e..30e51a9f5 100644 --- a/src/recipe/snapshots/rattler_build__recipe__parser__tests__recipe_windows.snap +++ b/src/recipe/snapshots/rattler_build__recipe__parser__tests__recipe_windows.snap @@ -14,21 +14,23 @@ Recipe { source: [ Url( UrlSource { - url: Url { - scheme: "https", - cannot_be_a_base: false, - username: "", - password: None, - host: Some( - Domain( - "github.com", + url: [ + Url { + scheme: "https", + cannot_be_a_base: false, + username: "", + password: None, + host: Some( + Domain( + "github.com", + ), ), - ), - port: None, - path: "/xtensor-stack/xtensor/archive/0.24.6.tar.gz", - query: None, - fragment: None, - }, + port: None, + path: "/xtensor-stack/xtensor/archive/0.24.6.tar.gz", + query: None, + fragment: None, + }, + ], sha256: Some( [ 248, diff --git a/src/recipe/snapshots/rattler_build__recipe__parser__tests__unix_recipe.snap b/src/recipe/snapshots/rattler_build__recipe__parser__tests__unix_recipe.snap index 2ccede476..26c2d04f9 100644 --- a/src/recipe/snapshots/rattler_build__recipe__parser__tests__unix_recipe.snap +++ b/src/recipe/snapshots/rattler_build__recipe__parser__tests__unix_recipe.snap @@ -14,21 +14,23 @@ Recipe { source: [ Url( UrlSource { - url: Url { - scheme: "https", - cannot_be_a_base: false, - username: "", - password: None, - host: Some( - Domain( - "github.com", + url: [ + Url { + scheme: "https", + cannot_be_a_base: false, + username: "", + password: None, + host: Some( + Domain( + "github.com", + ), ), - ), - port: None, - path: "/xtensor-stack/xtensor/archive/0.24.6.tar.gz", - query: None, - fragment: None, - }, + port: None, + path: "/xtensor-stack/xtensor/archive/0.24.6.tar.gz", + query: None, + fragment: None, + }, + ], sha256: Some( [ 248, diff --git a/src/source/mod.rs b/src/source/mod.rs index 909409404..ba522333f 100644 --- a/src/source/mod.rs +++ b/src/source/mod.rs @@ -150,13 +150,11 @@ pub async fn fetch_sources( } } Source::Url(src) => { - tracing::info!("Fetching source from URL: {}", src.url()); - - let file_name_from_url = src - .url() + let first_url = src.urls().first().expect("we should have at least one URL"); + let file_name_from_url = first_url .path_segments() .and_then(|segments| segments.last().map(|last| last.to_string())) - .ok_or_else(|| SourceError::UrlNotFile(src.url().clone()))?; + .ok_or_else(|| SourceError::UrlNotFile(first_url.clone()))?; let res = url_source::url_src(src, &cache_src, tool_configuration).await?; let mut dest_dir = if let Some(target_directory) = src.target_directory() { diff --git a/src/source/url_source.rs b/src/source/url_source.rs index 8b36cb395..ebbef3c7b 100644 --- a/src/source/url_source.rs +++ b/src/source/url_source.rs @@ -50,88 +50,103 @@ pub(crate) async fn url_src( ) -> Result { // convert sha256 or md5 to Checksum let checksum = Checksum::from_url_source(source).ok_or_else(|| { - SourceError::NoChecksum(format!("No checksum found for url: {}", source.url())) + SourceError::NoChecksum(format!("No checksum found for url(s): {:?}", source.urls())) })?; - if source.url().scheme() == "file" { - let local_path = source.url().to_file_path().map_err(|_| { - SourceError::Io(std::io::Error::new( - std::io::ErrorKind::Other, - "Invalid local file path", - )) - })?; - - if !local_path.is_file() { - return Err(SourceError::FileNotFound(local_path)); + let mut last_error = None; + for url in source.urls() { + if url.scheme() == "file" { + let local_path = url.to_file_path().map_err(|_| { + SourceError::Io(std::io::Error::new( + std::io::ErrorKind::Other, + "Invalid local file path", + )) + })?; + + if !local_path.is_file() { + return Err(SourceError::FileNotFound(local_path)); + } + + if !checksum.validate(&local_path) { + return Err(SourceError::ValidationFailed); + } + + tracing::info!("Using local source file."); + return Ok(local_path); } - if !checksum.validate(&local_path) { - return Err(SourceError::ValidationFailed); + let cache_name = PathBuf::from(cache_name_from_url(url, &checksum).ok_or( + SourceError::UnknownErrorStr("Failed to build cache name from url"), + )?); + let cache_name = cache_dir.join(cache_name); + + let metadata = fs::metadata(&cache_name); + if metadata.is_ok() && metadata?.is_file() && checksum.validate(&cache_name) { + tracing::info!("Found valid source cache file."); + return Ok(cache_name.clone()); } - tracing::info!("Using local source file."); - return Ok(local_path); - } + let client = reqwest::Client::new(); + let download_size = { + let resp = client.head(url.as_str()).send().await?; + if resp.status().is_success() { + resp.headers() + .get(reqwest::header::CONTENT_LENGTH) + .and_then(|ct_len| ct_len.to_str().ok()) + .and_then(|ct_len| ct_len.parse().ok()) + .unwrap_or(0) + } else { + tracing::warn!( + "Could not download file from: {}. Error {}", + url, + resp.status() + ); + last_error = Some(resp.error_for_status()); + continue; + } + }; + + let progress_bar = tool_configuration.fancy_log_handler.add_progress_bar( + indicatif::ProgressBar::new(download_size) + .with_prefix("Downloading") + .with_style(tool_configuration.fancy_log_handler.default_bytes_style()), + ); + progress_bar.set_message( + url.path_segments() + .and_then(|segs| segs.last()) + .map(str::to_string) + .unwrap_or_else(|| "Unknown File".to_string()), + ); + let mut file = tokio::fs::File::create(&cache_name).await?; + + let request = client.get(url.clone()); + let mut download = request.send().await?; + + while let Some(chunk) = download.chunk().await? { + progress_bar.inc(chunk.len() as u64); + file.write_all(&chunk).await?; + } - let cache_name = PathBuf::from(cache_name_from_url(source.url(), &checksum).ok_or( - SourceError::UnknownErrorStr("Failed to build cache name from url"), - )?); - let cache_name = cache_dir.join(cache_name); + progress_bar.finish(); - let metadata = fs::metadata(&cache_name); - if metadata.is_ok() && metadata?.is_file() && checksum.validate(&cache_name) { - tracing::info!("Found valid source cache file."); - return Ok(cache_name.clone()); - } + file.flush().await?; - let client = reqwest::Client::new(); - let download_size = { - let resp = client.head(source.url().as_str()).send().await?; - if resp.status().is_success() { - resp.headers() - .get(reqwest::header::CONTENT_LENGTH) - .and_then(|ct_len| ct_len.to_str().ok()) - .and_then(|ct_len| ct_len.parse().ok()) - .unwrap_or(0) - } else { - return Err(SourceError::UrlNotFile(source.url().clone())); + if !checksum.validate(&cache_name) { + tracing::error!("Checksum validation failed!"); + fs::remove_file(&cache_name)?; + return Err(SourceError::ValidationFailed); } - }; - let progress_bar = tool_configuration.fancy_log_handler.add_progress_bar( - indicatif::ProgressBar::new(download_size) - .with_prefix("Downloading") - .with_style(tool_configuration.fancy_log_handler.default_bytes_style()), - ); - progress_bar.set_message( - source - .url() - .path_segments() - .and_then(|segs| segs.last()) - .map(str::to_string) - .unwrap_or_else(|| "Unknown File".to_string()), - ); - let mut file = tokio::fs::File::create(&cache_name).await?; - - let request = client.get(source.url().as_str()); - let mut download = request.send().await?; - - while let Some(chunk) = download.chunk().await? { - progress_bar.inc(chunk.len() as u64); - file.write_all(&chunk).await?; + return Ok(cache_name); } - progress_bar.finish(); - - file.flush().await?; - - if !checksum.validate(&cache_name) { - tracing::error!("Checksum validation failed!"); - fs::remove_file(&cache_name)?; - return Err(SourceError::ValidationFailed); + if let Some(Err(last_error)) = last_error { + Err(SourceError::Url(last_error)) + } else { + Err(SourceError::UnknownError( + "Could not download any file".to_string(), + )) } - - Ok(cache_name) } #[cfg(test)] diff --git a/test-data/recipes/correct-sha/recipe.yaml b/test-data/recipes/correct-sha/recipe.yaml index e4010d388..10cd897cf 100644 --- a/test-data/recipes/correct-sha/recipe.yaml +++ b/test-data/recipes/correct-sha/recipe.yaml @@ -6,7 +6,9 @@ package: name: test-package version: ${{ version }} source: - - url: https://github.com/microsoft/mssql-jdbc/raw/v12.2.0/LICENSE + - url: + - https://idontexist.com/microsoft/mssql-jdbc/raw/v12.2.0/LICENSE + - https://github.com/microsoft/mssql-jdbc/raw/v12.2.0/LICENSE sha256: 96783d7ed4ff39e20c9ae6642ac6de02692ec8909e9910700e3694d68211ee97 file_name: mssql-jdbc-license.txt