From aafeee9a28c2a4872c0e4b2985d81e92511cdcd0 Mon Sep 17 00:00:00 2001 From: Michael Weiss Date: Thu, 22 Feb 2024 18:28:39 +0100 Subject: [PATCH] Reland: Cleanup the logic for "merging" package "patches" This is a reland of c88bad4 which I had to revert in 01f2ff7 since we had to downgrade the `config` crate (4c6f027). The reland is possible since we could upgrade the `config` crate to version 0.15 (9f05d6a) in the meantime. I cherry-picked c88bad4 but had to manually merge in the changes from 55920b5 and 97f0cee as well. See below for the original commit message: The old logic was pretty complex and only necessary to build the correct relative paths to the patch files (the paths in `pkg.toml` must be prepended with the relative path to the directory containing the `pkg.toml` file). Since the recently released version 0.14.0 of the `config` crate we can access the "origin" of a configuration value (`Value::origin()`) so we can use that information to avoid having to check if the `patches` have changed every time we merge another `pkg.toml` file. Unfortunately this does currently require a dedicated source implementation (`PkgTomlSource` but actually why not) since `config::File::from_str()` always sets the URI/origin to `None` and the new `set_patches_base_dir()` function is a bit of a hack... IMO the new code is much more readable, more efficient, and overall still cleaner though (most of the new code is for error handling and the custom `Source` implementation). [0]: https://github.com/mehcode/config-rs/blob/0.14.0/CHANGELOG.md#0140---2024-02-01 Signed-off-by: Michael Weiss (cherry picked from commit c88bad4fb30e2146691329ada0e4621d05b26512) --- src/package/package.rs | 42 ++++++++ src/repository/mod.rs | 1 + src/repository/pkg_toml_source.rs | 48 +++++++++ src/repository/repository.rs | 169 +++++++++++------------------- 4 files changed, 152 insertions(+), 108 deletions(-) create mode 100644 src/repository/pkg_toml_source.rs diff --git a/src/package/package.rs b/src/package/package.rs index 6e8db5fa..74d39411 100644 --- a/src/package/package.rs +++ b/src/package/package.rs @@ -9,8 +9,12 @@ // use std::collections::HashMap; +use std::path::Path; use std::path::PathBuf; +use anyhow::anyhow; +use anyhow::Context; +use anyhow::Result; use getset::Getters; use serde::Deserialize; use serde::Serialize; @@ -20,6 +24,7 @@ use crate::package::name::*; use crate::package::source::*; use crate::package::version::*; use crate::package::{Phase, PhaseName}; +use crate::repository::normalize_relative_path; use crate::util::docker::ImageName; use crate::util::EnvironmentVariableName; @@ -104,6 +109,43 @@ impl Package { format!("{} {}", self.name, self.version) } + // A function to prepend the path of the origin/base directory (where the `pkg.toml` file that + // defined the "patches" resides in) to the relative paths of the patches (it usually only + // makes sense to call this function once!): + pub fn set_patches_base_dir(&mut self, origin_dir: &Path, root_dir: &Path) -> Result<()> { + // origin_dir: The path to the directory of the pkg.toml file where the patches are declared + // root_dir: The root directory of the packages repository + for patch in self.patches.iter_mut() { + // Prepend the path of the directory of the `pkg.toml` file to the relative path of the + // patch file: + let mut path = origin_dir.join(patch.as_path()); + // Ensure that we use relative paths for the patches (the rest of the code that uses + // the patches doesn't work correctly with absolute paths): + if path.is_absolute() { + path = path + .strip_prefix(root_dir) + .map(|p| p.to_owned()) + .with_context(|| { + anyhow!( + "Cannot strip the prefix {} (root directory) form path {} (origin directory)", + root_dir.display(), + origin_dir.display() + ) + })?; + } + if path.is_absolute() { + // The stripping of root_dir in the previous if branch didn't work: + return Err(anyhow!( + "Bug: The path {} is absolute but it should be a relative path.", + path.display() + )); + } else { + *patch = normalize_relative_path(path)?; + } + } + Ok(()) + } + #[cfg(test)] pub fn set_dependencies(&mut self, dependencies: Dependencies) { self.dependencies = dependencies; diff --git a/src/repository/mod.rs b/src/repository/mod.rs index 5167a781..c7d9faeb 100644 --- a/src/repository/mod.rs +++ b/src/repository/mod.rs @@ -13,3 +13,4 @@ mod repository; pub use repository::*; mod fs; +mod pkg_toml_source; diff --git a/src/repository/pkg_toml_source.rs b/src/repository/pkg_toml_source.rs new file mode 100644 index 00000000..9f765dad --- /dev/null +++ b/src/repository/pkg_toml_source.rs @@ -0,0 +1,48 @@ +// Copyright (c) 2020-2024 science+computing ag and other contributors +// +// This program and the accompanying materials are made +// available under the terms of the Eclipse Public License 2.0 +// which is available at https://www.eclipse.org/legal/epl-2.0/ +// +// SPDX-License-Identifier: EPL-2.0 + +// A custom `Source` implementation for the `config` crate to tack the `pkg.toml` file path as URI/origin +// in addition to the content (basically a replacement for `config::File::from_str(str, format)`). + +use std::path::Path; + +use config::ConfigError; +use config::FileFormat; +use config::Format; +use config::Map; +use config::Source; +use config::Value; + +#[derive(Clone, Debug)] +pub struct PkgTomlSource { + content: String, + uri: String, +} + +impl PkgTomlSource { + pub fn new(path: &Path, content: String) -> Self { + // We could also use `path.to_str()` for proper error handling: + let path = path.to_string_lossy().to_string(); + PkgTomlSource { content, uri: path } + } +} + +impl Source for PkgTomlSource { + fn clone_into_box(&self) -> Box { + Box::new((*self).clone()) + } + + fn collect(&self) -> Result, ConfigError> { + FileFormat::Toml + .parse(Some(&self.uri), &self.content) + .map_err(|cause| ConfigError::FileParse { + uri: Some(self.uri.clone()), + cause, + }) + } +} diff --git a/src/repository/repository.rs b/src/repository/repository.rs index f6db4d52..a58d3dfd 100644 --- a/src/repository/repository.rs +++ b/src/repository/repository.rs @@ -18,9 +18,6 @@ use anyhow::Context; use anyhow::Error; use anyhow::Result; use regex::Regex; -use resiter::AndThen; -use resiter::FilterMap; -use resiter::Map; use tracing::trace; use crate::package::Package; @@ -41,7 +38,7 @@ impl From> for Repository { } // A helper function to normalize relative Unix paths (ensures that one cannot escape using `..`): -fn normalize_relative_path(path: PathBuf) -> Result { +pub fn normalize_relative_path(path: PathBuf) -> Result { let mut normalized_path = PathBuf::new(); for component in path.components() { match component { @@ -110,30 +107,6 @@ impl Repository { trace!("Loading files from filesystem"); let fsr = FileSystemRepresentation::load(path.to_path_buf())?; - // Helper function to extract the `patches` array from a package config/definition: - fn get_patches( - config: &config::ConfigBuilder, - path: &Path, - ) -> Result> { - // TODO: Avoid unnecessary config building (inefficient): - let config = config.build_cloned().context(anyhow!( - "Failed to load the following TOML file: {}", - path.display() - ))?; - match config.get_array("patches") { - Ok(v) => v - .into_iter() - .map(config::Value::into_string) - .map_err(Error::from) - .map_err(|e| e.context("patches must be strings")) - .map_err(Error::from) - .map_ok(PathBuf::from) - .collect(), - Err(config::ConfigError::NotFound(_)) => Ok(Vec::with_capacity(0)), - Err(e) => Err(Error::from(e)), - } - } - let cwd = std::env::current_dir()?; let leaf_files = fsr .files() @@ -145,92 +118,72 @@ impl Repository { Err(e) => Some(Err(e)), }); progress.set_length(leaf_files.clone().count().try_into()?); - leaf_files.inspect(|r| trace!("Loading files for {:?}", r)) + leaf_files + .inspect(|r| trace!("Loading files for {:?}", r)) .map(|path| { progress.inc(1); let path = path?; - fsr.get_files_for(path)? + let config = fsr.get_files_for(path)? .iter() + // Load all "layers": .inspect(|(path, _)| trace!("Loading layer at {}", path.display())) - .fold(Ok(Config::builder()) as Result<_>, |config, (path, content)| { - let mut config = config?; - - let patches_before_merge = get_patches(&config, path)?; - config = config.add_source(config::File::from_str(content, config::FileFormat::Toml)); - let patches_after_merge = get_patches(&config, path)?; - - // TODO: Get rid of the unnecessarily complex handling of the `patches` configuration setting: - // Ideally this would be handled by the `config` crate (this is - // already the case for all other "settings" but in this case we also need - // to prepend the corresponding directory path). - let patches = if patches_before_merge == patches_after_merge { - patches_before_merge - } else { - // The patches have changed since the `config.merge()` of the next - // `pkg.toml` file so we have to build the paths to the patch files - // by prepending the path to the directory of the `pkg.toml` file since - // `path` is only available in this "iteration". - patches_after_merge - .into_iter() - .map(|p| if let Some(current_dir) = path.parent() { - // Prepend the path of the directory of the `pkg.toml` file to - // the name of the patch file: - let mut path = current_dir.join(p); - // Ensure that we use relative paths for the patches (the rest - // of the code that uses the patches doesn't work correctly - // with absolute paths): - if path.is_absolute() { - // We assume that cwd is part of the prefix (currently, the - // path comes from `git2::Repository::workdir()` and should - // always be absolute and start from cwd so this is fine): - path = path - .strip_prefix(&cwd) - .map(|p| p.to_owned()) - .with_context(|| anyhow!("Cannot strip the prefix {} form path {}", cwd.display(), current_dir.display()))?; - } - if path.is_absolute() { - Err(anyhow!("The path {} is absolute but it should be a relative path.", path.display())) - } else { - normalize_relative_path(path) - } - } else { - Err(anyhow!("Path should point to path with parent, but doesn't: {}", path.display())) - }) - .inspect(|patch| trace!("Patch: {:?}", patch)) - // If the patch file exists, use it (as config::Value). - // Otherwise we have an error here, because we're referring to a non-existing file: - .and_then_ok(|patch| if patch.exists() { - Ok(Some(patch)) - } else { - Err(anyhow!("The following patch does not exist: {}", patch.display())) - }) - .filter_map_ok(|o| o) - .collect::>>() - .with_context(|| anyhow!("Could not process the patches declared here: {}", path.display()))? - }; - - trace!("Patches after postprocessing merge: {:?}", patches); - let patches = patches - .into_iter() - .map(|p| p.display().to_string()) - .map(config::Value::from) - .collect::>(); - { - // Update the `patches` configuration setting: - let mut builder = Config::builder(); - builder = builder.set_default("patches", config::Value::from(patches))?; - config = config.add_source(builder.build()?); - // Ideally we'd use `config.set()` but that is a permanent override (so - // subsequent `config.merge()` merges won't have an effect on - // "patches"). There's also `config.set_once()` but that only lasts - // until the next `config.merge()` and `config.set_default()` only sets - // a default value. - } - Ok(config) + .fold(Config::builder(), |config_builder, (path, content)| { + use crate::repository::pkg_toml_source::PkgTomlSource; + config_builder.add_source(PkgTomlSource::new(path, (*content).to_string())) }) - .and_then(|c| c.build()?.try_deserialize::().map_err(Error::from) - .with_context(|| anyhow!("Could not load package configuration: {}", path.display()))) - .map(|pkg| ((pkg.name().clone(), pkg.version().clone()), pkg)) + .build()?; + + let patches_value = config.get_array("patches"); + let mut pkg = config + .try_deserialize::() + .map_err(Error::from) + .with_context(|| { + anyhow!("Could not load package configuration: {}", path.display()) + })?; + + if !pkg.patches().is_empty() { + // We have to build the full relative paths to the patch files by + // prepending the path to the directory of the `pkg.toml` file they've + // been defined in so that they can be found later. + let patches = patches_value.context(anyhow!( + "Bug: Could not get the \"patches\" value for: {}", + path.display() + ))?; + let first_patch_value = patches.first().ok_or(anyhow!( + "Bug: Could not get the first \"patches\" entry for: {}", + path.display() + ))?; + // Get the origin (path to the `pkg.toml` file) for the "patches" + // setting (it must currently be the same for all array entries): + let origin_path = first_patch_value.origin().map(PathBuf::from).ok_or(anyhow!( + "Bug: Could not get the origin of the first \"patches\" entry for: {}", + path.display() + ))?; + // Note: `parent()` only "Returns None if the path terminates in a root + // or prefix, or if it’s the empty string." so this should never happen: + let origin_dir_path = origin_path.parent().ok_or(anyhow!( + "Bug: Could not get the origin's parent of the first \"patches\" entry for: {}", + path.display() + ))?; + pkg.set_patches_base_dir(origin_dir_path, &cwd) + .with_context(|| { + anyhow!("Could not set the base directory for the patches declared here: {}", path.display()) + })?; + // Check if the patches exist: + for patch in pkg.patches() { + if !patch.exists() { + return Err(anyhow!( + "The following patch does not exist: {}", + patch.display() + )) + .with_context(|| { + anyhow!("Could not process the patches declared here: {}", path.display()) + }); + } + } + } + + Ok(((pkg.name().clone(), pkg.version().clone()), pkg)) }) .collect::>>() .map(Repository::new)