diff --git a/Cargo.lock b/Cargo.lock index 0693d5adb..09f615c91 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3376,6 +3376,29 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "nickel-lang-package" +version = "0.1.0" +dependencies = [ + "anyhow", + "directories", + "gix", + "gix-hash", + "insta", + "nickel-lang-core", + "nickel-lang-git", + "nickel-lang-utils", + "pubgrub", + "regex", + "semver", + "serde", + "serde_json", + "serde_with", + "tempfile", + "test-generator", + "thiserror 1.0.69", +] + [[package]] name = "nickel-lang-utils" version = "0.1.0" @@ -3772,6 +3795,17 @@ dependencies = [ "parking_lot", ] +[[package]] +name = "pubgrub" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdd14552ad5f5d743a323c10d576f26822a044355d6601f377d813ece46f38fd" +dependencies = [ + "rustc-hash 1.1.0", + "serde", + "thiserror 1.0.69", +] + [[package]] name = "pyckel" version = "1.9.0" @@ -3886,7 +3920,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash", + "rustc-hash 2.0.0", "rustls", "socket2", "thiserror 2.0.3", @@ -3904,7 +3938,7 @@ dependencies = [ "getrandom", "rand", "ring", - "rustc-hash", + "rustc-hash 2.0.0", "rustls", "rustls-pki-types", "slab", @@ -4155,6 +4189,12 @@ version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc-hash" version = "2.0.0" @@ -4301,9 +4341,12 @@ checksum = "a3cf7c11c38cb994f3d40e8a8cde3bbd1f72a435e4c49e85d6553d8312306152" [[package]] name = "semver" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +dependencies = [ + "serde", +] [[package]] name = "serde" diff --git a/Cargo.toml b/Cargo.toml index 433cecb17..78507ca05 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ members = [ "vector", "lsp/nls", "lsp/lsp-harness", + "package", "utils", "wasm-repl", "pyckel", @@ -25,6 +26,7 @@ readme = "README.md" [workspace.dependencies] nickel-lang-core = { version = "0.10.0", path = "./core", default-features = false } nickel-lang-git = { version = "0.1.0", path = "./git" } +nickel-lang-package = { version = "0.1.0", path = "./package" } nickel-lang-vector = { version = "0.1.0", path = "./vector" } nickel-lang-utils = { version = "0.1.0", path = "./utils" } lsp-harness = { version = "0.1.0", path = "./lsp/lsp-harness" } @@ -58,6 +60,7 @@ directories = "4.0.1" env_logger = "0.10" git-version = "0.3.5" gix = "0.67.0" +gix-hash = "0.15.0" indexmap = "1.9.3" indoc = "2" insta = "1.29.0" diff --git a/package/Cargo.toml b/package/Cargo.toml new file mode 100644 index 000000000..6cf42b74c --- /dev/null +++ b/package/Cargo.toml @@ -0,0 +1,35 @@ +[package] +name = "nickel-lang-package" +description = "The Nickel Package Manager (npm)" +version = "0.1.0" + +authors.workspace = true +edition.workspace = true +homepage.workspace = true +keywords.workspace = true +license.workspace = true +repository.workspace = true +readme.workspace = true + +[features] + +[dependencies] +anyhow.workspace = true +directories.workspace = true +gix.workspace = true +gix-hash = { workspace = true, features = ["serde"] } +nickel-lang-core = { workspace = true, default-features = false } +nickel-lang-git.workspace = true +pubgrub = { version = "0.2.1", features = ["serde"] } +regex.workspace = true +semver = { version = "1.0.23", features = ["serde"] } +serde.workspace = true +serde_json.workspace = true +serde_with.workspace = true +tempfile = { 
workspace = true } +thiserror.workspace = true + +[dev-dependencies] +insta = { workspace = true, features = ["filters"] } +nickel-lang-utils.workspace = true +test-generator.workspace = true diff --git a/package/src/config.rs b/package/src/config.rs new file mode 100644 index 000000000..7ca53a647 --- /dev/null +++ b/package/src/config.rs @@ -0,0 +1,66 @@ +use std::path::PathBuf; + +use directories::ProjectDirs; +use std::collections::HashMap; + +const DEFAULT_INDEX_URL: &str = "https://github.com/nickel-lang/nickel-mine.git"; + +/// Global configuration for the package manager. +#[derive(Clone, Debug)] +pub struct Config { + pub index_url: gix::Url, + + pub cache_dir: PathBuf, + + /// Defaults to `<cache_dir>/index` + pub index_dir: PathBuf, + /// Defaults to `<cache_dir>/index-packages` + pub index_package_dir: PathBuf, + /// Defaults to `<cache_dir>/git-packages` + pub git_package_dir: PathBuf, + + /// Git source replacements: any git packages that we're supposed to + /// fetch from the original source will be transparently fetched from the + /// replacement source instead. The lock-file will not see this replacement; + /// it's intended for vendoring or mirroring, not changing the contents of + /// the package. + pub git_replacements: HashMap, + // TODO: index replacements (and private indices) +} + +impl Default for Config { + fn default() -> Self { + // unwrap: TODO + let cache_dir = ProjectDirs::from("org", "nickel-lang", "nickel") + .unwrap() + .cache_dir() + .to_owned(); + Self { + // unwrap: it's a constant, and we know it's a valid url. + index_url: DEFAULT_INDEX_URL.try_into().unwrap(), + index_dir: PathBuf::default(), + index_package_dir: PathBuf::default(), + git_package_dir: PathBuf::default(), + cache_dir: PathBuf::default(), + git_replacements: HashMap::default(), + } + .with_cache_dir(cache_dir) + } +} + +impl Config { + /// Configures the root cache directory, and reconfigures the various derived paths + /// based on the new root cache directory. 
+ pub fn with_cache_dir(self, cache_dir: PathBuf) -> Self { + Self { + index_dir: cache_dir.join("index"), + index_package_dir: cache_dir.join("index-packages"), + git_package_dir: cache_dir.join("git-packages"), + ..self + } + } + + pub fn with_index_dir(self, index_dir: PathBuf) -> Self { + Self { index_dir, ..self } + } +} diff --git a/package/src/error.rs b/package/src/error.rs new file mode 100644 index 000000000..074204497 --- /dev/null +++ b/package/src/error.rs @@ -0,0 +1,217 @@ +use std::path::{Path, PathBuf}; + +use gix::ObjectId; +use nickel_lang_core::{eval::cache::CacheImpl, identifier::Ident, program::Program}; + +use crate::{ + index::{self}, + version::SemVer, + UnversionedPackage, +}; + +// TODO: implement IntoDiagnostic. +pub enum Error { + Io { + path: Option, + error: std::io::Error, + }, + Serialize { + error: serde_json::Error, + }, + ManifestEval { + package: Option, + program: Program, + error: nickel_lang_core::error::Error, + }, + NoPackageRoot { + path: PathBuf, + }, + RestrictedPath { + /// The url of the git package that tried the bad import. + package_url: Box, + /// The git id of the bad package. + package_commit: ObjectId, + /// The relative path of the bad package within its git repo. + package_path: PathBuf, + attempted: PathBuf, + restriction: PathBuf, + }, + /// There was some error interacting with a git repository. + Git(nickel_lang_git::Error), + InvalidUrl { + url: String, + msg: String, + }, + Resolution { + msg: String, + }, + InternalManifestError { + path: PathBuf, + msg: String, + }, + /// A package in the index (or, hopefully, a package potentially destined for + /// the index, because packages actually *in* the index should be validated) + /// tried to depend on a path or git dependency. + InvalidIndexDep { + id: index::Id, + dep: Box, + }, + /// There was an error persisting a temporary file. + TempFilePersist { + error: tempfile::PersistError, + }, + /// The package `id` wasn't found in the package index. 
+ UnknownIndexPackage { + id: index::Id, + }, + /// While trying to insert a package in the index, we found that that same + /// package and version was already present. + DuplicateIndexPackageVersion { + id: index::Id, + version: SemVer, + }, +} + +impl std::error::Error for Error {} + +impl std::fmt::Debug for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self, f) + } +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::Io { error, path } => { + if let Some(path) = path { + write!(f, "{}: {error}", path.display()) + } else { + error.fmt(f) + } + } + // Just a short and not-very-informative error. To write a better error message to + // the terminal, use `program.report` like the cli does. + Error::ManifestEval { package, .. } => { + if let Some(package) = package { + write!(f, "error evaluating manifest for package {package}") + } else { + write!(f, "error evaluating package manifest") + } + } + Error::RestrictedPath { + attempted, + restriction, + package_url, + package_commit, + package_path, + } => { + write!( + f, + "git package {package_url}@{package_commit}/{} tried to import path {}, but can only import from {}", + package_path.display(), + attempted.display(), + restriction.display() + ) + } + Error::Git(e) => e.fmt(f), + Error::InvalidUrl { url, msg } => { + write!(f, "invalid url {}: {}", url, msg) + } + Error::InternalManifestError { path, msg } => { + write!( + f, + "internal error reading the manifest at {}; this is a bug in nickel: {msg}", + path.display() + ) + } + Error::NoPackageRoot { path } => write!( + f, + "tried to import a relative path ({}), but we have no reference", + path.display() + ), + Error::Resolution { msg } => write!(f, "version resolution failed: {msg}"), + Error::TempFilePersist { error } => error.fmt(f), + Error::UnknownIndexPackage { id } => write!(f, "package {id} not found in the 
index"), + Error::InvalidIndexDep { id, dep } => match dep.as_ref() { + UnversionedPackage::Git(g) => write!( + f, + "package {id} depends on git package {}, so it cannot be put in the index", + g.url + ), + UnversionedPackage::Path { path } => write!( + f, + "package {id} depends on path package {}, so it cannot be put in the index", + path.display() + ), + }, + Error::DuplicateIndexPackageVersion { id, version } => { + write!(f, "package {id}@{version} is already present in the index") + } + Error::Serialize { error } => { + write!(f, "serialization error: {error}") + } + } + } +} + +pub trait ResultExt { + type T; + fn in_package(self, package: Ident) -> Result; +} + +impl ResultExt for Result { + type T = T; + + fn in_package(self, package: Ident) -> Result { + self.map_err(|e| match e { + Error::ManifestEval { program, error, .. } => Error::ManifestEval { + package: Some(package), + program, + error, + }, + x => x, + }) + } +} + +pub trait IoResultExt { + type T; + fn with_path(self, path: impl AsRef) -> Result; + fn without_path(self) -> Result; +} + +impl IoResultExt for Result { + type T = T; + fn with_path(self, path: impl AsRef) -> Result { + self.map_err(|e| Error::Io { + path: Some(path.as_ref().to_owned()), + error: e, + }) + } + + fn without_path(self) -> Result { + self.map_err(|e| Error::Io { + path: None, + error: e, + }) + } +} + +impl From for Error { + fn from(e: nickel_lang_git::Error) -> Self { + Self::Git(e) + } +} + +impl From for Error { + fn from(error: tempfile::PersistError) -> Self { + Self::TempFilePersist { error } + } +} + +impl From for Error { + fn from(error: serde_json::Error) -> Self { + Self::Serialize { error } + } +} diff --git a/package/src/index/mod.rs b/package/src/index/mod.rs new file mode 100644 index 000000000..548e1ebd7 --- /dev/null +++ b/package/src/index/mod.rs @@ -0,0 +1,399 @@ +//! The package index. +//! +//! The package index lives in a hard-coded location on github. It gets cached on the local +//! 
disk, and then lazily loaded from there and cached in memory. +//! +//! TODO: +//! - add file locks to protect the on-disk cache from concurrent modification by multiple nickel +//! processes + +use std::{ + cell::RefCell, + collections::{BTreeMap, HashMap}, + io::Write, + path::PathBuf, + sync::LazyLock, +}; + +use gix::ObjectId; +use nickel_lang_core::identifier::Ident; +use nickel_lang_git::Spec; +use regex::Regex; +use serde::{Deserialize, Serialize}; +use tempfile::{tempdir_in, NamedTempFile}; + +use crate::{ + config::Config, + error::{Error, IoResultExt as _}, + version::SemVer, + Precise, VersionReq, +}; + +pub mod scrape; + +pub use scrape::fetch_git; + +/// The in-memory cache. +#[derive(Debug)] +pub struct PackageCache { + package_files: HashMap, + config: Config, +} + +#[derive(Debug)] +pub struct PackageIndex { + cache: RefCell, +} + +fn id_path(config: &Config, id: &Id) -> PathBuf { + match id { + Id::Github { org, name } => config.index_dir.join("github").join(org).join(name), + } +} + +impl PackageCache { + fn path(&self, id: &Id) -> PathBuf { + id_path(&self.config, id) + } + + /// Creates a temporary file that's in the same directory as the place that `id`'s + /// index file would go. + fn tmp_file(&self, id: &Id) -> NamedTempFile { + let path = self.path(id); + // unwrap: the `path` function always outputs a non-empty path + let parent = path.parent().unwrap(); + std::fs::create_dir_all(parent).unwrap(); + NamedTempFile::new_in(parent).unwrap() + } + + /// Loads and returns all the version metadata for a single package. + /// + /// Reads from disk if necessary; returns a cached result if not. 
+ fn load(&mut self, id: &Id) -> Result, Error> { + use std::collections::hash_map::Entry; + + match self.package_files.entry(id.clone()) { + Entry::Occupied(entry) => Ok(Some(entry.into_mut())), + Entry::Vacant(entry) => { + let mut file = CachedPackageFile::default(); + let path = id_path(&self.config, id); + let data = std::fs::read_to_string(&path).with_path(&path)?; + for line in data.lines() { + let package: Package = serde_json::from_str(line).unwrap(); + if file + .packages + .insert(package.vers.clone(), package) + .is_some() + { + panic!("duplicate version, index is corrupt"); + } + } + Ok(Some(entry.insert(file))) + } + } + } + + pub fn clear(&mut self) { + self.package_files.clear(); + } + + /// Saves a package description to disk. + /// + /// (Also retains a cached copy in memory.) + pub fn save(&mut self, pkg: Package) -> Result<(), Error> { + let id: Id = pkg.id.clone().into(); + let version = pkg.vers.clone(); + let mut existing = self + .load(&id)? + .cloned() + .unwrap_or(CachedPackageFile::default()); + if existing.packages.insert(pkg.vers.clone(), pkg).is_some() { + return Err(Error::DuplicateIndexPackageVersion { id, version }); + } + let mut tmp = self.tmp_file(&id); + for pkg in existing.packages.values() { + serde_json::to_writer(&mut tmp, pkg)?; + tmp.write_all(b"\n").with_path(tmp.path())?; + } + + let out_path = self.path(&id); + tmp.persist(&out_path)?; + Ok(()) + } +} + +impl PackageIndex { + pub fn new(config: Config) -> Self { + PackageIndex { + cache: RefCell::new(PackageCache { + config, + package_files: HashMap::new(), + }), + } + } + + /// Fetch an updated package index from github and save it to our cache directory. 
+ /// TODO: refactor this, since there's a distinction between reading (and appending to) + /// an index, and caching downloaded packages + pub fn fetch_from_github(&self) -> Result<(), Error> { + eprint!("Fetching an updated package index..."); + let config = self.cache.borrow().config.clone(); + + // unwrap: we defined the root directory ourselves, and it has a parent. (TODO: now that it's configurable, do we need another check?) + let parent_dir = config.index_dir.parent().unwrap(); + std::fs::create_dir_all(parent_dir).with_path(parent_dir)?; + let tree_path = tempdir_in(parent_dir).with_path(parent_dir)?; + let _id = nickel_lang_git::fetch(&Spec::head(config.index_url), tree_path.path())?; + + // If there's an existing index at the on-disk location, replace it with the + // fresh one we just downloaded. Doing this atomically and cross-platform is + // tricky (rename is weird with directories), so we delete and then rename, + // and possibly fail (platform-dependent) if someone beat us to re-creating the + // directory. + // + // Cargo uses an advisory file lock for all changes to the index, so at least + // multiple instances of cargo won't mess up (but other processes could interfere). + // Maybe we could do the same. + if config.index_dir.exists() { + // We could do better with error messages here: if the recursive delete fails + // because of some problem with a child, our error message will nevertheless + // point at the root path. + std::fs::remove_dir_all(&config.index_dir).with_path(&config.index_dir)?; + } + std::fs::rename(tree_path.into_path(), &config.index_dir).with_path(&config.index_dir)?; + eprintln!("done!"); + Ok(()) + } + + /// Fetch the index if we don't have one. 
+ pub fn ensure_exists(&self) -> Result<(), Error> { + let root = self.cache.borrow().config.index_dir.clone(); + if !root.exists() { + self.fetch_from_github()?; + } + Ok(()) + } + + pub fn available_versions<'a>( + &'a self, + id: &Id, + ) -> Result + 'a, Error> { + let mut cache = self.cache.borrow_mut(); + let pkg_file = cache.load(id)?; + let versions: Vec<_> = pkg_file + .map(|pkg_file| pkg_file.packages.keys().cloned().collect()) + .unwrap_or_default(); + Ok(versions.into_iter()) + } + + pub fn all_versions(&self, id: &Id) -> Result, Error> { + let mut cache = self.cache.borrow_mut(); + let pkg_file = cache.load(id)?; + Ok(pkg_file + .map(|pkg_file| { + pkg_file + .packages + .iter() + .map(|(v, package)| (v.clone(), package.clone())) + .collect() + }) + .unwrap_or_default()) + } + + pub fn package(&self, id: &Id, v: SemVer) -> Result, Error> { + Ok(self.all_versions(id)?.get(&v).cloned()) + } + + pub fn save(&mut self, pkg: Package) -> Result<(), Error> { + self.cache.borrow_mut().save(pkg) + } + + pub fn ensure_downloaded(&self, id: &Id, v: SemVer) -> Result<(), Error> { + let package = self + .package(id, v.clone())? 
+ .ok_or_else(|| Error::UnknownIndexPackage { id: id.clone() })?; + let precise = Precise::Index { + id: id.clone(), + version: v, + }; + self.ensure_loc_downloaded(&precise, &package.id) + } + + fn ensure_loc_downloaded( + &self, + precise: &Precise, + // TODO: better naming + loc: &PreciseId, + ) -> Result<(), Error> { + let PreciseId::Github { org, name, commit } = loc; + let url = format!("https://github.com/{org}/{name}.git"); + // unwrap: the url above is valid (TODO: ensure that org and name are sanitized) + let url: gix::Url = url.try_into().unwrap(); + + let target_dir = precise.local_path(&self.cache.borrow().config); + if target_dir.exists() { + eprintln!("Package {org}/{name}@{commit} already exists"); + return Ok(()); + } + + // unwrap: the local path for an index package always has a parent + let parent_dir = target_dir.parent().unwrap(); + std::fs::create_dir_all(parent_dir).with_path(parent_dir)?; + eprintln!( + "Downloading {org}/{name}@{commit} to {}", + target_dir.display() + ); + let tmp_dir = tempdir_in(parent_dir).with_path(parent_dir)?; + let _tree_id = nickel_lang_git::fetch(&Spec::commit(url, *commit), tmp_dir.path())?; + + let tmp_dir = tmp_dir.into_path(); + std::fs::rename(tmp_dir, &target_dir).with_path(target_dir)?; + + Ok(()) + } +} + +/// The identifier of a package in the package index. +#[derive(Clone, PartialEq, Eq, Debug, Hash, Serialize, Deserialize, PartialOrd, Ord)] +pub enum Id { + #[serde(rename = "github")] + Github { org: String, name: String }, +} + +impl Id { + /// Returns the path (relative to the package index base directory) where this + /// package should be stored. + pub fn path(&self) -> PathBuf { + match self { + Id::Github { org, name } => PathBuf::from(format!("github/{org}/{name}")), + } + } + + pub fn remote_url(&self) -> gix::Url { + match self { + // TODO: once we ensure validation on org and name, the unwrap will be ok. 
+ Id::Github { org, name } => format!("https://github.com/{org}/{name}") + .try_into() + .unwrap(), + } + } +} + +impl std::fmt::Display for Id { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Id::Github { org, name } => write!(f, "github/{org}/{name}"), + } + } +} + +#[derive(Debug)] +pub enum IdParseError { + /// We expect exactly 2 slashes, and return this error if there aren't. + Slashes, + /// We only know about github right now, and return this error if they ask for a different one. + UnknownIndex { index: String }, + /// Our rules for user and package names are currently the same as Nickel's identifier rules. + InvalidId { id: String }, +} + +impl std::error::Error for IdParseError {} + +impl std::fmt::Display for IdParseError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + IdParseError::Slashes => { + write!(f, "doesn't match the expected // pattern") + } + IdParseError::UnknownIndex { index } => write!( + f, + "unknown index `{index}`, the only valid value is `github`" + ), + IdParseError::InvalidId { id } => write!(f, "invalid identifier `{id}`"), + } + } +} + +static ID_REGEX: LazyLock = + LazyLock::new(|| Regex::new("^_*[a-zA-Z][_a-zA-Z0-9-']*$").unwrap()); + +impl std::str::FromStr for Id { + type Err = IdParseError; + + fn from_str(s: &str) -> Result { + let mut parts = s.split('/'); + let index = parts.next().ok_or(IdParseError::Slashes)?; + let org = parts.next().ok_or(IdParseError::Slashes)?; + let name = parts.next().ok_or(IdParseError::Slashes)?; + if parts.next().is_some() { + return Err(IdParseError::Slashes); + }; + + if index != "github" { + return Err(IdParseError::UnknownIndex { + index: index.to_string(), + }); + } + + if !ID_REGEX.is_match(org) { + return Err(IdParseError::InvalidId { id: org.to_owned() }); + } + + if !ID_REGEX.is_match(name) { + return Err(IdParseError::InvalidId { + id: name.to_owned(), + }); + } + + Ok(Id::Github { + org: org.to_owned(), 
+ name: name.to_owned(), + }) + } +} + +/// The identifier of a package + version in the package index. +/// +/// Includes a content hash of the package. +#[derive(Clone, PartialEq, Eq, Debug, Hash, Serialize, Deserialize, PartialOrd, Ord)] +pub enum PreciseId { + #[serde(rename = "github")] + Github { + org: String, + name: String, + commit: ObjectId, + }, +} + +impl From for Id { + fn from(id: PreciseId) -> Self { + match id { + PreciseId::Github { org, name, .. } => Id::Github { org, name }, + } + } +} + +#[derive(Clone, Debug, Default)] +pub struct CachedPackageFile { + pub packages: BTreeMap, +} + +/// A package record in the index. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Package { + pub id: PreciseId, + pub vers: SemVer, + pub nickel_vers: SemVer, + pub deps: BTreeMap, + + /// Version of the index schema. Currently always zero. + v: u32, + // TODO: any other metadata that we'd like to store in the index +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct IndexDependency { + #[serde(flatten)] + pub id: Id, + pub req: VersionReq, +} diff --git a/package/src/index/scrape.rs b/package/src/index/scrape.rs new file mode 100644 index 000000000..e28bb5320 --- /dev/null +++ b/package/src/index/scrape.rs @@ -0,0 +1,55 @@ +//! Tools for updating an index from a git repository. +//! +//! Given an org and a project name, we look for a github project at that location. +//! We then look for all tags of the form v. For each such tag, we record +//! a version of that package. + +use gix::ObjectId; +use nickel_lang_git::Spec; +use tempfile::tempdir; + +use crate::{ + error::{Error, IoResultExt}, + version::SemVer, + ManifestFile, +}; + +use super::{Id, Package, PreciseId}; + +/// Fetch a package from the specified place, and figure out what its index +/// entry should look like. +/// TODO: allow a subdirectory? 
+pub fn fetch_git(id: &Id, version: SemVer, commit: &ObjectId) -> Result { + // We need to fetch the manifest file to get some metadata out. We're currently shallow-cloning + // the whole repo, but we could use a github API (or maybe some fancier git features) to be more + // efficient. + let tmpdir = tempdir().without_path()?; + let _id = nickel_lang_git::fetch( + &Spec::commit(id.remote_url(), commit.clone()), + tmpdir.path(), + )?; + + let manifest_path = tmpdir.path().join("package.ncl"); + let manifest = ManifestFile::from_path(manifest_path)?; + + let deps = manifest + .dependencies + .into_iter() + .map(|(name, dep)| Ok((name, dep.as_index_dep(id.clone())?))) + .collect::>()?; + + let Id::Github { org, name } = id.clone(); + let id = PreciseId::Github { + org, + name, + commit: *commit, + }; + + Ok(Package { + id, + vers: version, + nickel_vers: manifest.nickel_version, + deps, + v: 0, + }) +} diff --git a/package/src/lib.rs b/package/src/lib.rs new file mode 100644 index 000000000..8736b6a4a --- /dev/null +++ b/package/src/lib.rs @@ -0,0 +1,255 @@ +use std::{ + path::{Path, PathBuf}, + str::FromStr, +}; + +use nickel_lang_core::cache::normalize_abs_path; + +use config::Config; +use error::Error; +use serde::{Deserialize, Serialize}; +use serde_with::{DeserializeFromStr, SerializeDisplay}; +use version::{PartialSemVer, PartialSemVerParseError, SemVer, SemVerParseError}; + +pub mod config; +pub mod error; +pub mod index; +pub mod lock; +pub mod manifest; +pub mod resolve; +pub mod version; + +pub use gix::ObjectId; +pub use manifest::ManifestFile; + +#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize)] +pub struct GitDependency { + /// The url of the git repo, in any format understood by `gix`. + /// For example, it can be a path. + #[serde(with = "serde_url")] + pub url: gix::Url, + #[serde(default, rename = "ref")] + pub target: nickel_lang_git::Target, + /// The path to the nickel package within the git repo, if it is not at the top level. 
+ #[serde(default)] + pub path: PathBuf, +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash, DeserializeFromStr, SerializeDisplay)] +pub enum VersionReq { + // TODO: could make this a PartialSemVer + Compatible(SemVer), + // TODO: This one could allow pre-releases + Exact(SemVer), +} + +impl std::fmt::Display for VersionReq { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + VersionReq::Compatible(v) => v.fmt(f), + VersionReq::Exact(v) => write!(f, "={v}"), + } + } +} + +#[derive(Debug, thiserror::Error)] +pub enum VersionReqParseError { + #[error(transparent)] + Exact(#[from] SemVerParseError), + #[error(transparent)] + Compatible(#[from] PartialSemVerParseError), +} + +impl FromStr for VersionReq { + type Err = VersionReqParseError; + + fn from_str(s: &str) -> Result { + if let Some(v) = s.strip_prefix('=') { + Ok(VersionReq::Exact(v.parse()?)) + } else { + Ok(VersionReq::Compatible(PartialSemVer::from_str(s)?.into())) + } + } +} + +impl VersionReq { + pub fn matches(&self, v: &SemVer) -> bool { + match self { + VersionReq::Compatible(lower_bound) => { + lower_bound <= v && *v < lower_bound.next_incompatible() + } + VersionReq::Exact(w) => v == w, + } + } +} + +/// A source includes the place to fetch a package from (e.g. git or a registry), +/// along with possibly some narrowing-down of the allowed versions (e.g. a range +/// of versions, or a git commit id). +#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize)] +pub enum Dependency { + Git(GitDependency), + Path { path: PathBuf }, + Index { id: index::Id, version: VersionReq }, +} + +/// The same as [`Dependency`], but only for the packages that have fixed, unresolvable, versions. 
+#[derive(Clone, PartialEq, Eq, Hash, Debug)] +pub enum UnversionedPackage { + Git(GitDependency), + Path { path: PathBuf }, +} + +impl From for Dependency { + fn from(p: UnversionedPackage) -> Self { + match p { + UnversionedPackage::Git(git) => Dependency::Git(git), + UnversionedPackage::Path { path } => Dependency::Path { path }, + } + } +} + +impl Dependency { + pub fn matches(&self, precise: &Precise) -> bool { + match (self, precise) { + (Dependency::Git(git), Precise::Git { url: repo, .. }) => &git.url == repo, + (Dependency::Path { path }, Precise::Path { path: locked_path }) => path == locked_path, + ( + Dependency::Index { + id: dep_id, + version: dep_version, + }, + Precise::Index { id, version }, + ) => id == dep_id && dep_version.matches(version), + _ => false, + } + } + + pub fn as_index_dep(self, parent_id: index::Id) -> Result { + match self { + Dependency::Index { id, version } => Ok(index::IndexDependency { id, req: version }), + Dependency::Git(g) => Err(Error::InvalidIndexDep { + id: parent_id.clone(), + dep: Box::new(crate::UnversionedPackage::Git(g)), + }), + Dependency::Path { path } => Err(Error::InvalidIndexDep { + id: parent_id.clone(), + dep: Box::new(crate::UnversionedPackage::Path { path }), + }), + } + } +} + +mod serde_url { + use serde::{de::Error, Deserialize, Serialize as _}; + + pub fn serialize(url: &gix::Url, ser: S) -> Result { + // unwrap: locked urls can only come from nickel strings in the manifest file, which must be + // valid utf-8 + std::str::from_utf8(url.to_bstring().as_slice()) + .unwrap() + .serialize(ser) + } + + pub fn deserialize<'de, D: serde::Deserializer<'de>>(de: D) -> Result { + let s = String::deserialize(de)?; + gix::Url::try_from(s).map_err(|e| D::Error::custom(e.to_string())) + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, PartialOrd, Ord)] +pub struct IndexPrecise { + id: index::Id, + version: SemVer, +} + +/// A precise package version, in a format suitable for putting into 
a lockfile. +#[serde_with::serde_as] +#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, PartialOrd, Ord)] +pub enum Precise { + Git { + // We use `Precise` for a few different purposes, and not all of them need the url. (For + // resolution, for example, we could consider two git deps equal if they have the same id + // even if they came from different sources.) However, the lockfile should have a repo url in + // it, because it allows us to fetch the package if it isn't available, and it allows us to + // check if the locked dependency matches the manifest (which might only have the url). + #[serde(with = "serde_url")] + url: gix::Url, + // Serialize/deserialize as hex strings. + #[serde_as(as = "serde_with::DisplayFromStr")] + id: ObjectId, + path: PathBuf, + }, + /// The path is normalized (i.e., all '..'s are at the beginning), and relative + /// to the top-level package manifest. + /// + /// Note that when normalizing we only look at the path and not at the actual filesystem. + /// TODO: maybe just leave out the path altogether? cargo does... + Path { + path: PathBuf, + }, + Index { + id: index::Id, + version: SemVer, + }, +} + +impl Precise { + /// Where on the local filesystem can this package be found? + /// + /// The `base` directory configures a common root for all local caches. In + /// normal usage this could be [`default_cache_dir`], but when testing or + /// vendoring packages it might be something else. + /// + /// Note that the package might not actually be there yet, if it's a git or + /// index package that hasn't been fetched. + pub fn local_path(&self, config: &Config) -> PathBuf { + match self { + Precise::Git { id, path, .. } => repo_root(config, id).join(path), + Precise::Path { path } => Path::new(path).to_owned(), + Precise::Index { id, version } => config + .index_package_dir + .join(id.path()) + .join(version.to_string()), + } + } + + pub fn is_path(&self) -> bool { + matches!(self, Precise::Path { .. 
}) + } + + /// Is this locked package available offline? If not, it needs to be fetched. + pub fn is_available_offline(&self, config: &Config) -> bool { + // We consider path-dependencies to be always available offline, even if they don't exist. + // We consider git-dependencies to be available offline if there's a directory at + // `~/.cache/nickel/git-packages/ed8234.../` (or wherever the cache directory is on your system). We + // don't check if that directory contains the right git repository -- if someone has messed + // with the contents of `~/.cache/nickel`, that's your problem. + match self { + Precise::Path { .. } => true, + _ => self.local_path(config).is_dir(), + } + } + + /// If this is a path package with a relative path, turn it into an absolute path, relative to `root`. + pub fn with_abs_path(self, root: &std::path::Path) -> Self { + match self { + Precise::Path { path } => Precise::Path { + path: normalize_abs_path(&root.join(path)), + }, + x => x, + } + } + + pub fn version(&self) -> Option { + match self { + Precise::Index { version, .. } => Some(version.clone()), + _ => None, + } + } +} + +/// The path in our local filesystem where we store the git repo with the given id. +fn repo_root(config: &Config, id: &ObjectId) -> PathBuf { + config.git_package_dir.join(id.to_string()) +} diff --git a/package/src/lock.rs b/package/src/lock.rs new file mode 100644 index 000000000..31402f2de --- /dev/null +++ b/package/src/lock.rs @@ -0,0 +1,195 @@ +// c&p from old file. 
+ +use std::{ + collections::{BTreeMap, HashMap}, + path::Path, +}; + +use nickel_lang_core::{cache::normalize_path, identifier::Ident, package::PackageMap}; +use serde::{Deserialize, Serialize}; + +use crate::{ + config::Config, + error::{Error, IoResultExt}, + resolve::Resolution, + ManifestFile, Precise, +}; + +mod package_list { + use std::collections::HashMap; + + use serde::{Deserializer, Serializer}; + + use super::*; + + #[derive(Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord)] + struct Entry { + source: Precise, + #[serde(flatten)] + entry: LockFileEntry, + } + + pub fn serialize( + h: &HashMap, + ser: S, + ) -> Result { + let mut entries: Vec<_> = h + .iter() + .map(|(source, entry)| Entry { + source: source.clone(), + entry: entry.clone(), + }) + .collect(); + entries.sort(); + entries.serialize(ser) + } + + pub fn deserialize<'de, D: Deserializer<'de>>( + de: D, + ) -> Result, D::Error> { + let entries = Vec::::deserialize(de)?; + Ok(entries.into_iter().map(|e| (e.source, e.entry)).collect()) + } +} + +/// A lock file, specifying versions and names for all recursive dependencies. +/// +/// This defines the on-disk format for lock files. +/// +/// # Open question +/// +/// There's one big open question about the lock file: should it contain information +/// about path dependencies (and their recursive dependencies)? If it does, you +/// can immediately derive the `PackageMap` from the lock file, meaning that if the +/// interpreter gets the lock file then it can do everything else from there, +/// without doing any package resolution. So that's nice. +/// +/// The problem with putting information about path dependencies in the lock file is +/// that path dependencies can change without notice, making the lock file stale. +/// So the interpreter didn't have to do much work, but it ended up running on old +/// information. +/// +/// I think the decision here basically comes down to what we want from the CLI +/// interface. 
If we require a separate update-the-lock-file step (a la npm or poetry), +/// it makes sense to put the path dependency info here. But if we want an +/// auto-refresh step (a la cargo), we want to leave it out. Current strategy is +/// to keep it in, and we'll measure the performance of package resolution before +/// making a final decision. +#[derive(Clone, Debug, Default, Serialize, Deserialize)] +pub struct LockFile { + /// The dependencies of the current (top-level) package. + /// + /// These should be sorted so that the serialization doesn't change all the time. + pub dependencies: BTreeMap, + /// All packages that we know about, and the dependencies of each one. + /// + /// Note that the package list is not guaranteed to be closed: path dependencies + /// cannot have their dependencies resolved in the on-disk lockfile because they + /// can change at any time. *Some* path dependencies (for example, path dependencies + /// that are local to a git depencency repo) may have resolved dependencies. + #[serde(with = "package_list")] + pub packages: HashMap, +} + +impl LockFile { + pub fn new(manifest: &ManifestFile, resolution: &Resolution) -> Result { + // We don't put all packages in the lock file: we ignore dependencies (and therefore also + // transitive dependencies) of path deps. In order to figure out what to include, we + // traverse the depencency graph. + fn collect_packages( + res: &Resolution, + pkg: &Precise, + acc: &mut HashMap, + ) -> Result<(), Error> { + // let entry = LockFileEntry { + // dependencies: if pkg.is_path() { + // // Skip dependencies of path deps + // Default::default() + // } else { + // res.dependencies(pkg) + // }, + // }; + + // Let's try out what happens if we include path deps and their + // dependencies in the lock file. This makes the lock file + // potentially non-portable to different systems, but on the other + // hand it allows the package map to be read straight from the lock + // file. 
This is probably the way to go if we require manual lock + // file refreshing. + let entry = LockFileEntry { + dependencies: res + .dependencies(pkg)? + .into_iter() + .map(|(id, entry)| (id.label().to_owned(), entry)) + .collect(), + }; + + // Only recurse if this is the first time we've encountered this precise package. + if acc.insert(pkg.clone(), entry).is_none() { + for (_, dep) in acc[pkg].clone().dependencies { + collect_packages(res, &dep, acc)?; + } + } + Ok(()) + } + + let mut acc = HashMap::new(); + for dep in manifest.dependencies.values() { + collect_packages(resolution, &resolution.precise(dep), &mut acc)?; + } + + Ok(LockFile { + dependencies: manifest + .dependencies + .iter() + .map(|(name, dep)| (name.label().to_owned(), resolution.precise(dep))) + .collect(), + + packages: acc, + }) + } + + // TODO: propagate the error + pub fn from_path(path: impl AsRef) -> Self { + let contents = std::fs::read_to_string(path.as_ref()).unwrap(); + serde_json::from_str(&contents).unwrap() + } + + /// Build a package map from a lock-file. + /// + /// This only works if the lock-file contains path dependencies and their + /// recursive dependencies. See [`LockFile`]. + /// + /// `manifest_dir` is the directory containing the manifest file. Relative + /// path dependencies in the lock-file will be interpreted relative to the + /// manifest directory and turned into absolute paths. 
+ pub fn package_map(&self, manifest_dir: &Path, config: &Config) -> Result { + let manifest_dir = normalize_path(manifest_dir).without_path()?; + + let path = |pkg: &Precise| pkg.clone().with_abs_path(&manifest_dir).local_path(config); + + Ok(PackageMap { + top_level: self + .dependencies + .iter() + .map(|(id, pkg)| (Ident::new(id), path(pkg))) + .collect(), + packages: self + .packages + .iter() + .flat_map(|(pkg, entry)| { + entry + .dependencies + .iter() + .map(|(id, dep)| ((path(pkg), Ident::new(id)), path(dep))) + }) + .collect(), + }) + } +} + +/// The dependencies of a single package. +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, PartialOrd, Eq, Ord)] +pub struct LockFileEntry { + pub dependencies: BTreeMap, +} diff --git a/package/src/manifest.rs b/package/src/manifest.rs new file mode 100644 index 000000000..93054e421 --- /dev/null +++ b/package/src/manifest.rs @@ -0,0 +1,398 @@ +use std::{ + collections::HashMap, + path::{Path, PathBuf}, +}; + +use gix::ObjectId; +use nickel_lang_core::{ + cache::normalize_rel_path, + error::NullReporter, + eval::cache::CacheImpl, + identifier::Ident, + label::Label, + program::Program, + term::{make, RichTerm, RuntimeContract, Term}, +}; +use nickel_lang_git::Spec; +use serde::Deserialize; + +use crate::{ + config::Config, + error::{Error, IoResultExt}, + index, + lock::LockFile, + repo_root, + resolve::{Resolution, UnversionedPrecise}, + version::{FullSemVer, PartialSemVer, SemVer}, + Dependency, GitDependency, Precise, VersionReq, +}; + +/// This is the format of an evaluated manifest. +/// +/// Manifests are nickel files. In order to ingest them, we first evaluate them +/// as nickel files, then use nickel's deserialization support to turn them into +/// rust structs. This struct defines the format of that deserialization. +/// +/// Note that the deserialization step gives pretty useless error messages. We +/// get around this by applying the `std.package.Manifest` contract before +/// evaluation. 
This means that it's important for the validation applied +/// by deserialization to be less strict than the `std.package.Manifest` +/// contract, so that any errors in the manifest will be caught by the contract. +#[derive(Clone, Debug, Deserialize)] +struct ManifestFileFormat { + name: Ident, + version: FullSemVer, + nickel_version: PartialSemVer, + dependencies: HashMap, +} + +/// In the manifest file, we deserialize the index id in the manifest differently from +/// the serde default, to make it more human-friendly. +fn deserialize_index_package<'de, D>(deserializer: D) -> Result +where + D: serde::Deserializer<'de>, +{ + let s: String = Deserialize::deserialize(deserializer)?; + s.parse().map_err(serde::de::Error::custom) +} + +/// The deserialization format of a dependency in the manifest file. +#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize)] +enum DependencyFormat { + Git(GitDependency), + Path(String), + Index { + #[serde(deserialize_with = "deserialize_index_package")] + package: index::Id, + version: VersionReq, + }, +} + +impl From for Dependency { + fn from(df: DependencyFormat) -> Self { + match df { + DependencyFormat::Git(g) => Dependency::Git(g), + DependencyFormat::Path(p) => Dependency::Path { path: p.into() }, + DependencyFormat::Index { + package: id, + version, + } => Dependency::Index { id, version }, + } + } +} + +/// A package manifest file. +#[derive(Clone, Debug, PartialEq)] +pub struct ManifestFile { + // The directory containing the manifest file. Path deps are resolved relative to this. + // If `None`, path deps aren't allowed. + pub parent_dir: Option, + /// The name of the package. + pub name: Ident, + /// The version of the package. + pub version: SemVer, + /// The minimum nickel version supported by the package. + pub nickel_version: SemVer, + /// All the package's dependencies, and the local names that this package will use to refer to them. 
+ pub dependencies: HashMap, +} + +impl ManifestFile { + pub fn from_path(path: impl AsRef) -> Result { + let path = path.as_ref(); + let prog = + Program::new_from_file(path, std::io::stderr(), NullReporter {}).with_path(path)?; + let mut ret = ManifestFile::from_prog(path, prog)?; + ret.parent_dir = path.parent().map(Path::to_owned); + Ok(ret) + } + + pub fn from_contents(data: &[u8]) -> Result { + let prog = Program::new_from_source( + std::io::Cursor::new(data), + "", + std::io::stderr(), + NullReporter {}, + ) + .without_path()?; + ManifestFile::from_prog("".as_ref(), prog) + } + + fn from_prog(path: &Path, mut prog: Program) -> Result { + // Evaluate the manifest with an extra contract applied, so that nice error message will be generated. + // (Probably they applied the Manifest contract already, but just in case...) + // `contract` is `std.package.Manifest` + use nickel_lang_core::term::UnaryOp::RecordAccess; + let contract = make::op1( + RecordAccess("Manifest".into()), + make::op1(RecordAccess("package".into()), Term::Var("std".into())), + ); + prog.add_contract(RuntimeContract::new(contract, Label::default())); + + let manifest_term = prog.eval_full().map_err(|e| Error::ManifestEval { + package: None, + program: prog, + error: e, + })?; + ManifestFile::from_term(path, &manifest_term) + } + + fn lockfile_path(&self) -> Option { + let parent_dir = self.parent_dir.as_ref()?; + Some(parent_dir.join("package.lock")) + } + + pub fn is_lock_file_up_to_date(&self, lock_file: &LockFile) -> bool { + self.dependencies.iter().all(|(name, src)| { + lock_file + .dependencies + .get(name.label()) + .map_or(false, |id| src.matches(id)) + }) + } + + /// Checks if this manifest already has an up-to-date lockfile. + /// + /// Here, by up-to-date we mean that all dependencies in the manifest are present in the lockfile. + /// But we don't, for example, check whether git deps are fully up-to-date. 
+ fn find_lockfile(&self) -> Option { + let lock_file = std::fs::read_to_string(self.lockfile_path()?).ok()?; + let lock_file: LockFile = match serde_json::from_str(&lock_file) { + Ok(f) => f, + Err(e) => { + eprintln!("Found a lockfile, but it failed to parse: {e}"); + return None; + } + }; + self.is_lock_file_up_to_date(&lock_file) + .then_some(lock_file) + } + + /// Recursively resolve dependencies, based on an existing lock-file if there is one. + pub fn resolve(&self, config: Config) -> Result { + let lock = self.find_lockfile().unwrap_or_default(); + crate::resolve::resolve_with_lock(self, &lock, config) + } + + /// Determine the fully-resolved dependencies and write the lock-file to disk. + /// + /// Re-uses a lock file if there's one that's up-to-date. Otherwise, regenerates the lock file. + pub fn lock(&self, config: Config) -> Result { + if let Some(lock) = self.find_lockfile() { + eprintln!("Found an up-to-date lockfile"); + return Ok(lock); + } + + self.regenerate_lock(config) + } + + /// Regenerate the lock file, even if it already exists. + pub fn regenerate_lock(&self, config: Config) -> Result { + let resolution = self.resolve(config)?; + let lock = LockFile::new(self, &resolution)?; + + if let Some(lock_path) = self.lockfile_path() { + // unwrap: serde_json serialization fails if the derived `Serialize` + // trait fails (which it shouldn't), or if there's a map with + // non-string keys (all our maps have `Ident` keys). + let serialized_lock = serde_json::to_string_pretty(&lock).unwrap(); + if let Err(e) = std::fs::write(lock_path, serialized_lock) { + eprintln!("Warning: failed to write lock-file: {e}"); + } + } + + Ok(lock) + } + + // Convert from a `RichTerm` (that we assume was evaluated deeply). We + // could serialize/deserialize, but that doesn't handle the enums. 
+ fn from_term(path: &Path, rt: &RichTerm) -> Result { + // This is only ever called with terms that have passed the `std.package.Manifest` + // contract, so we can assume that they have the right fields. + let ManifestFileFormat { + name, + version, + nickel_version, + dependencies, + } = ManifestFileFormat::deserialize(rt.clone()).map_err(|e| { + Error::InternalManifestError { + path: path.to_owned(), + msg: e.to_string(), + } + })?; + Ok(Self { + parent_dir: None, + name, + version: version.into(), + nickel_version: nickel_version.into(), + dependencies: dependencies + .into_iter() + .map(|(k, v)| (k, v.into())) + .collect(), + }) + } +} + +#[derive(Clone, Debug, Default)] +pub struct Realization { + pub config: Config, + pub git: HashMap, + /// A map from (parent package, dependency) to child package. + pub dependency: HashMap<(Precise, Dependency), UnversionedPrecise>, + pub manifests: HashMap, +} + +impl Realization { + pub fn new(config: Config) -> Self { + Self { + config, + ..Default::default() + } + } + + // TODO: take in an import sequence (like: the dependency was imported from x, which was imported from y) and use it to improve error messages + pub fn realize_all( + &mut self, + root_path: &Path, + dep: &Dependency, + relative_to: Option<&Precise>, + ) -> Result<(), Error> { + let uprecise = match (dep, relative_to) { + // Repo dependencies are resolved later. They are not allowed to have + // transitive git or path dependencies, so we don't even need to recurse. + (Dependency::Index { .. 
}, _) => { + return Ok(()); + } + (Dependency::Git(git), _) => { + let id = self.realize_one(git)?; + UnversionedPrecise::Git { + id, + url: git.url.clone(), + path: git.path.clone(), + } + } + (Dependency::Path { path }, None) => UnversionedPrecise::Path { path: path.clone() }, + (Dependency::Path { path }, Some(relative_to)) => { + let p = normalize_rel_path(&relative_to.local_path(&self.config).join(path)); + match relative_to { + Precise::Git { + id, + url: repo, + path, + } => { + let repo_path = repo_root(&self.config, id); + let p = p + .strip_prefix(&repo_path) + .map_err(|_| Error::RestrictedPath { + package_url: Box::new(repo.clone()), + package_commit: *id, + package_path: path.clone(), + attempted: p.clone(), + restriction: repo_path.to_owned(), + })?; + UnversionedPrecise::Git { + id: *id, + url: repo.clone(), + path: p.to_owned(), + } + } + _ => UnversionedPrecise::Path { path: p }, + } + } + }; + + let precise = Precise::from(uprecise.clone()); + let path = precise.local_path(&self.config); + let abs_path = root_path.join(path); + + let parent_precise = relative_to.cloned().unwrap_or_else(|| Precise::Path { + path: root_path.to_owned(), + }); + self.dependency + .insert((parent_precise, dep.clone()), uprecise); + + // Only read the dependency manifest and recurse if it's a manifest we haven't + // seen yet. 
+ if !self.manifests.contains_key(&precise) { + let manifest = ManifestFile::from_path(abs_path.join("package.ncl"))?; + + self.manifests.insert(precise.clone(), manifest.clone()); + + for dep in manifest.dependencies.values() { + self.realize_all(root_path, dep, Some(&precise))?; + } + } + + Ok(()) + } + + fn realize_one(&mut self, git: &GitDependency) -> Result { + if let Some(id) = self.git.get(git) { + return Ok(*id); + } + + let url = self + .config + .git_replacements + .get(&git.url) + .unwrap_or(&git.url); + + let spec = Spec { + url: url.clone(), + target: git.target.clone(), + }; + let tmp_dir = + tempfile::tempdir_in(&self.config.cache_dir).with_path(&self.config.cache_dir)?; + let id = nickel_lang_git::fetch(&spec, tmp_dir.path())?; + // unwrap: gix currently only supports sha-1 hashes, so we know it will be the right size + let id: ObjectId = id.as_slice().try_into().unwrap(); + + // Now that we know the object hash, move the fetched repo to the right place in the cache. + let precise = Precise::Git { + id, + url: url.clone(), + path: PathBuf::default(), + }; + let path = precise.local_path(&self.config); + + if path.is_dir() { + // Because the path includes the git id, we're pretty confident that if it + // exists then it already has the right contents. + eprintln!("Already have a cache entry at {path:?}"); + } else { + eprintln!("Checking out {url} to {}", path.display()); + + // Unwrap: the result of `Precise::local_path` always has a parent directory. 
+ let parent_dir = path.parent().unwrap(); + std::fs::create_dir_all(parent_dir).with_path(parent_dir)?; + let tmp_dir = tmp_dir.into_path(); + std::fs::rename(tmp_dir, &path).with_path(path)?; + } + + self.git.insert(git.clone(), id); + Ok(id) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn manifest() { + let manifest = ManifestFile::from_contents( + r#"{name = "foo", version = "1.0.0", nickel_version = "1.9.0", authors = [], description = "hi"}"#.as_bytes(), + ) + .unwrap(); + assert_eq!( + manifest, + ManifestFile { + parent_dir: None, + name: "foo".into(), + version: SemVer::new(1, 0, 0), + nickel_version: SemVer::new(1, 9, 0), + dependencies: HashMap::default() + } + ) + } +} diff --git a/package/src/resolve.rs b/package/src/resolve.rs new file mode 100644 index 000000000..7237eb067 --- /dev/null +++ b/package/src/resolve.rs @@ -0,0 +1,554 @@ +//! We do cargo-style version resolution, where we allow multiple semver-incompatible +//! copies of a package, but we insist that all semver-compatible verisons must resolve +//! to the exact same version. +//! +//! This is not natively supported in pubgrub, so we use one of the two transformations described +//! in [their book](https://pubgrub-rs-guide.pages.dev/limitations/multiple_versions): +//! we make a new package for every collection of semver-compatible +//! versions of each package. So instead of having `foo` with versions `1.1`, `1.2` and `2.0`, +//! we have a package `foo#1` with versions `1.1` and `1.2` and another package `foo#2` +//! with version `2.0`. Since we present them to pubgrub +//! as different packages, they can both appear in the final resolution. 
+ +use std::{borrow::Borrow, collections::HashMap, path::PathBuf}; + +use nickel_lang_core::{cache::normalize_path, identifier::Ident, package::PackageMap}; +use pubgrub::{ + report::{DefaultStringReporter, Reporter as _}, + solver::DependencyProvider, +}; + +use crate::{ + config::Config, + error::{Error, IoResultExt as _}, + index::{self, Id, IndexDependency, PackageIndex}, + lock::LockFile, + manifest::Realization, + version::SemVer, + Dependency, IndexPrecise, ManifestFile, ObjectId, Precise, VersionReq, +}; + +type VersionRange = pubgrub::range::Range; + +pub struct PackageRegistry { + // The packages whose versions were locked in a lockfile; we'll try to prefer using + // those same versions. We won't absolutely insist on it, because if the manifest + // changed (or some path-dependency changed) then the old locked versions might not + // resolve anymore. + previously_locked: HashMap, + index: PackageIndex, + realized_unversioned: Realization, +} + +impl PackageRegistry { + pub fn list_versions<'a>( + &'a self, + package: &Package, + ) -> Result + 'a, Error> { + let locked_version = self.previously_locked.get(package).cloned(); + let rest = match package { + Package::Unversioned(_) => { + Box::new(std::iter::once(SemVer::new(0, 0, 0))) as Box> + } + Package::Bucket(b) => { + let bucket_version = b.version; + let iter = self + .index + .available_versions(&b.id)? + .filter(move |v| bucket_version.contains(v.clone())); + Box::new(iter) + } + }; + + // Put the locked version first, and then the other versions in any order (filtering to ensure that the locked version isn't repeated). 
+ Ok(locked_version + .clone() + .into_iter() + .chain(rest.filter(move |v| Some(v) != locked_version.as_ref()))) + } + + pub fn dep(&self, pkg: &Package, version: &SemVer, dep_id: &Id) -> Result { + let deps = match pkg { + Package::Unversioned(pkg) => self.unversioned_deps(pkg), + Package::Bucket(b) => self.index_deps(&b.id, version)?, + }; + Ok(deps + .iter() + .find_map(|d| match d { + Dependency::Index { id, version } if id == dep_id => Some(version.clone()), + _ => None, + }) + .unwrap()) + } + + pub fn unversioned_deps(&self, pkg: &UnversionedPrecise) -> Vec { + let precise = Precise::from(pkg.clone()); + let manifest = &self.realized_unversioned.manifests[&precise]; + manifest.dependencies.values().cloned().collect() + } + + pub fn index_deps(&self, id: &Id, version: &SemVer) -> Result, Error> { + let all_versions = self.index.all_versions(id)?; + let pkg = all_versions.get(version).unwrap(); + Ok(pkg + .deps + .iter() + .map(|(_, IndexDependency { id, req })| Dependency::Index { + id: id.clone(), + version: req.clone(), + }) + .collect()) + } +} + +/// A bucket version represents a collection of compatible semver versions. +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +pub enum BucketVersion { + /// A collection of versions all having the same major version number. + /// (For example, 1.x.y) + Major(u64), + /// A collection of versions all having major version zero, and the same minor version number. 
+ /// (For example, 0.2.x) + Minor(u64), +} + +impl std::fmt::Display for BucketVersion { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + BucketVersion::Major(v) => write!(f, "{v}"), + BucketVersion::Minor(v) => write!(f, "0.{v}"), + } + } +} + +impl BucketVersion { + pub fn contains(&self, semver: SemVer) -> bool { + match *self { + BucketVersion::Major(v) => v == semver.major, + BucketVersion::Minor(v) => semver.major == 0 && semver.minor == v, + } + } + + pub fn next(&self) -> BucketVersion { + match *self { + BucketVersion::Major(v) => BucketVersion::Major(v + 1), + BucketVersion::Minor(v) => BucketVersion::Minor(v + 1), + } + } + + pub fn compatible_range(&self) -> VersionRange { + VersionRange::between(SemVer::from(*self), SemVer::from(self.next())) + } +} + +impl From for BucketVersion { + fn from(v: SemVer) -> Self { + if v.major == 0 { + BucketVersion::Minor(v.minor) + } else { + BucketVersion::Major(v.major) + } + } +} + +impl From for SemVer { + fn from(bv: BucketVersion) -> Self { + match bv { + BucketVersion::Major(v) => SemVer::new(v, 0, 0), + BucketVersion::Minor(v) => SemVer::new(0, v, 0), + } + } +} + +impl From for BucketVersion { + fn from(v: VersionReq) -> Self { + match v { + VersionReq::Compatible(v) | VersionReq::Exact(v) => v.into(), + } + } +} + +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub struct Bucket { + pub id: Id, + pub version: BucketVersion, +} + +/// Identical to `Precise`, but contains only the unversioned variants. 
+#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub enum UnversionedPrecise { + Git { + url: gix::Url, + id: ObjectId, + path: PathBuf, + }, + Path { + path: PathBuf, + }, +} + +impl From for Precise { + fn from(up: UnversionedPrecise) -> Self { + match up { + UnversionedPrecise::Git { url, id, path } => Precise::Git { url, id, path }, + UnversionedPrecise::Path { path } => Precise::Path { path }, + } + } +} + +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub enum Package { + /// A package that only comes in one version (like a path or a git dependency). + /// TODO: right now we say that all unversioned packages have version `0.0.0`, but it + /// isn't great for error messages + Unversioned(UnversionedPrecise), + Bucket(Bucket), +} + +impl Package { + pub fn from_index(id: index::Id, version_req: VersionReq) -> (Self, VersionRange) { + let pkg = Package::Bucket(Bucket { + id, + version: version_req.clone().into(), + }); + + let range = match version_req { + VersionReq::Compatible(v) => VersionRange::higher_than(v), + VersionReq::Exact(v) => VersionRange::exact(v), + }; + (pkg, range) + } +} + +impl std::fmt::Display for Package { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Package::Unversioned(UnversionedPrecise::Git { url, .. }) => { + write!(f, "{}", url) + } + Package::Unversioned(UnversionedPrecise::Path { path }) => { + write!(f, "{}", path.display()) + } + Package::Bucket(b) => { + write!(f, "{}#{}", b.id, b.version) + } + } + } +} + +// Makes the precise less precise, by returning the bucket that it falls into. 
+impl From for Package { + fn from(p: Precise) -> Self { + match p { + Precise::Git { url, id, path } => { + Package::Unversioned(UnversionedPrecise::Git { url, id, path }) + } + Precise::Path { path } => Package::Unversioned(UnversionedPrecise::Path { path }), + Precise::Index { id, version } => Package::Bucket(Bucket { + id, + version: version.into(), + }), + } + } +} + +impl DependencyProvider for PackageRegistry { + fn choose_package_version, U: Borrow>>( + &self, + potential_packages: impl Iterator, + ) -> Result<(T, Option), Box> { + // We try to choose the package with the fewest available versions, as the pubgrub + // docs recommend this as a reasonably-performant heuristic. We count a previously locked package + // as having one version (even if we'd theoretically be willing to ignore the lock). + let count_valid = |(p, range): (T, U)| -> Result<_, Box> { + let count = if self.previously_locked.contains_key(p.borrow()) { + 1 + } else { + self.list_versions(p.borrow())? + .filter(|v| range.borrow().contains(v)) + .count() + }; + Ok((count, p, range)) + }; + + let pkgs_and_ranges = potential_packages + .map(count_valid) + .collect::, _>>()?; + let (_count, pkg, range) = pkgs_and_ranges + .into_iter() + .min_by_key(|(count, _, _)| *count) + .expect("potential_packages gave us an empty iterator"); + let version = self + .list_versions(pkg.borrow())? + .find(|v| range.borrow().contains(v)); + Ok((pkg, version)) + } + + fn get_dependencies( + &self, + package: &Package, + version: &SemVer, + ) -> Result, Box> { + match package { + Package::Unversioned(p) => { + let precise = Precise::from(p.clone()); + let deps = self + .unversioned_deps(p) + .into_iter() + .map(|dep| match dep { + Dependency::Git(_) | Dependency::Path { .. 
} => { + let dep_precise = self.realized_unversioned.dependency + [&(precise.clone(), dep.clone())] + .clone(); + (Package::Unversioned(dep_precise), VersionRange::any()) + } + Dependency::Index { id, version } => Package::from_index(id, version), + }) + .collect(); + Ok(pubgrub::solver::Dependencies::Known(deps)) + } + Package::Bucket(b) => { + let deps = self + .index_deps(&b.id, version)? + .into_iter() + .map(|dep| { + let IndexDependency { id, req } = dep.as_index_dep(b.id.clone())?; + Ok(Package::from_index(id, req)) + }) + .collect::>()?; + Ok(pubgrub::solver::Dependencies::Known(deps)) + } + } + } +} + +#[derive(Debug)] +pub struct Resolution { + pub realization: Realization, + pub index_packages: HashMap>, + pub index: PackageIndex, +} + +pub fn resolve(manifest: &ManifestFile, config: Config) -> Result { + resolve_with_lock(manifest, &LockFile::default(), config) +} + +fn previously_locked(_top_level: &Package, lock: &LockFile) -> HashMap { + fn precise_to_index(p: &Precise) -> Option { + match p { + Precise::Index { id, version } => Some(IndexPrecise { + id: id.clone(), + version: version.clone(), + }), + _ => None, + } + } + + // A list of (package: Package, version of the package: SemVer, dependency: IndexPrecise) + let pkg_deps = lock + .dependencies + .values() + .filter_map(precise_to_index) + .chain( + lock.packages + .values() + .flat_map(|entry| entry.dependencies.values().filter_map(precise_to_index)), + ); + + pkg_deps + .map(|IndexPrecise { id, version }| { + let dep_bucket: BucketVersion = version.clone().into(); + ( + Package::Bucket(Bucket { + id, + version: dep_bucket, + }), + version, + ) + }) + .collect() +} + +pub fn resolve_with_lock( + manifest: &ManifestFile, + lock: &LockFile, + config: Config, +) -> Result { + // We're forcing the index's root cache directory to be the same as the one used for downloading git + // deps. In principle we could decouple them, but I'm not sure it's necessary. 
+ let index = PackageIndex::new(config.clone()); + let mut realization = Realization::new(config); + + // TODO: this assumes that the top-level package has a path. Is there a a use-case for resolving + // packages without a top-level path? + let root_path = manifest.parent_dir.as_deref(); + for dep in manifest.dependencies.values() { + realization.realize_all(root_path.unwrap(), dep, None)?; + } + let top_level = UnversionedPrecise::Path { + path: root_path.unwrap().to_path_buf(), + }; + realization + .manifests + .insert(top_level.clone().into(), manifest.clone()); + + let top_level_pkg = Package::Unversioned(top_level); + let registry = PackageRegistry { + previously_locked: previously_locked(&top_level_pkg, lock), + index, + realized_unversioned: realization, + }; + // TODO: we could avoid this if there are no index deps + registry.index.ensure_exists()?; + + let resolution = match pubgrub::solver::resolve(®istry, top_level_pkg, SemVer::new(0, 0, 0)) + { + Ok(r) => r, + Err(pubgrub::error::PubGrubError::NoSolution(derivation_tree)) => { + //derivation_tree.collapse_no_versions(); + let msg = DefaultStringReporter::report(&derivation_tree); + return Err(Error::Resolution { msg }); + } + Err(e) => return Err(Error::Resolution { msg: e.to_string() }), + }; + let mut selected = HashMap::>::new(); + for (pkg, vers) in resolution.iter() { + if let Package::Bucket(Bucket { id, .. }) = pkg { + selected.entry(id.clone()).or_default().push(vers.clone()); + } + } + Ok(Resolution { + realization: registry.realized_unversioned, + index: registry.index, + index_packages: selected, + }) +} + +impl Resolution { + /// Finds the precise resolved version of this dependency. + /// + /// # Panics + /// + /// Panics if the dependency was not part of the dependency tree that this resolution + /// was generated for. 
+ pub fn precise(&self, dep: &Dependency) -> Precise { + match dep { + Dependency::Git(git) => Precise::Git { + url: git.url.clone(), + id: self.realization.git[git], + path: git.path.clone(), + }, + Dependency::Path { path } => Precise::Path { + path: path.to_owned(), + }, + Dependency::Index { id, version } => Precise::Index { + id: id.clone(), + version: self.index_packages[id] + .iter() + .filter(|v| version.matches(v)) + .max() + .unwrap() + .clone(), + }, + } + } + + /// Returns all the dependencies of a package, along with their package-local names. + pub fn dependencies(&self, pkg: &Precise) -> Result, Error> { + let ret = match pkg { + Precise::Path { .. } | Precise::Git { .. } => { + let manifest = &self.realization.manifests[pkg]; + manifest + .dependencies + .iter() + .map(move |(dep_name, dep)| { + match self.realization.dependency.get(&(pkg.clone(), dep.clone())) { + Some(precise_dep) => (*dep_name, precise_dep.clone().into()), + None => { + // Since the realization contains all the unversioned deps, if we didn't + // find our dep then it must be an index dep. + (*dep_name, self.precise(dep)) + } + } + }) + .collect() + } + Precise::Index { id, version } => { + let index_pkg = self.index.package(id, version.clone())?.unwrap(); + index_pkg + .deps + .into_iter() + .map(move |(dep_name, dep)| { + let precise_dep = self.precise(&Dependency::Index { + id: dep.id.clone(), + version: dep.req.clone(), + }); + (dep_name, precise_dep) + }) + .collect() + } + }; + Ok(ret) + } + + /// Returns all the resolved packages in the dependency tree. 
+ pub fn all_precises(&self) -> Vec { + let mut ret: Vec<_> = self + .realization + .dependency + .values() + .map(|p| p.clone().into()) + .collect(); + ret.sort(); + ret.dedup(); + + let index_precises = self.index_packages.iter().flat_map(|(id, vs)| { + vs.iter().map(|v| Precise::Index { + id: id.clone(), + version: v.clone(), + }) + }); + ret.extend(index_precises); + ret + } + + pub fn package_map(&self, manifest: &ManifestFile) -> Result { + // TODO: we can still make a package map without a root directory; we just have to disallow + // relative path dependencies + let parent_dir = manifest.parent_dir.clone().unwrap(); + let manifest_dir = normalize_path(&parent_dir).with_path(&parent_dir)?; + let config = &self.realization.config; + + let all = self.all_precises(); + + let mut packages = HashMap::new(); + for p in &all { + let p_path = p.clone().with_abs_path(&manifest_dir).local_path(config); + let root_path = &manifest_dir; + for (dep_id, dep_precise) in self.dependencies(p)? { + packages.insert( + (p_path.clone(), dep_id), + dep_precise.with_abs_path(root_path).local_path(config), + ); + } + } + + Ok(PackageMap { + // Copy over dependencies of the root, making paths absolute. + top_level: manifest + .dependencies + .iter() + .map(|(name, source)| { + ( + *name, + self.precise(source) + .with_abs_path(&manifest_dir) + .local_path(config), + ) + }) + .collect(), + + packages, + }) + } +} diff --git a/package/src/version.rs b/package/src/version.rs new file mode 100644 index 000000000..2550a5f1a --- /dev/null +++ b/package/src/version.rs @@ -0,0 +1,210 @@ +//! This module contains everything to do with version numbers. + +use std::{num::ParseIntError, str::FromStr}; + +use semver::{BuildMetadata, Prerelease}; +use serde_with::{DeserializeFromStr, SerializeDisplay}; + +/// A full semantic version, including prerelease and build metadata. +pub type FullSemVer = semver::Version; + +/// Our most-widely-used version type. 
+/// +/// This drops the build metadata part (which we allow during parsing but +/// ignore for all version-resolution purposes). +/// +/// Possible optimizations: +/// - shrink the numbers to `u32` +/// - intern the prerelease tag. This needs to be done in a way that preserves +/// the ordering rules, which are rather more complicated than a string comparison. +#[derive( + Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, DeserializeFromStr, SerializeDisplay, +)] +pub struct SemVer { + pub major: u64, + pub minor: u64, + pub patch: u64, + pub pre: Prerelease, +} + +impl SemVer { + pub fn new(major: u64, minor: u64, patch: u64) -> Self { + Self { + major, + minor, + patch, + pre: Prerelease::EMPTY, + } + } + + pub fn bump_major(&self) -> SemVer { + SemVer { + major: self.major + 1, + minor: 0, + patch: 0, + pre: Prerelease::EMPTY, + } + } + + pub fn bump_minor(&self) -> SemVer { + SemVer { + major: self.major, + minor: self.minor + 1, + patch: 0, + pre: Prerelease::EMPTY, + } + } + + pub fn next_incompatible(&self) -> SemVer { + // TODO: should we panic or something if pre is non-empty? + if self.major == 0 { + self.bump_minor() + } else { + self.bump_major() + } + } +} + +impl From for SemVer { + fn from(fsv: FullSemVer) -> Self { + Self { + major: fsv.major, + minor: fsv.minor, + patch: fsv.patch, + pre: fsv.pre, + } + } +} + +impl From for FullSemVer { + fn from(sv: SemVer) -> Self { + Self { + major: sv.major, + minor: sv.minor, + patch: sv.patch, + pre: sv.pre, + build: BuildMetadata::EMPTY, + } + } +} + +// This conversion loses information on which of the fields were present. This +// information is sometimes relevant for comparing version requirements (e.g., +// "1.3.0" matches the requirement "1.2" but it doesn't match the requirement +// "1.2.0"). 
+impl From for SemVer { + fn from(psv: PartialSemVer) -> Self { + Self { + major: psv.major, + minor: psv.minor.unwrap_or(0), + patch: psv.patch.unwrap_or(0), + pre: Prerelease::EMPTY, + } + } +} + +#[derive(Debug, thiserror::Error)] +pub enum SemVerParseError { + #[error("build metadata is not allowed in this semver")] + Metadata, + #[error(transparent)] + Inner(#[from] semver::Error), +} + +impl FromStr for SemVer { + type Err = SemVerParseError; + + fn from_str(s: &str) -> Result { + let full = FullSemVer::from_str(s)?; + if !full.build.is_empty() { + Err(SemVerParseError::Metadata) + } else { + Ok(full.into()) + } + } +} + +impl std::fmt::Display for SemVer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + FullSemVer::from(self.clone()).fmt(f) + } +} + +/// A partial semantic version, with no pre-release part, and optional minor and patch versions. +#[derive( + Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, DeserializeFromStr, SerializeDisplay, +)] +pub struct PartialSemVer { + pub major: u64, + pub minor: Option, + pub patch: Option, +} + +impl PartialSemVer { + pub fn major_minor(major: u64, minor: u64) -> Self { + Self { + major, + minor: Some(minor), + patch: None, + } + } +} + +#[derive(Debug, thiserror::Error)] +pub enum PartialSemVerParseError { + #[error("empty string")] + Empty, + #[error("a semantic version can contain at most 2 dots")] + TooManyDots, + #[error("invalid number: `{0}`")] + Num(#[from] ParseIntError), +} + +impl FromStr for PartialSemVer { + type Err = PartialSemVerParseError; + + fn from_str(s: &str) -> Result { + let mut parts = s.split('.'); + let major = parts + .next() + .ok_or(PartialSemVerParseError::Empty)? 
+ .parse()?; + let minor = parts.next().map(u64::from_str).transpose()?; + let patch = parts.next().map(u64::from_str).transpose()?; + if parts.next().is_some() { + return Err(PartialSemVerParseError::TooManyDots); + } + + Ok(Self { + major, + minor, + patch, + }) + } +} + +impl std::fmt::Display for PartialSemVer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match (self.minor, self.patch) { + (None, _) => { + write!(f, "{}", self.major) + } + (Some(minor), None) => { + write!(f, "{}.{}", self.major, minor) + } + (Some(minor), Some(patch)) => { + write!(f, "{}.{}.{}", self.major, minor, patch) + } + } + } +} + +impl pubgrub::version::Version for SemVer { + fn lowest() -> Self { + Self::new(0, 0, 0) + } + + fn bump(&self) -> Self { + Self::new(self.major, self.minor, self.patch + 1) + } +} diff --git a/package/tests/integration/inputs/git/branch-leaf/branch.txt b/package/tests/integration/inputs/git/branch-leaf/branch.txt new file mode 100644 index 000000000..b3ffe0adb --- /dev/null +++ b/package/tests/integration/inputs/git/branch-leaf/branch.txt @@ -0,0 +1 @@ +cành diff --git a/package/tests/integration/inputs/git/branch-leaf/package.ncl b/package/tests/integration/inputs/git/branch-leaf/package.ncl new file mode 100644 index 000000000..ffd6d9537 --- /dev/null +++ b/package/tests/integration/inputs/git/branch-leaf/package.ncl @@ -0,0 +1,8 @@ +{ + name = "branch-leaf", + description = "A package with no dependencies, available at a git branch", + version = "0.1.0", + authors = ["Joe"], + nickel_version = "1.9.0", + dependencies = {}, +} | std.package.Manifest diff --git a/package/tests/integration/inputs/git/leaf/package.ncl b/package/tests/integration/inputs/git/leaf/package.ncl new file mode 100644 index 000000000..40d19dd40 --- /dev/null +++ b/package/tests/integration/inputs/git/leaf/package.ncl @@ -0,0 +1,8 @@ +{ + name = "leaf", + description = "A package with no dependencies", + version = "0.1.0", + nickel_version = "1.9.0", + 
authors = ["Joe"], + dependencies = {}, +} | std.package.Manifest diff --git a/package/tests/integration/inputs/git/tag-leaf/package.ncl b/package/tests/integration/inputs/git/tag-leaf/package.ncl new file mode 100644 index 000000000..ddf42627f --- /dev/null +++ b/package/tests/integration/inputs/git/tag-leaf/package.ncl @@ -0,0 +1,8 @@ +{ + name = "leaf", + description = "A package with no dependencies, available at a git tag", + version = "0.1.0", + authors = ["Joe"], + nickel_version = "1.9.0", + dependencies = {}, +} | std.package.Manifest diff --git a/package/tests/integration/inputs/git/tag-leaf/tag.txt b/package/tests/integration/inputs/git/tag-leaf/tag.txt new file mode 100644 index 000000000..04eaf200f --- /dev/null +++ b/package/tests/integration/inputs/git/tag-leaf/tag.txt @@ -0,0 +1 @@ +mytag diff --git a/package/tests/integration/inputs/git/with-subdirs/leaf-subdir/package.ncl b/package/tests/integration/inputs/git/with-subdirs/leaf-subdir/package.ncl new file mode 100644 index 000000000..96b60d19b --- /dev/null +++ b/package/tests/integration/inputs/git/with-subdirs/leaf-subdir/package.ncl @@ -0,0 +1,8 @@ +{ + name = "leaf-subdir", + description = "A package in a subdirectory of a git repo", + version = "0.1.0", + authors = ["Joe"], + nickel_version = "1.9.0", + dependencies = {}, +} | std.package.Manifest diff --git a/package/tests/integration/inputs/git/with-subdirs/package.ncl b/package/tests/integration/inputs/git/with-subdirs/package.ncl new file mode 100644 index 000000000..713024610 --- /dev/null +++ b/package/tests/integration/inputs/git/with-subdirs/package.ncl @@ -0,0 +1,8 @@ +{ + name = "with-subdirs", + description = "A package in a git repo with other packages in subdirectories", + version = "0.1.0", + authors = ["Joe"], + nickel_version = "1.9.0", + dependencies = {}, +} | std.package.Manifest diff --git a/package/tests/integration/inputs/git/with-subdirs/subdir-with-path-dep/package.ncl 
b/package/tests/integration/inputs/git/with-subdirs/subdir-with-path-dep/package.ncl new file mode 100644 index 000000000..ac51b07a5 --- /dev/null +++ b/package/tests/integration/inputs/git/with-subdirs/subdir-with-path-dep/package.ncl @@ -0,0 +1,11 @@ +{ + name = "subdir-with-path-dep", + description = "A package in a subdir of a git repo, depending on packages in other directories", + version = "0.1.0", + authors = ["Joe"], + nickel_version = "1.9.0", + dependencies = { + leaf = 'Path "../leaf-subdir", + parent = 'Path "..", + }, +} | std.package.Manifest diff --git a/package/tests/integration/inputs/path/git-branch-and-tag-dep/package.ncl b/package/tests/integration/inputs/path/git-branch-and-tag-dep/package.ncl new file mode 100644 index 000000000..fce282c3d --- /dev/null +++ b/package/tests/integration/inputs/path/git-branch-and-tag-dep/package.ncl @@ -0,0 +1,11 @@ +{ + name = "branch-and-tag-git-dep", + description = "A package depending on a git branch and a git tag", + version = "0.1.0", + authors = ["Joe"], + nickel_version = "1.9.0", + dependencies = { + branch = 'Git { url = "https://example.com/branch-leaf", ref = 'Branch "cành" }, + tag = 'Git { url = "https://example.com/tag-leaf", ref = 'Tag "mytag" }, + }, +} | std.package.Manifest diff --git a/package/tests/integration/inputs/path/git-path-dep/package.ncl b/package/tests/integration/inputs/path/git-path-dep/package.ncl new file mode 100644 index 000000000..a70b35cc7 --- /dev/null +++ b/package/tests/integration/inputs/path/git-path-dep/package.ncl @@ -0,0 +1,11 @@ +{ + name = "git-path-dep", + description = "A package whose deps live in subdirs of a git repo", + version = "0.1.0", + authors = ["Joe"], + nickel_version = "1.9.0", + dependencies = { + git-root = 'Git { url = "https://example.com/with-subdirs" }, + git-subdir = 'Git { url = "https://example.com/with-subdirs", path = "subdir-with-path-dep" }, + }, +} | std.package.Manifest diff --git 
a/package/tests/integration/inputs/path/leaf/package.ncl b/package/tests/integration/inputs/path/leaf/package.ncl new file mode 100644 index 000000000..40d19dd40 --- /dev/null +++ b/package/tests/integration/inputs/path/leaf/package.ncl @@ -0,0 +1,8 @@ +{ + name = "leaf", + description = "A package with no dependencies", + version = "0.1.0", + nickel_version = "1.9.0", + authors = ["Joe"], + dependencies = {}, +} | std.package.Manifest diff --git a/package/tests/integration/inputs/path/single-git-dep/package.ncl b/package/tests/integration/inputs/path/single-git-dep/package.ncl new file mode 100644 index 000000000..cc343ad16 --- /dev/null +++ b/package/tests/integration/inputs/path/single-git-dep/package.ncl @@ -0,0 +1,10 @@ +{ + name = "single-git-dep", + description = "A package with a git dependency", + version = "0.1.0", + authors = ["Joe"], + nickel_version = "1.9.0", + dependencies = { + leaf = 'Git { url = "https://example.com/leaf" } + }, +} | std.package.Manifest diff --git a/package/tests/integration/inputs/path/single-path-dep/package.ncl b/package/tests/integration/inputs/path/single-path-dep/package.ncl new file mode 100644 index 000000000..5e82b1c33 --- /dev/null +++ b/package/tests/integration/inputs/path/single-path-dep/package.ncl @@ -0,0 +1,10 @@ +{ + name = "single-path-dep", + description = "A package with a single path dependency", + version = "0.1.0", + authors = ["Joe"], + nickel_version = "1.9.0", + dependencies = { + leaf = 'Path "../leaf" + }, +} | std.package.Manifest diff --git a/package/tests/integration/main.rs b/package/tests/integration/main.rs new file mode 100644 index 000000000..cb1222a56 --- /dev/null +++ b/package/tests/integration/main.rs @@ -0,0 +1,119 @@ +use std::{path::Path, process::Command}; + +use nickel_lang_package::{config::Config, lock::LockFile, ManifestFile}; +use nickel_lang_utils::project_root::project_root; +use tempfile::TempDir; +use test_generator::test_resources; + +macro_rules! 
assert_lock_snapshot_filtered { + { $name:expr, $snapshot:expr } => { + insta::with_settings!({filters => vec![ + // Lock files contain git ids, and I haven't figured out if it's possible to + // get them consistent between runs (e.g., they include hashes of timestamps). + // So we just filter them out of the comparison. + (r#""id": "[a-z0-9]+""#, r#""id": "#) + ]}, + { + insta::assert_snapshot!($name, $snapshot); + }) + } +} + +// We'd like to test git dependencies, but it's considered bad form (and is annoying to manage) +// to nest the test git repos in our main repo. So what we do is just keep the contents of our +// test git repos in `package/tests/integration/inputs/git`. Then when we run our tests, we +// create temporary git repos for these contents, and use the source replacement mechanism +// to redirect to these temporary git repos. +// +// This function does all the git repo creation and population. We run it on every test, which is +// a bit wasteful because not every test needs every repo. Maybe we can share the set up step? +// The tests shouldn't modify the repos... +fn set_up_git_repos(config: &mut Config) -> TempDir { + let tmp = TempDir::new().unwrap(); + let git_inputs = + std::fs::read_dir(project_root().join("package/tests/integration/inputs/git")).unwrap(); + + for input in git_inputs { + let input = input.unwrap(); + let input_path = input.path(); + let file_name = input_path.file_name().unwrap(); + + let dir_path = tmp.path().join(file_name); + + let run = |cmd: &mut Command| { + assert!(cmd.output().unwrap().status.success()); + }; + + let run_in_dir = |cmd: &mut Command| { + run(cmd.current_dir(&dir_path)); + }; + + // The rust stdlib doesn't have anything for recursively copying a directory. There are + // some crates for that, but it's easier just to shell out. 
+ run(Command::new("cp") + .arg("-r") + .arg(&input_path) + .arg(tmp.path())); + + // We have some hacky ways to test branch/tag fetching: if the input contains a tag.txt file, + // make a git tag named with the contents of that file. If the input contains a branch.txt file, + // make a git branch named with the contents of that file. + let tag = std::fs::read_to_string(dir_path.join("tag.txt")).ok(); + let branch = std::fs::read_to_string(dir_path.join("branch.txt")).ok(); + + run_in_dir(Command::new("git").arg("init")); + + if let Some(branch) = branch { + run_in_dir(Command::new("git").args(["commit", "-m", "initial", "--allow-empty"])); + run_in_dir(Command::new("git").args(["checkout", "-b", branch.trim()])); + } + + run_in_dir(Command::new("git").args(["add", "--all"])); + run_in_dir(Command::new("git").args(["commit", "-m", "initial"])); + + if let Some(tag) = tag { + run_in_dir(Command::new("git").args(["tag", tag.trim()])); + } + + let orig_url = gix::Url::try_from(format!( + "https://example.com/{}", + Path::new(file_name).display() + )) + .unwrap(); + let new_url = gix::Url::try_from(dir_path.display().to_string()).unwrap(); + config.git_replacements.insert(orig_url, new_url); + } + + tmp +} + +#[test_resources("package/tests/integration/inputs/path/**/package.ncl")] +fn generate_lock_file(path: &str) { + let full_path = project_root().join(path); + let cache_dir = TempDir::new().unwrap(); + let index_dir = TempDir::new().unwrap(); + let mut config = Config::default().with_cache_dir(cache_dir.path().to_owned()); + + let _git_dir = set_up_git_repos(&mut config); + + // Make an empty git repo as the index. 
+ Command::new("git") + .arg("init") + .current_dir(index_dir.path()) + .output() + .unwrap(); + Command::new("git") + .args(["commit", "--allow-empty", "-m", "initial"]) + .current_dir(index_dir.path()) + .output() + .unwrap(); + config.index_url = index_dir.path().try_into().unwrap(); + + // TODO: test error messages also, and allow manifests to fail + let manifest = ManifestFile::from_path(&full_path).unwrap(); + let resolution = manifest.resolve(config).unwrap(); + let lock = LockFile::new(&manifest, &resolution).unwrap(); + let lock_contents = serde_json::to_string_pretty(&lock).unwrap(); + + assert_lock_snapshot_filtered!(path, lock_contents); +} diff --git a/package/tests/integration/snapshots/integration__package__tests__integration__inputs__path__git-branch-and-tag-dep__package.ncl.snap b/package/tests/integration/snapshots/integration__package__tests__integration__inputs__path__git-branch-and-tag-dep__package.ncl.snap new file mode 100644 index 000000000..fe8f9b1b8 --- /dev/null +++ b/package/tests/integration/snapshots/integration__package__tests__integration__inputs__path__git-branch-and-tag-dep__package.ncl.snap @@ -0,0 +1,44 @@ +--- +source: package/tests/integration/main.rs +expression: lock_contents +--- +{ + "dependencies": { + "branch": { + "Git": { + "url": "https://example.com/branch-leaf", + "id": , + "path": "" + } + }, + "tag": { + "Git": { + "url": "https://example.com/tag-leaf", + "id": , + "path": "" + } + } + }, + "packages": [ + { + "source": { + "Git": { + "url": "https://example.com/branch-leaf", + "id": , + "path": "" + } + }, + "dependencies": {} + }, + { + "source": { + "Git": { + "url": "https://example.com/tag-leaf", + "id": , + "path": "" + } + }, + "dependencies": {} + } + ] +} diff --git a/package/tests/integration/snapshots/integration__package__tests__integration__inputs__path__git-path-dep__package.ncl.snap 
b/package/tests/integration/snapshots/integration__package__tests__integration__inputs__path__git-path-dep__package.ncl.snap new file mode 100644 index 000000000..560d6bad4 --- /dev/null +++ b/package/tests/integration/snapshots/integration__package__tests__integration__inputs__path__git-path-dep__package.ncl.snap @@ -0,0 +1,69 @@ +--- +source: package/tests/integration/main.rs +expression: lock_contents +--- +{ + "dependencies": { + "git-root": { + "Git": { + "url": "https://example.com/with-subdirs", + "id": , + "path": "" + } + }, + "git-subdir": { + "Git": { + "url": "https://example.com/with-subdirs", + "id": , + "path": "subdir-with-path-dep" + } + } + }, + "packages": [ + { + "source": { + "Git": { + "url": "https://example.com/with-subdirs", + "id": , + "path": "" + } + }, + "dependencies": {} + }, + { + "source": { + "Git": { + "url": "https://example.com/with-subdirs", + "id": , + "path": "leaf-subdir" + } + }, + "dependencies": {} + }, + { + "source": { + "Git": { + "url": "https://example.com/with-subdirs", + "id": , + "path": "subdir-with-path-dep" + } + }, + "dependencies": { + "leaf": { + "Git": { + "url": "https://example.com/with-subdirs", + "id": , + "path": "leaf-subdir" + } + }, + "parent": { + "Git": { + "url": "https://example.com/with-subdirs", + "id": , + "path": "" + } + } + } + } + ] +} diff --git a/package/tests/integration/snapshots/integration__package__tests__integration__inputs__path__leaf__package.ncl.snap b/package/tests/integration/snapshots/integration__package__tests__integration__inputs__path__leaf__package.ncl.snap new file mode 100644 index 000000000..80b6eb754 --- /dev/null +++ b/package/tests/integration/snapshots/integration__package__tests__integration__inputs__path__leaf__package.ncl.snap @@ -0,0 +1,8 @@ +--- +source: package/tests/integration/main.rs +expression: lock_contents +--- +{ + "dependencies": {}, + "packages": [] +} diff --git 
a/package/tests/integration/snapshots/integration__package__tests__integration__inputs__path__single-git-dep__package.ncl.snap b/package/tests/integration/snapshots/integration__package__tests__integration__inputs__path__single-git-dep__package.ncl.snap new file mode 100644 index 000000000..006dde6d2 --- /dev/null +++ b/package/tests/integration/snapshots/integration__package__tests__integration__inputs__path__single-git-dep__package.ncl.snap @@ -0,0 +1,27 @@ +--- +source: package/tests/integration/main.rs +expression: lock_contents +--- +{ + "dependencies": { + "leaf": { + "Git": { + "url": "https://example.com/leaf", + "id": , + "path": "" + } + } + }, + "packages": [ + { + "source": { + "Git": { + "url": "https://example.com/leaf", + "id": , + "path": "" + } + }, + "dependencies": {} + } + ] +} diff --git a/package/tests/integration/snapshots/integration__package__tests__integration__inputs__path__single-path-dep__package.ncl.snap b/package/tests/integration/snapshots/integration__package__tests__integration__inputs__path__single-path-dep__package.ncl.snap new file mode 100644 index 000000000..1a225a42e --- /dev/null +++ b/package/tests/integration/snapshots/integration__package__tests__integration__inputs__path__single-path-dep__package.ncl.snap @@ -0,0 +1,23 @@ +--- +source: package/tests/integration/main.rs +expression: lock_contents +--- +{ + "dependencies": { + "leaf": { + "Path": { + "path": "../leaf" + } + } + }, + "packages": [ + { + "source": { + "Path": { + "path": "../leaf" + } + }, + "dependencies": {} + } + ] +}