Skip to content

Commit

Permalink
feat: parse url and path as matchspec (#704)
Browse files Browse the repository at this point in the history
Fixes #640

---------

Co-authored-by: Bas Zalmstra <[email protected]>
  • Loading branch information
ruben-arts and baszalmstra authored Jun 4, 2024
1 parent ab1daa3 commit aa71062
Show file tree
Hide file tree
Showing 10 changed files with 201 additions and 85 deletions.
1 change: 1 addition & 0 deletions crates/file_url/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ url = { workspace = true }
percent-encoding = { workspace = true }
itertools = { workspace = true }
typed-path = { workspace = true }
thiserror = "1.0.61"

[dev-dependencies]
rstest = { workspace = true }
25 changes: 16 additions & 9 deletions crates/file_url/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use percent_encoding::{percent_decode, percent_encode, AsciiSet, CONTROLS};
use std::fmt::Write;
use std::path::PathBuf;
use std::str::FromStr;
use thiserror::Error;
use typed_path::{
Utf8TypedComponent, Utf8TypedPath, Utf8UnixComponent, Utf8WindowsComponent, Utf8WindowsPrefix,
};
Expand Down Expand Up @@ -97,7 +98,7 @@ fn starts_with_windows_drive_letter(s: &str) -> bool {
&& (s.len() == 2 || matches!(s.as_bytes()[2], b'/' | b'\\' | b'?' | b'#'))
}

fn path_to_url<'a>(path: impl Into<Utf8TypedPath<'a>>) -> Result<String, NotAnAbsolutePath> {
fn path_to_url<'a>(path: impl Into<Utf8TypedPath<'a>>) -> Result<String, FileURLParseError> {
let path = path.into();
let mut components = path.components();

Expand All @@ -114,15 +115,16 @@ fn path_to_url<'a>(path: impl Into<Utf8TypedPath<'a>>) -> Result<String, NotAnAb
}
Utf8WindowsPrefix::UNC(server, share)
| Utf8WindowsPrefix::VerbatimUNC(server, share) => {
let host = Host::parse(server).map_err(|_err| NotAnAbsolutePath)?;
let host =
Host::parse(server).map_err(|_err| FileURLParseError::NotAnAbsolutePath)?;
write!(result, "{host}").unwrap();
result.push('/');
result.extend(percent_encode(share.as_bytes(), PATH_SEGMENT));
}
_ => return Err(NotAnAbsolutePath),
_ => return Err(FileURLParseError::NotAnAbsolutePath),
},
Some(Utf8TypedComponent::Unix(Utf8UnixComponent::RootDir)) => {}
_ => return Err(NotAnAbsolutePath),
_ => return Err(FileURLParseError::NotAnAbsolutePath),
}

let mut path_only_has_prefix = true;
Expand Down Expand Up @@ -153,17 +155,22 @@ fn path_to_url<'a>(path: impl Into<Utf8TypedPath<'a>>) -> Result<String, NotAnAb
Ok(result)
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct NotAnAbsolutePath;
#[derive(Debug, Error)]
pub enum FileURLParseError {
#[error("The path is not an absolute path")]
NotAnAbsolutePath,

pub fn file_path_to_url<'a>(path: impl Into<Utf8TypedPath<'a>>) -> Result<Url, NotAnAbsolutePath> {
#[error("The URL string is invalid")]
InvalidUrl(#[from] url::ParseError),
}
pub fn file_path_to_url<'a>(path: impl Into<Utf8TypedPath<'a>>) -> Result<Url, FileURLParseError> {
let url = path_to_url(path)?;
Ok(Url::from_str(&url).expect("url string must be a valid url"))
Url::from_str(&url).map_err(FileURLParseError::InvalidUrl)
}

pub fn directory_path_to_url<'a>(
path: impl Into<Utf8TypedPath<'a>>,
) -> Result<Url, NotAnAbsolutePath> {
) -> Result<Url, FileURLParseError> {
let mut url = path_to_url(path)?;
if !url.ends_with('/') {
url.push('/');
Expand Down
50 changes: 2 additions & 48 deletions crates/rattler_conda_types/src/channel/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use itertools::Itertools;
use std::borrow::Cow;
use std::fmt::{Display, Formatter};
use std::path::{Path, PathBuf};
use std::str::FromStr;

use crate::utils::path::is_path;
use crate::utils::url::parse_scheme;
use file_url::directory_path_to_url;
use serde::{Deserialize, Serialize};
use thiserror::Error;
Expand Down Expand Up @@ -381,53 +382,6 @@ pub(crate) const fn default_platforms() -> &'static [Platform] {
&CURRENT_PLATFORMS
}

/// Extracts the scheme of a human-readable channel string, e.g. `https` for
/// `https://host/path`.
///
/// Returns `None` when the string contains no `://` separator, when the text in
/// front of the separator is longer than 11 bytes, when it does not start with
/// an alphabetic character, or when any following character is not
/// alphanumeric.
fn parse_scheme(channel: &str) -> Option<&str> {
    // Everything in front of the first `://` is the scheme candidate.
    let (candidate, _rest) = channel.split_once("://")?;

    // Overly long candidates are not considered to be a scheme.
    if candidate.len() > 11 {
        return None;
    }

    // The first character must be a letter; the remainder letters or digits.
    let mut remaining = candidate.chars();
    let starts_with_letter = matches!(remaining.next(), Some(first) if first.is_alphabetic());

    (starts_with_letter && remaining.all(char::is_alphanumeric)).then_some(candidate)
}

/// Heuristically decides whether `path` looks like a filesystem path.
///
/// Anything containing `://` is rejected up front. Otherwise the string counts
/// as a path when it begins with one of the common path prefixes (`./`, `..`,
/// `~`, `/`, two backslashes, or `//`), or with a drive letter followed by `:`
/// and a forward or backward slash (e.g. `C:\` or `c:/`).
fn is_path(path: &str) -> bool {
    // Common prefixes that mark a string as a path.
    const PATH_PREFIXES: [&str; 6] = ["./", "..", "~", "/", "\\\\", "//"];

    // A `://` separator means the input is URL-like, never a path.
    if path.contains("://") {
        return false;
    }

    if PATH_PREFIXES.iter().any(|&prefix| path.starts_with(prefix)) {
        return true;
    }

    // Otherwise only `<letter>:` followed by a slash of either kind qualifies.
    match path.chars().take(3).collect_tuple() {
        Some((drive, ':', '/' | '\\')) => drive.is_alphabetic(),
        _ => false,
    }
}

/// Returns the specified path as an absolute path
fn absolute_path(path: &str, root_dir: &Path) -> Result<Utf8TypedPathBuf, ParseChannelError> {
let path = Utf8TypedPath::from(path);
Expand Down
8 changes: 8 additions & 0 deletions crates/rattler_conda_types/src/match_spec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use serde_with::{serde_as, skip_serializing_none, DisplayFromStr};
use std::fmt::{Debug, Display, Formatter};
use std::hash::Hash;
use std::sync::Arc;
use url::Url;

use crate::Channel;
use crate::ChannelConfig;
Expand Down Expand Up @@ -142,6 +143,8 @@ pub struct MatchSpec {
/// The sha256 hash of the package
#[serde_as(as = "Option<SerializableHash::<rattler_digest::Sha256>>")]
pub sha256: Option<Sha256Hash>,
/// The url of the package
pub url: Option<Url>,
}

impl Display for MatchSpec {
Expand Down Expand Up @@ -248,6 +251,7 @@ impl MatchSpec {
namespace: self.namespace,
md5: self.md5,
sha256: self.sha256,
url: self.url,
},
)
}
Expand Down Expand Up @@ -292,6 +296,8 @@ pub struct NamelessMatchSpec {
/// The sha256 hash of the package
#[serde_as(as = "Option<SerializableHash::<rattler_digest::Sha256>>")]
pub sha256: Option<Sha256Hash>,
/// The url of the package
pub url: Option<Url>,
}

impl NamelessMatchSpec {
Expand Down Expand Up @@ -366,6 +372,7 @@ impl From<MatchSpec> for NamelessMatchSpec {
namespace: spec.namespace,
md5: spec.md5,
sha256: spec.sha256,
url: spec.url,
}
}
}
Expand All @@ -384,6 +391,7 @@ impl MatchSpec {
namespace: spec.namespace,
md5: spec.md5,
sha256: spec.sha256,
url: spec.url,
}
}
}
Expand Down
126 changes: 101 additions & 25 deletions crates/rattler_conda_types/src/match_spec/parse.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{borrow::Cow, path::PathBuf, str::FromStr};
use std::{borrow::Cow, str::FromStr};

use nom::{
branch::alt,
Expand All @@ -13,31 +13,41 @@ use nom::{
use rattler_digest::{parse_digest_from_hex, Md5, Sha256};
use smallvec::SmallVec;
use thiserror::Error;
use typed_path::Utf8TypedPath;
use url::Url;

use super::{
matcher::{StringMatcher, StringMatcherParseError},
MatchSpec,
};
use crate::utils::url::parse_scheme;
use crate::{
build_spec::{BuildNumberSpec, ParseBuildNumberSpecError},
package::ArchiveType,
utils::path::is_path,
version_spec::{
is_start_of_version_constraint,
version_tree::{recognize_constraint, recognize_version},
ParseVersionSpecError,
},
Channel, ChannelConfig, InvalidPackageNameError, NamelessMatchSpec, PackageName,
ParseChannelError, ParseStrictness, VersionSpec,
ParseChannelError, ParseStrictness, ParseVersionError, VersionSpec,
};

/// The type of parse error that occurred when parsing match spec.
#[derive(Debug, Clone, Error)]
#[derive(Debug, Clone, Error, PartialEq)]
pub enum ParseMatchSpecError {
/// The path or url of the package was invalid
#[error("invalid package path or url")]
InvalidPackagePathOrUrl,

/// Invalid package spec url
#[error("invalid package spec url")]
InvalidPackageUrl(#[from] url::ParseError),

/// Invalid version in path or url
#[error(transparent)]
InvalidPackagePathOrUrlVersion(#[from] ParseVersionError),

/// Invalid bracket in match spec
#[error("invalid bracket")]
InvalidBracket,
Expand Down Expand Up @@ -124,11 +134,6 @@ fn strip_if(input: &str) -> (&str, Option<&str>) {
(input, None)
}

/// Returns true if the specified string represents a package path.
fn is_package_file(input: &str) -> bool {
ArchiveType::try_from(input).is_some()
}

/// An optimized data structure to store key value pairs in between a bracket
/// string `[key1=value1, key2=value2]`. The optimization stores two such values
/// on the stack and otherwise allocates a vector on the heap. Two is chosen
Expand Down Expand Up @@ -380,22 +385,23 @@ fn matchspec_parser(
let (input, _comment) = strip_comment(input);
let (input, _if_clause) = strip_if(input);

// 2. Is the spec a tarball?
if is_package_file(input) {
let _url = match Url::parse(input) {
Ok(url) => url,
#[cfg(target_arch = "wasm32")]
Err(_) => return Err(ParseMatchSpecError::InvalidPackagePathOrUrl),
#[cfg(not(target_arch = "wasm32"))]
Err(_) => match PathBuf::from_str(input) {
Ok(path) => Url::from_file_path(path)
.map_err(|_err| ParseMatchSpecError::InvalidPackagePathOrUrl)?,
Err(_) => return Err(ParseMatchSpecError::InvalidPackagePathOrUrl),
},
};

// TODO: Implementing package file specs
unimplemented!()
// 2.a Is the spec an url, parse it as an url
if parse_scheme(input).is_some() {
let url = Url::parse(input)?;
return Ok(MatchSpec {
url: Some(url),
..MatchSpec::default()
});
}
// 2.b Is the spec a path, parse it as an url
if is_path(input) {
let path = Utf8TypedPath::from(input);
let url = file_url::file_path_to_url(path)
.map_err(|_error| ParseMatchSpecError::InvalidPackagePathOrUrl)?;
return Ok(MatchSpec {
url: Some(url),
..MatchSpec::default()
});
}

// 3. Strip off brackets portion
Expand Down Expand Up @@ -507,6 +513,7 @@ mod tests {
use rattler_digest::{parse_digest_from_hex, Md5, Sha256};
use serde::Serialize;
use smallvec::smallvec;
use url::Url;

use super::{
split_version_and_build, strip_brackets, strip_package_name, BracketVec, MatchSpec,
Expand Down Expand Up @@ -757,11 +764,17 @@ mod tests {
// Please keep this list sorted.
let specs = [
"blas *.* mkl",
"C:\\Users\\user\\conda-bld\\linux-64\\foo-1.0-py27_0.tar.bz2",
"foo=1.0=py27_0",
"foo==1.0=py27_0",
"https://conda.anaconda.org/conda-forge/linux-64/py-rattler-0.6.1-py39h8169da8_0.conda",
"https://repo.prefix.dev/ruben-arts/linux-64/boost-cpp-1.78.0-h75c5d50_1.tar.bz2",
"python 3.8.* *_cpython",
"pytorch=*=cuda*",
"x264 >=1!164.3095,<1!165",
"/home/user/conda-bld/linux-64/foo-1.0-py27_0.tar.bz2",
"conda-forge::foo[version=1.0.*]",
"conda-forge::foo[version=1.0.*, build_number=\">6\"]",
];

let evaluated: BTreeMap<_, _> = specs
Expand Down Expand Up @@ -820,4 +833,67 @@ mod tests {
let spec = MatchSpec::from_str("conda-forge::foo", Strict).unwrap();
assert!(spec.namespace.is_none());
}

/// A spec that is a complete `https` URL must be stored as the parsed URL in
/// the `url` field of the resulting match spec.
#[test]
fn test_parsing_url() {
    let raw =
        "https://conda.anaconda.org/conda-forge/linux-64/py-rattler-0.6.1-py39h8169da8_0.conda";

    let parsed = MatchSpec::from_str(raw, Strict).unwrap();
    let expected = Url::parse(raw).unwrap();

    assert_eq!(parsed.url, Some(expected));
}

/// Absolute Windows and Unix paths, with and without an archive extension,
/// must be converted into `file` URLs stored in the `url` field.
#[test]
fn test_parsing_path() {
    // (spec as written by the user, URL string the spec is expected to hold)
    let cases = [
        (
            "C:\\Users\\user\\conda-bld\\linux-64\\foo-1.0-py27_0.tar.bz2",
            "file://C:/Users/user/conda-bld/linux-64/foo-1.0-py27_0.tar.bz2",
        ),
        (
            "/home/user/conda-bld/linux-64/foo-1.0-py27_0.tar.bz2",
            "file:/home/user/conda-bld/linux-64/foo-1.0-py27_0.tar.bz2",
        ),
        (
            "C:\\Users\\user\\Downloads\\package",
            "file://C:/Users/user/Downloads/package",
        ),
        (
            "/home/user/Downloads/package",
            "file:/home/user/Downloads/package",
        ),
    ];

    for (input, expected_url) in cases {
        let spec = MatchSpec::from_str(input, Strict).unwrap();
        assert_eq!(spec.url, Some(Url::parse(expected_url).unwrap()));
    }
}

/// Inputs that must fail to parse, together with the exact error text each one
/// is expected to produce: a malformed URL, a string that is neither a URL nor
/// a path (reported as an invalid package name), and a relative path.
#[test]
fn test_non_happy_url_parsing() {
    // (input, panic message if parsing unexpectedly succeeds, expected error text)
    let cases = [
        ("http://username@", "Invalid url", "invalid package spec url"),
        (
            "bla/bla",
            "Should try to parse as name not url",
            "'bla/bla' is not a valid package name. Package names can only contain 0-9, a-z, A-Z, -, _, or .",
        ),
        ("./test/file", "Invalid url", "invalid package path or url"),
    ];

    for (input, reason, expected_message) in cases {
        let err = MatchSpec::from_str(input, Strict).expect_err(reason);
        assert_eq!(err.to_string(), expected_message);
    }
}
}
Loading

0 comments on commit aa71062

Please sign in to comment.