Skip to content

Commit 7c82ff2

Browse files
committed
Auto merge of #12280 - weihanglo:git-ref-ambiguity, r=ehuss
fix: encode URL params correctly for SourceId in Cargo.lock We use [`form_urlencoded::byte_serialize`](https://docs.rs/form_urlencoded/1.2.0/form_urlencoded/fn.byte_serialize.html), which is re-exported by `url` crate. Tests are copied from <#11086>. Kudos to the original author!
2 parents af8643c + 273285f commit 7c82ff2

File tree

6 files changed

+363
-21
lines changed

6 files changed

+363
-21
lines changed

src/cargo/core/resolver/encode.rs

+93-11
Original file line numberDiff line numberDiff line change
@@ -196,13 +196,13 @@ impl EncodableResolve {
196196
let enc_id = EncodablePackageId {
197197
name: pkg.name.clone(),
198198
version: Some(pkg.version.clone()),
199-
source: pkg.source,
199+
source: pkg.source.clone(),
200200
};
201201

202202
if !all_pkgs.insert(enc_id.clone()) {
203203
anyhow::bail!("package `{}` is specified twice in the lockfile", pkg.name);
204204
}
205-
let id = match pkg.source.as_ref().or_else(|| path_deps.get(&pkg.name)) {
205+
let id = match pkg.source.as_deref().or_else(|| path_deps.get(&pkg.name)) {
206206
// We failed to find a local package in the workspace.
207207
// It must have been removed and should be ignored.
208208
None => {
@@ -366,7 +366,7 @@ impl EncodableResolve {
366366

367367
let mut unused_patches = Vec::new();
368368
for pkg in self.patch.unused {
369-
let id = match pkg.source.as_ref().or_else(|| path_deps.get(&pkg.name)) {
369+
let id = match pkg.source.as_deref().or_else(|| path_deps.get(&pkg.name)) {
370370
Some(&src) => PackageId::new(&pkg.name, &pkg.version, src)?,
371371
None => continue,
372372
};
@@ -488,17 +488,95 @@ impl Patch {
488488
pub struct EncodableDependency {
489489
name: String,
490490
version: String,
491-
source: Option<SourceId>,
491+
source: Option<EncodableSourceId>,
492492
checksum: Option<String>,
493493
dependencies: Option<Vec<EncodablePackageId>>,
494494
replace: Option<EncodablePackageId>,
495495
}
496496

497+
/// Pretty much equivalent to [`SourceId`] with a different serialization method.
498+
///
499+
/// The serialization for `SourceId` doesn't do URL encode for parameters.
500+
/// In contrast, this type is aware of that whenever [`ResolveVersion`] allows
501+
/// us to do so (v4 or later).
502+
///
503+
/// [`EncodableResolve`] turns into a `
504+
#[derive(Deserialize, Debug, PartialOrd, Ord, Clone)]
505+
#[serde(transparent)]
506+
pub struct EncodableSourceId {
507+
inner: SourceId,
508+
/// We don't care about the deserialization of this, as the `url` crate
509+
/// will always decode as the URL was encoded. Only when a [`Resolve`]
510+
/// turns into a [`EncodableResolve`] will it set the value accordingly
511+
/// via [`encodable_source_id`].
512+
#[serde(skip)]
513+
encoded: bool,
514+
}
515+
516+
impl EncodableSourceId {
517+
/// Creates a `EncodableSourceId` that always encodes URL params.
518+
fn new(inner: SourceId) -> Self {
519+
Self {
520+
inner,
521+
encoded: true,
522+
}
523+
}
524+
525+
/// Creates a `EncodableSourceId` that doesn't encode URL params. This is
526+
/// for backward compatibility for order lockfile version.
527+
fn without_url_encoded(inner: SourceId) -> Self {
528+
Self {
529+
inner,
530+
encoded: false,
531+
}
532+
}
533+
534+
/// Encodes the inner [`SourceId`] as a URL.
535+
fn as_url(&self) -> impl fmt::Display + '_ {
536+
if self.encoded {
537+
self.inner.as_encoded_url()
538+
} else {
539+
self.inner.as_url()
540+
}
541+
}
542+
}
543+
544+
impl std::ops::Deref for EncodableSourceId {
545+
type Target = SourceId;
546+
547+
fn deref(&self) -> &Self::Target {
548+
&self.inner
549+
}
550+
}
551+
552+
impl ser::Serialize for EncodableSourceId {
553+
fn serialize<S>(&self, s: S) -> Result<S::Ok, S::Error>
554+
where
555+
S: ser::Serializer,
556+
{
557+
s.collect_str(&self.as_url())
558+
}
559+
}
560+
561+
impl std::hash::Hash for EncodableSourceId {
562+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
563+
self.inner.hash(state)
564+
}
565+
}
566+
567+
impl std::cmp::PartialEq for EncodableSourceId {
568+
fn eq(&self, other: &Self) -> bool {
569+
self.inner == other.inner
570+
}
571+
}
572+
573+
impl std::cmp::Eq for EncodableSourceId {}
574+
497575
#[derive(Debug, PartialOrd, Ord, PartialEq, Eq, Hash, Clone)]
498576
pub struct EncodablePackageId {
499577
name: String,
500578
version: Option<String>,
501-
source: Option<SourceId>,
579+
source: Option<EncodableSourceId>,
502580
}
503581

504582
impl fmt::Display for EncodablePackageId {
@@ -535,7 +613,8 @@ impl FromStr for EncodablePackageId {
535613
Ok(EncodablePackageId {
536614
name: name.to_string(),
537615
version: version.map(|v| v.to_string()),
538-
source: source_id,
616+
// Default to url encoded.
617+
source: source_id.map(EncodableSourceId::new),
539618
})
540619
}
541620
}
@@ -603,7 +682,7 @@ impl ser::Serialize for Resolve {
603682
.map(|id| EncodableDependency {
604683
name: id.name().to_string(),
605684
version: id.version().to_string(),
606-
source: encode_source(id.source_id()),
685+
source: encodable_source_id(id.source_id(), self.version()),
607686
dependencies: None,
608687
replace: None,
609688
checksum: if self.version() >= ResolveVersion::V2 {
@@ -676,7 +755,7 @@ fn encodable_resolve_node(
676755
EncodableDependency {
677756
name: id.name().to_string(),
678757
version: id.version().to_string(),
679-
source: encode_source(id.source_id()),
758+
source: encodable_source_id(id.source_id(), resolve.version()),
680759
dependencies: deps,
681760
replace,
682761
checksum: if resolve.version() >= ResolveVersion::V2 {
@@ -702,7 +781,7 @@ pub fn encodable_package_id(
702781
}
703782
}
704783
}
705-
let mut source = encode_source(id_to_encode).map(|s| s.with_precise(None));
784+
let mut source = encodable_source_id(id_to_encode.with_precise(None), resolve_version);
706785
if let Some(counts) = &state.counts {
707786
let version_counts = &counts[&id.name()];
708787
if version_counts[&id.version()] == 1 {
@@ -719,10 +798,13 @@ pub fn encodable_package_id(
719798
}
720799
}
721800

722-
fn encode_source(id: SourceId) -> Option<SourceId> {
801+
fn encodable_source_id(id: SourceId, version: ResolveVersion) -> Option<EncodableSourceId> {
723802
if id.is_path() {
724803
None
725804
} else {
726-
Some(id)
805+
Some(match version {
806+
ResolveVersion::V4 => EncodableSourceId::new(id),
807+
_ => EncodableSourceId::without_url_encoded(id),
808+
})
727809
}
728810
}

src/cargo/core/resolver/resolve.rs

+2
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ pub enum ResolveVersion {
8383
/// Unstable. Will collect a certain amount of changes and then go.
8484
///
8585
/// Changes made:
86+
///
87+
/// * SourceId URL serialization is aware of URL encoding.
8688
V4,
8789
}
8890

src/cargo/core/source/source_id.rs

+64-8
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,15 @@ impl SourceId {
195195
pub fn as_url(&self) -> SourceIdAsUrl<'_> {
196196
SourceIdAsUrl {
197197
inner: &*self.inner,
198+
encoded: false,
199+
}
200+
}
201+
202+
/// Like [`Self::as_url`] but with URL parameters encoded.
203+
pub fn as_encoded_url(&self) -> SourceIdAsUrl<'_> {
204+
SourceIdAsUrl {
205+
inner: &*self.inner,
206+
encoded: true,
198207
}
199208
}
200209

@@ -566,7 +575,10 @@ impl fmt::Display for SourceId {
566575
// Don't replace the URL display for git references,
567576
// because those are kind of expected to be URLs.
568577
write!(f, "{}", self.inner.url)?;
569-
if let Some(pretty) = reference.pretty_ref() {
578+
// TODO(-Znext-lockfile-bump): set it to true when stabilizing
579+
// lockfile v4, because we want Source ID serialization to be
580+
// consistent with lockfile.
581+
if let Some(pretty) = reference.pretty_ref(false) {
570582
write!(f, "?{}", pretty)?;
571583
}
572584

@@ -714,6 +726,7 @@ impl Ord for SourceKind {
714726
/// A `Display`able view into a `SourceId` that will write it as a url
715727
pub struct SourceIdAsUrl<'a> {
716728
inner: &'a SourceIdInner,
729+
encoded: bool,
717730
}
718731

719732
impl<'a> fmt::Display for SourceIdAsUrl<'a> {
@@ -731,7 +744,7 @@ impl<'a> fmt::Display for SourceIdAsUrl<'a> {
731744
..
732745
} => {
733746
write!(f, "git+{}", url)?;
734-
if let Some(pretty) = reference.pretty_ref() {
747+
if let Some(pretty) = reference.pretty_ref(self.encoded) {
735748
write!(f, "?{}", pretty)?;
736749
}
737750
if let Some(precise) = precise.as_ref() {
@@ -771,27 +784,49 @@ impl<'a> fmt::Display for SourceIdAsUrl<'a> {
771784
impl GitReference {
772785
/// Returns a `Display`able view of this git reference, or None if using
773786
/// the head of the default branch
774-
pub fn pretty_ref(&self) -> Option<PrettyRef<'_>> {
787+
pub fn pretty_ref(&self, url_encoded: bool) -> Option<PrettyRef<'_>> {
775788
match self {
776789
GitReference::DefaultBranch => None,
777-
_ => Some(PrettyRef { inner: self }),
790+
_ => Some(PrettyRef {
791+
inner: self,
792+
url_encoded,
793+
}),
778794
}
779795
}
780796
}
781797

782798
/// A git reference that can be `Display`ed
783799
pub struct PrettyRef<'a> {
784800
inner: &'a GitReference,
801+
url_encoded: bool,
785802
}
786803

787804
impl<'a> fmt::Display for PrettyRef<'a> {
788805
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
789-
match *self.inner {
790-
GitReference::Branch(ref b) => write!(f, "branch={}", b),
791-
GitReference::Tag(ref s) => write!(f, "tag={}", s),
792-
GitReference::Rev(ref s) => write!(f, "rev={}", s),
806+
let value: &str;
807+
match self.inner {
808+
GitReference::Branch(s) => {
809+
write!(f, "branch=")?;
810+
value = s;
811+
}
812+
GitReference::Tag(s) => {
813+
write!(f, "tag=")?;
814+
value = s;
815+
}
816+
GitReference::Rev(s) => {
817+
write!(f, "rev=")?;
818+
value = s;
819+
}
793820
GitReference::DefaultBranch => unreachable!(),
794821
}
822+
if self.url_encoded {
823+
for value in url::form_urlencoded::byte_serialize(value.as_bytes()) {
824+
write!(f, "{value}")?;
825+
}
826+
} else {
827+
write!(f, "{value}")?;
828+
}
829+
Ok(())
795830
}
796831
}
797832

@@ -905,6 +940,27 @@ mod tests {
905940
assert_eq!(formatted, "sparse+https://my-crates.io/");
906941
assert_eq!(source_id, deserialized);
907942
}
943+
944+
#[test]
945+
fn gitrefs_roundtrip() {
946+
let base = "https://host/path".into_url().unwrap();
947+
let branch = GitReference::Branch("*-._+20%30 Z/z#foo=bar&zap[]?to\\()'\"".to_string());
948+
let s1 = SourceId::for_git(&base, branch).unwrap();
949+
let ser1 = format!("{}", s1.as_encoded_url());
950+
let s2 = SourceId::from_url(&ser1).expect("Failed to deserialize");
951+
let ser2 = format!("{}", s2.as_encoded_url());
952+
// Serializing twice should yield the same result
953+
assert_eq!(ser1, ser2, "Serialized forms don't match");
954+
// SourceId serializing the same should have the same semantics
955+
// This used to not be the case (# was ambiguous)
956+
assert_eq!(s1, s2, "SourceId doesn't round-trip");
957+
// Freeze the format to match an x-www-form-urlencoded query string
958+
// https://url.spec.whatwg.org/#application/x-www-form-urlencoded
959+
assert_eq!(
960+
ser1,
961+
"git+https://host/path?branch=*-._%2B20%2530+Z%2Fz%23foo%3Dbar%26zap%5B%5D%3Fto%5C%28%29%27%22"
962+
);
963+
}
908964
}
909965

910966
/// Check if `url` equals to the overridden crates.io URL.

src/cargo/sources/git/source.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,10 @@ impl<'cfg> Debug for GitSource<'cfg> {
164164
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
165165
write!(f, "git repo at {}", self.remote.url())?;
166166

167-
match self.manifest_reference.pretty_ref() {
167+
// TODO(-Znext-lockfile-bump): set it to true when stabilizing
168+
// lockfile v4, because we want Source ID serialization to be
169+
// consistent with lockfile.
170+
match self.manifest_reference.pretty_ref(false) {
168171
Some(s) => write!(f, " ({})", s),
169172
None => Ok(()),
170173
}

src/cargo/util/toml_mut/dependency.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -881,7 +881,11 @@ impl GitSource {
881881
impl std::fmt::Display for GitSource {
882882
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
883883
let git_ref = self.git_ref();
884-
if let Some(pretty_ref) = git_ref.pretty_ref() {
884+
885+
// TODO(-Znext-lockfile-bump): set it to true when stabilizing
886+
// lockfile v4, because we want Source ID serialization to be
887+
// consistent with lockfile.
888+
if let Some(pretty_ref) = git_ref.pretty_ref(false) {
885889
write!(f, "{}?{}", self.git, pretty_ref)
886890
} else {
887891
write!(f, "{}", self.git)

0 commit comments

Comments
 (0)