Skip to content

Commit 7442c14

Browse files
committed
Auto merge of #9161 - ehuss:index-v-features2, r=alexcrichton
Add schema field and `features2` to the index. This adds a `v` field to the index which indicates a format version for an index entry. If Cargo encounters a version newer than it understands, it will ignore those entries. This makes it safer to make changes to the index entries (such as adding new things), and not need to worry about how older cargos will react to it. In particular, this will make it safer to run `cargo update` on older versions if we ever decide to add new things to the index. Currently this is not written anywhere, and is intended as a safety guard for the future. For now I will leave it undocumented until we actually decide to start using it. This also moves the new syntax for namespaced features and weak dependency features into a new field ("features2") in the index. This is necessary to avoid breaking Cargo versions older than 1.19, which fail to parse the index even if there is a Cargo.lock file. It is intended that only crates.io will bother with creating this field. Other registries don't need to bother, since they generally don't support Cargo older than 1.19. I'm uncertain exactly when we should try to update crates.io to start accepting this, as that is a somewhat permanent decision.
2 parents 51a437d + f258569 commit 7442c14

File tree

11 files changed

+985
-19
lines changed

11 files changed

+985
-19
lines changed

crates/cargo-test-support/src/registry.rs

Lines changed: 46 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use cargo::sources::CRATES_IO_INDEX;
44
use cargo::util::Sha256;
55
use flate2::write::GzEncoder;
66
use flate2::Compression;
7-
use std::collections::HashMap;
7+
use std::collections::BTreeMap;
88
use std::fmt::Write as _;
99
use std::fs::{self, File};
1010
use std::io::{BufRead, BufReader, Write};
@@ -319,16 +319,19 @@ pub struct Package {
319319
deps: Vec<Dependency>,
320320
files: Vec<PackageFile>,
321321
yanked: bool,
322-
features: HashMap<String, Vec<String>>,
322+
features: FeatureMap,
323323
local: bool,
324324
alternative: bool,
325325
invalid_json: bool,
326326
proc_macro: bool,
327327
links: Option<String>,
328328
rust_version: Option<String>,
329329
cargo_features: Vec<String>,
330+
v: Option<u32>,
330331
}
331332

333+
type FeatureMap = BTreeMap<String, Vec<String>>;
334+
332335
#[derive(Clone)]
333336
pub struct Dependency {
334337
name: String,
@@ -393,14 +396,15 @@ impl Package {
393396
deps: Vec::new(),
394397
files: Vec::new(),
395398
yanked: false,
396-
features: HashMap::new(),
399+
features: BTreeMap::new(),
397400
local: false,
398401
alternative: false,
399402
invalid_json: false,
400403
proc_macro: false,
401404
links: None,
402405
rust_version: None,
403406
cargo_features: Vec::new(),
407+
v: None,
404408
}
405409
}
406410

@@ -554,6 +558,14 @@ impl Package {
554558
self
555559
}
556560

561+
/// Sets the index schema version for this package.
562+
///
563+
/// See [`cargo::sources::registry::RegistryPackage`] for more information.
564+
pub fn schema_version(&mut self, version: u32) -> &mut Package {
565+
self.v = Some(version);
566+
self
567+
}
568+
557569
/// Creates the package and place it in the registry.
558570
///
559571
/// This does not actually use Cargo's publishing system, but instead
@@ -599,16 +611,25 @@ impl Package {
599611
} else {
600612
serde_json::json!(self.name)
601613
};
602-
let line = serde_json::json!({
614+
// This emulates what crates.io may do in the future.
615+
let (features, features2) = split_index_features(self.features.clone());
616+
let mut json = serde_json::json!({
603617
"name": name,
604618
"vers": self.vers,
605619
"deps": deps,
606620
"cksum": cksum,
607-
"features": self.features,
621+
"features": features,
608622
"yanked": self.yanked,
609623
"links": self.links,
610-
})
611-
.to_string();
624+
});
625+
if let Some(f2) = &features2 {
626+
json["features2"] = serde_json::json!(f2);
627+
json["v"] = serde_json::json!(2);
628+
}
629+
if let Some(v) = self.v {
630+
json["v"] = serde_json::json!(v);
631+
}
632+
let line = json.to_string();
612633

613634
let file = match self.name.len() {
614635
1 => format!("1/{}", self.name),
@@ -837,3 +858,21 @@ impl Dependency {
837858
self
838859
}
839860
}
861+
862+
fn split_index_features(mut features: FeatureMap) -> (FeatureMap, Option<FeatureMap>) {
863+
let mut features2 = FeatureMap::new();
864+
for (feat, values) in features.iter_mut() {
865+
if values
866+
.iter()
867+
.any(|value| value.starts_with("dep:") || value.contains("?/"))
868+
{
869+
let new_values = values.drain(..).collect();
870+
features2.insert(feat.clone(), new_values);
871+
}
872+
}
873+
if features2.is_empty() {
874+
(features, None)
875+
} else {
876+
(features, Some(features2))
877+
}
878+
}

crates/crates-io/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ pub struct NewCrate {
5656
pub repository: Option<String>,
5757
pub badges: BTreeMap<String, BTreeMap<String, String>>,
5858
pub links: Option<String>,
59+
#[serde(skip_serializing_if = "Option::is_none")]
60+
pub v: Option<u32>,
5961
}
6062

6163
#[derive(Serialize)]

src/cargo/core/summary.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ impl Summary {
3737
features: &BTreeMap<InternedString, Vec<InternedString>>,
3838
links: Option<impl Into<InternedString>>,
3939
) -> CargoResult<Summary> {
40+
// ****CAUTION**** If you change anything here than may raise a new
41+
// error, be sure to coordinate that change with either the index
42+
// schema field or the SummariesCache version.
4043
let mut has_overlapping_features = None;
4144
for dep in dependencies.iter() {
4245
let dep_name = dep.name_in_toml();

src/cargo/ops/registry.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,7 @@ fn transmit(
305305
license_file: license_file.clone(),
306306
badges: badges.clone(),
307307
links: links.clone(),
308+
v: None,
308309
},
309310
tarball,
310311
);

src/cargo/sources/registry/index.rs

Lines changed: 83 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -68,13 +68,15 @@
6868
6969
use crate::core::dependency::Dependency;
7070
use crate::core::{PackageId, SourceId, Summary};
71-
use crate::sources::registry::{RegistryData, RegistryPackage};
71+
use crate::sources::registry::{RegistryData, RegistryPackage, INDEX_V_MAX};
7272
use crate::util::interning::InternedString;
7373
use crate::util::paths;
7474
use crate::util::{internal, CargoResult, Config, Filesystem, ToSemver};
75-
use log::info;
75+
use anyhow::bail;
76+
use log::{debug, info};
7677
use semver::{Version, VersionReq};
7778
use std::collections::{HashMap, HashSet};
79+
use std::convert::TryInto;
7880
use std::fs;
7981
use std::path::Path;
8082
use std::str;
@@ -233,6 +235,8 @@ enum MaybeIndexSummary {
233235
pub struct IndexSummary {
234236
pub summary: Summary,
235237
pub yanked: bool,
238+
/// Schema version, see [`RegistryPackage`].
239+
v: u32,
236240
}
237241

238242
/// A representation of the cache on disk that Cargo maintains of summaries.
@@ -305,6 +309,11 @@ impl<'cfg> RegistryIndex<'cfg> {
305309
// minimize the amount of work being done here and parse as little as
306310
// necessary.
307311
let raw_data = &summaries.raw_data;
312+
let max_version = if namespaced_features || weak_dep_features {
313+
INDEX_V_MAX
314+
} else {
315+
1
316+
};
308317
Ok(summaries
309318
.versions
310319
.iter_mut()
@@ -318,6 +327,19 @@ impl<'cfg> RegistryIndex<'cfg> {
318327
}
319328
},
320329
)
330+
.filter(move |is| {
331+
if is.v > max_version {
332+
debug!(
333+
"unsupported schema version {} ({} {})",
334+
is.v,
335+
is.summary.name(),
336+
is.summary.version()
337+
);
338+
false
339+
} else {
340+
true
341+
}
342+
})
321343
.filter(move |is| {
322344
is.summary
323345
.unstable_gate(namespaced_features, weak_dep_features)
@@ -550,6 +572,14 @@ impl Summaries {
550572
let summary = match IndexSummary::parse(config, line, source_id) {
551573
Ok(summary) => summary,
552574
Err(e) => {
575+
// This should only happen when there is an index
576+
// entry from a future version of cargo that this
577+
// version doesn't understand. Hopefully, those future
578+
// versions of cargo correctly set INDEX_V_MAX and
579+
// CURRENT_CACHE_VERSION, otherwise this will skip
580+
// entries in the cache preventing those newer
581+
// versions from reading them (that is, until the
582+
// cache is rebuilt).
553583
log::info!("failed to parse {:?} registry package: {}", relative, e);
554584
continue;
555585
}
@@ -578,7 +608,14 @@ impl Summaries {
578608
// actually happens to verify that our cache is indeed fresh and
579609
// computes exactly the same value as before.
580610
if cfg!(debug_assertions) && cache_contents.is_some() {
581-
assert_eq!(cache_bytes, cache_contents);
611+
if cache_bytes != cache_contents {
612+
panic!(
613+
"original cache contents:\n{:?}\n\
614+
does not equal new cache contents:\n{:?}\n",
615+
cache_contents.as_ref().map(|s| String::from_utf8_lossy(s)),
616+
cache_bytes.as_ref().map(|s| String::from_utf8_lossy(s)),
617+
);
618+
}
582619
}
583620

584621
// Once we have our `cache_bytes` which represents the `Summaries` we're
@@ -630,9 +667,9 @@ impl Summaries {
630667
// Implementation of serializing/deserializing the cache of summaries on disk.
631668
// Currently the format looks like:
632669
//
633-
// +--------------+-------------+---+
634-
// | version byte | git sha rev | 0 |
635-
// +--------------+-------------+---+
670+
// +--------------------+----------------------+-------------+---+
671+
// | cache version byte | index format version | git sha rev | 0 |
672+
// +--------------------+----------------------+-------------+---+
636673
//
637674
// followed by...
638675
//
@@ -649,8 +686,14 @@ impl Summaries {
649686
// versions of Cargo share the same cache they don't get too confused. The git
650687
// sha lets us know when the file needs to be regenerated (it needs regeneration
651688
// whenever the index itself updates).
689+
//
690+
// Cache versions:
691+
// * `1`: The original version.
692+
// * `2`: Added the "index format version" field so that if the index format
693+
// changes, different versions of cargo won't get confused reading each
694+
// other's caches.
652695

653-
const CURRENT_CACHE_VERSION: u8 = 1;
696+
const CURRENT_CACHE_VERSION: u8 = 2;
654697

655698
impl<'a> SummariesCache<'a> {
656699
fn parse(data: &'a [u8], last_index_update: &str) -> CargoResult<SummariesCache<'a>> {
@@ -659,19 +702,32 @@ impl<'a> SummariesCache<'a> {
659702
.split_first()
660703
.ok_or_else(|| anyhow::format_err!("malformed cache"))?;
661704
if *first_byte != CURRENT_CACHE_VERSION {
662-
anyhow::bail!("looks like a different Cargo's cache, bailing out");
705+
bail!("looks like a different Cargo's cache, bailing out");
706+
}
707+
let index_v_bytes = rest
708+
.get(..4)
709+
.ok_or_else(|| anyhow::anyhow!("cache expected 4 bytes for index version"))?;
710+
let index_v = u32::from_le_bytes(index_v_bytes.try_into().unwrap());
711+
if index_v != INDEX_V_MAX {
712+
bail!(
713+
"index format version {} doesn't match the version I know ({})",
714+
index_v,
715+
INDEX_V_MAX
716+
);
663717
}
718+
let rest = &rest[4..];
719+
664720
let mut iter = split(rest, 0);
665721
if let Some(update) = iter.next() {
666722
if update != last_index_update.as_bytes() {
667-
anyhow::bail!(
723+
bail!(
668724
"cache out of date: current index ({}) != cache ({})",
669725
last_index_update,
670726
str::from_utf8(update)?,
671727
)
672728
}
673729
} else {
674-
anyhow::bail!("malformed file");
730+
bail!("malformed file");
675731
}
676732
let mut ret = SummariesCache::default();
677733
while let Some(version) = iter.next() {
@@ -692,6 +748,7 @@ impl<'a> SummariesCache<'a> {
692748
.sum();
693749
let mut contents = Vec::with_capacity(size);
694750
contents.push(CURRENT_CACHE_VERSION);
751+
contents.extend(&u32::to_le_bytes(INDEX_V_MAX));
695752
contents.extend_from_slice(index_version.as_bytes());
696753
contents.push(0);
697754
for (version, data) in self.versions.iter() {
@@ -741,26 +798,41 @@ impl IndexSummary {
741798
///
742799
/// The `line` provided is expected to be valid JSON.
743800
fn parse(config: &Config, line: &[u8], source_id: SourceId) -> CargoResult<IndexSummary> {
801+
// ****CAUTION**** Please be extremely careful with returning errors
802+
// from this function. Entries that error are not included in the
803+
// index cache, and can cause cargo to get confused when switching
804+
// between different versions that understand the index differently.
805+
// Make sure to consider the INDEX_V_MAX and CURRENT_CACHE_VERSION
806+
// values carefully when making changes here.
744807
let RegistryPackage {
745808
name,
746809
vers,
747810
cksum,
748811
deps,
749-
features,
812+
mut features,
813+
features2,
750814
yanked,
751815
links,
816+
v,
752817
} = serde_json::from_slice(line)?;
818+
let v = v.unwrap_or(1);
753819
log::trace!("json parsed registry {}/{}", name, vers);
754820
let pkgid = PackageId::new(name, &vers, source_id)?;
755821
let deps = deps
756822
.into_iter()
757823
.map(|dep| dep.into_dep(source_id))
758824
.collect::<CargoResult<Vec<_>>>()?;
825+
if let Some(features2) = features2 {
826+
for (name, values) in features2 {
827+
features.entry(name).or_default().extend(values);
828+
}
829+
}
759830
let mut summary = Summary::new(config, pkgid, deps, &features, links)?;
760831
summary.set_checksum(cksum);
761832
Ok(IndexSummary {
762833
summary,
763834
yanked: yanked.unwrap_or(false),
835+
v,
764836
})
765837
}
766838
}

src/cargo/sources/registry/mod.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,10 @@ pub struct RegistryConfig {
250250
pub api: Option<String>,
251251
}
252252

253+
/// The maximum version of the `v` field in the index this version of cargo
254+
/// understands.
255+
pub(crate) const INDEX_V_MAX: u32 = 2;
256+
253257
/// A single line in the index representing a single version of a package.
254258
#[derive(Deserialize)]
255259
pub struct RegistryPackage<'a> {
@@ -258,6 +262,13 @@ pub struct RegistryPackage<'a> {
258262
#[serde(borrow)]
259263
deps: Vec<RegistryDependency<'a>>,
260264
features: BTreeMap<InternedString, Vec<InternedString>>,
265+
/// This field contains features with new, extended syntax. Specifically,
266+
/// namespaced features (`dep:`) and weak dependencies (`pkg?/feat`).
267+
///
268+
/// This is separated from `features` because versions older than 1.19
269+
/// will fail to load due to not being able to parse the new syntax, even
270+
/// with a `Cargo.lock` file.
271+
features2: Option<BTreeMap<InternedString, Vec<InternedString>>>,
261272
cksum: String,
262273
/// If `true`, Cargo will skip this version when resolving.
263274
///
@@ -269,6 +280,26 @@ pub struct RegistryPackage<'a> {
269280
/// Added early 2018 (see <https://github.com/rust-lang/cargo/pull/4978>),
270281
/// can be `None` if published before then.
271282
links: Option<InternedString>,
283+
/// The schema version for this entry.
284+
///
285+
/// If this is None, it defaults to version 1. Entries with unknown
286+
/// versions are ignored.
287+
///
288+
/// Version `2` format adds the `features2` field.
289+
///
290+
/// This provides a method to safely introduce changes to index entries
291+
/// and allow older versions of cargo to ignore newer entries it doesn't
292+
/// understand. This is honored as of 1.51, so unfortunately older
293+
/// versions will ignore it, and potentially misinterpret version 2 and
294+
/// newer entries.
295+
///
296+
/// The intent is that versions older than 1.51 will work with a
297+
/// pre-existing `Cargo.lock`, but they may not correctly process `cargo
298+
/// update` or build a lock from scratch. In that case, cargo may
299+
/// incorrectly select a new package that uses a new index format. A
300+
/// workaround is to downgrade any packages that are incompatible with the
301+
/// `--precise` flag of `cargo update`.
302+
v: Option<u32>,
272303
}
273304

274305
#[test]

0 commit comments

Comments
 (0)