Skip to content

Commit a4a882d

Browse files
committed
Refactor RegistryData::load to handle management of the index cache itself.
This enables registry implementations to signal if the cache is valid on a per-request basis. Fixes a bug introduced by #10064 that caused Cargo not to update for several cases in a release build because it believed the index cache to be valid when it was not.
1 parent 2ac382d commit a4a882d

File tree

4 files changed

+148
-135
lines changed

4 files changed

+148
-135
lines changed

src/cargo/sources/registry/index.rs

Lines changed: 92 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@
6868
6969
use crate::core::dependency::Dependency;
7070
use crate::core::{PackageId, SourceId, Summary};
71-
use crate::sources::registry::{RegistryData, RegistryPackage, INDEX_V_MAX};
71+
use crate::sources::registry::{LoadResponse, RegistryData, RegistryPackage, INDEX_V_MAX};
7272
use crate::util::interning::InternedString;
7373
use crate::util::{internal, CargoResult, Config, Filesystem, OptVersionReq, ToSemver};
7474
use anyhow::bail;
@@ -247,6 +247,7 @@ pub struct IndexSummary {
247247
#[derive(Default)]
248248
struct SummariesCache<'a> {
249249
versions: Vec<(Version, &'a [u8])>,
250+
index_version: &'a str,
250251
}
251252

252253
impl<'cfg> RegistryIndex<'cfg> {
@@ -358,7 +359,6 @@ impl<'cfg> RegistryIndex<'cfg> {
358359

359360
let root = load.assert_index_locked(&self.path);
360361
let cache_root = root.join(".cache");
361-
let index_version = load.current_version();
362362

363363
// See module comment in `registry/mod.rs` for why this is structured
364364
// the way it is.
@@ -376,7 +376,6 @@ impl<'cfg> RegistryIndex<'cfg> {
376376
// along the way produce helpful "did you mean?" suggestions.
377377
for (i, path) in UncanonicalizedIter::new(&raw_path).take(1024).enumerate() {
378378
let summaries = Summaries::parse(
379-
index_version.as_deref(),
380379
root,
381380
&cache_root,
382381
path.as_ref(),
@@ -559,7 +558,6 @@ impl Summaries {
559558
/// * `load` - the actual index implementation which may be very slow to
560559
/// call. We avoid this if we can.
561560
pub fn parse(
562-
index_version: Option<&str>,
563561
root: &Path,
564562
cache_root: &Path,
565563
relative: &Path,
@@ -571,88 +569,101 @@ impl Summaries {
571569
// of reasons, but consider all of them non-fatal and just log their
572570
// occurrence in case anyone is debugging anything.
573571
let cache_path = cache_root.join(relative);
574-
let mut cache_contents = None;
575-
if let Some(index_version) = index_version {
576-
match fs::read(&cache_path) {
577-
Ok(contents) => match Summaries::parse_cache(contents, index_version) {
578-
Ok(s) => {
579-
log::debug!("fast path for registry cache of {:?}", relative);
580-
if cfg!(debug_assertions) {
581-
cache_contents = Some(s.raw_data);
582-
} else {
583-
return Poll::Ready(Ok(Some(s)));
584-
}
585-
}
586-
Err(e) => {
587-
log::debug!("failed to parse {:?} cache: {}", relative, e);
588-
}
589-
},
590-
Err(e) => log::debug!("cache missing for {:?} error: {}", relative, e),
591-
}
572+
let mut cached_summaries = None;
573+
let mut index_version = None;
574+
match fs::read(&cache_path) {
575+
Ok(contents) => match Summaries::parse_cache(contents) {
576+
Ok((s, v)) => {
577+
cached_summaries = Some(s);
578+
index_version = Some(v);
579+
}
580+
Err(e) => {
581+
log::debug!("failed to parse {:?} cache: {}", relative, e);
582+
}
583+
},
584+
Err(e) => log::debug!("cache missing for {:?} error: {}", relative, e),
592585
}
593586

594-
// This is the fallback path where we actually talk to the registry backend to load
595-
// information. Here we parse every single line in the index (as we need
596-
// to find the versions)
597-
log::debug!("slow path for {:?}", relative);
587+
let mut response = load.load(root, relative, index_version.as_deref())?;
588+
// In debug builds, perform a second load without the cache so that
589+
// we can validate that the cache is correct.
590+
if cfg!(debug_assertions) && matches!(response, Poll::Ready(LoadResponse::CacheValid)) {
591+
response = load.load(root, relative, None)?;
592+
}
593+
let response = match response {
594+
Poll::Pending => return Poll::Pending,
595+
Poll::Ready(response) => response,
596+
};
597+
598+
let mut bytes_to_cache = None;
599+
let mut version_to_cache = None;
598600
let mut ret = Summaries::default();
599-
let mut hit_closure = false;
600-
let mut cache_bytes = None;
601-
let result = load.load(root, relative, &mut |contents| {
602-
ret.raw_data = contents.to_vec();
603-
let mut cache = SummariesCache::default();
604-
hit_closure = true;
605-
for line in split(contents, b'\n') {
606-
// Attempt forwards-compatibility on the index by ignoring
607-
// everything that we ourselves don't understand, that should
608-
// allow future cargo implementations to break the
609-
// interpretation of each line here and older cargo will simply
610-
// ignore the new lines.
611-
let summary = match IndexSummary::parse(config, line, source_id) {
612-
Ok(summary) => summary,
613-
Err(e) => {
614-
// This should only happen when there is an index
615-
// entry from a future version of cargo that this
616-
// version doesn't understand. Hopefully, those future
617-
// versions of cargo correctly set INDEX_V_MAX and
618-
// CURRENT_CACHE_VERSION, otherwise this will skip
619-
// entries in the cache preventing those newer
620-
// versions from reading them (that is, until the
621-
// cache is rebuilt).
622-
log::info!("failed to parse {:?} registry package: {}", relative, e);
623-
continue;
624-
}
625-
};
626-
let version = summary.summary.package_id().version().clone();
627-
cache.versions.push((version.clone(), line));
628-
ret.versions.insert(version, summary.into());
601+
match response {
602+
LoadResponse::CacheValid => {
603+
log::debug!("fast path for registry cache of {:?}", relative);
604+
return Poll::Ready(Ok(cached_summaries));
629605
}
630-
if let Some(index_version) = index_version {
631-
cache_bytes = Some(cache.serialize(index_version));
606+
LoadResponse::NotFound => {
607+
debug_assert!(cached_summaries.is_none());
608+
return Poll::Ready(Ok(None));
609+
}
610+
LoadResponse::Data {
611+
raw_data,
612+
index_version,
613+
} => {
614+
// This is the fallback path where we actually talk to the registry backend to load
615+
// information. Here we parse every single line in the index (as we need
616+
// to find the versions)
617+
log::debug!("slow path for {:?}", relative);
618+
let mut cache = SummariesCache::default();
619+
ret.raw_data = raw_data;
620+
for line in split(&ret.raw_data, b'\n') {
621+
// Attempt forwards-compatibility on the index by ignoring
622+
// everything that we ourselves don't understand, that should
623+
// allow future cargo implementations to break the
624+
// interpretation of each line here and older cargo will simply
625+
// ignore the new lines.
626+
let summary = match IndexSummary::parse(config, line, source_id) {
627+
Ok(summary) => summary,
628+
Err(e) => {
629+
// This should only happen when there is an index
630+
// entry from a future version of cargo that this
631+
// version doesn't understand. Hopefully, those future
632+
// versions of cargo correctly set INDEX_V_MAX and
633+
// CURRENT_CACHE_VERSION, otherwise this will skip
634+
// entries in the cache preventing those newer
635+
// versions from reading them (that is, until the
636+
// cache is rebuilt).
637+
log::info!("failed to parse {:?} registry package: {}", relative, e);
638+
continue;
639+
}
640+
};
641+
let version = summary.summary.package_id().version().clone();
642+
cache.versions.push((version.clone(), line));
643+
ret.versions.insert(version, summary.into());
644+
}
645+
if let Some(index_version) = index_version {
646+
bytes_to_cache = Some(cache.serialize(index_version.as_str()));
647+
version_to_cache = Some(index_version);
648+
}
632649
}
633-
Ok(())
634-
});
635-
636-
if result?.is_pending() {
637-
assert!(!hit_closure);
638-
return Poll::Pending;
639-
}
640-
641-
if !hit_closure {
642-
debug_assert!(cache_contents.is_none());
643-
return Poll::Ready(Ok(None));
644650
}
645651

646652
// If we've got debug assertions enabled and the cache was previously
647653
// present and considered fresh this is where the debug assertions
648654
// actually happens to verify that our cache is indeed fresh and
649655
// computes exactly the same value as before.
650-
if cfg!(debug_assertions) && cache_contents.is_some() && cache_bytes != cache_contents {
656+
let cache_contents = cached_summaries.as_ref().map(|s| &s.raw_data);
657+
if cfg!(debug_assertions)
658+
&& index_version.as_deref() == version_to_cache.as_deref()
659+
&& cached_summaries.is_some()
660+
&& bytes_to_cache.as_ref() != cache_contents
661+
{
651662
panic!(
652663
"original cache contents:\n{:?}\n\
653664
does not equal new cache contents:\n{:?}\n",
654665
cache_contents.as_ref().map(|s| String::from_utf8_lossy(s)),
655-
cache_bytes.as_ref().map(|s| String::from_utf8_lossy(s)),
666+
bytes_to_cache.as_ref().map(|s| String::from_utf8_lossy(s)),
656667
);
657668
}
658669

@@ -662,7 +673,7 @@ impl Summaries {
662673
//
663674
// This is opportunistic so we ignore failure here but are sure to log
664675
// something in case of error.
665-
if let Some(cache_bytes) = cache_bytes {
676+
if let Some(cache_bytes) = bytes_to_cache {
666677
if paths::create_dir_all(cache_path.parent().unwrap()).is_ok() {
667678
let path = Filesystem::new(cache_path.clone());
668679
config.assert_package_cache_locked(&path);
@@ -677,16 +688,17 @@ impl Summaries {
677688

678689
/// Parses an open `File` which represents information previously cached by
679690
/// Cargo.
680-
pub fn parse_cache(contents: Vec<u8>, last_index_update: &str) -> CargoResult<Summaries> {
681-
let cache = SummariesCache::parse(&contents, last_index_update)?;
691+
pub fn parse_cache(contents: Vec<u8>) -> CargoResult<(Summaries, InternedString)> {
692+
let cache = SummariesCache::parse(&contents)?;
693+
let index_version = InternedString::new(cache.index_version);
682694
let mut ret = Summaries::default();
683695
for (version, summary) in cache.versions {
684696
let (start, end) = subslice_bounds(&contents, summary);
685697
ret.versions
686698
.insert(version, MaybeIndexSummary::Unparsed { start, end });
687699
}
688700
ret.raw_data = contents;
689-
return Ok(ret);
701+
return Ok((ret, index_version));
690702

691703
// Returns the start/end offsets of `inner` with `outer`. Asserts that
692704
// `inner` is a subslice of `outer`.
@@ -742,7 +754,7 @@ impl Summaries {
742754
const CURRENT_CACHE_VERSION: u8 = 3;
743755

744756
impl<'a> SummariesCache<'a> {
745-
fn parse(data: &'a [u8], last_index_update: &str) -> CargoResult<SummariesCache<'a>> {
757+
fn parse(data: &'a [u8]) -> CargoResult<SummariesCache<'a>> {
746758
// NB: keep this method in sync with `serialize` below
747759
let (first_byte, rest) = data
748760
.split_first()
@@ -764,18 +776,13 @@ impl<'a> SummariesCache<'a> {
764776
let rest = &rest[4..];
765777

766778
let mut iter = split(rest, 0);
767-
if let Some(update) = iter.next() {
768-
if update != last_index_update.as_bytes() {
769-
bail!(
770-
"cache out of date: current index ({}) != cache ({})",
771-
last_index_update,
772-
str::from_utf8(update)?,
773-
)
774-
}
779+
let last_index_update = if let Some(update) = iter.next() {
780+
str::from_utf8(update)?
775781
} else {
776782
bail!("malformed file");
777-
}
783+
};
778784
let mut ret = SummariesCache::default();
785+
ret.index_version = last_index_update;
779786
while let Some(version) = iter.next() {
780787
let version = str::from_utf8(version)?;
781788
let version = Version::parse(version)?;

src/cargo/sources/registry/local.rs

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
use crate::core::PackageId;
2-
use crate::sources::registry::{MaybeLock, RegistryConfig, RegistryData};
2+
use crate::sources::registry::{LoadResponse, MaybeLock, RegistryConfig, RegistryData};
33
use crate::util::errors::CargoResult;
4-
use crate::util::interning::InternedString;
54
use crate::util::{Config, Filesystem};
65
use cargo_util::{paths, Sha256};
76
use std::fs::File;
@@ -48,18 +47,17 @@ impl<'cfg> RegistryData for LocalRegistry<'cfg> {
4847
path.as_path_unlocked()
4948
}
5049

51-
fn current_version(&self) -> Option<InternedString> {
52-
None
53-
}
54-
5550
fn load(
5651
&self,
5752
root: &Path,
5853
path: &Path,
59-
data: &mut dyn FnMut(&[u8]) -> CargoResult<()>,
60-
) -> Poll<CargoResult<()>> {
54+
_index_version: Option<&str>,
55+
) -> Poll<CargoResult<LoadResponse>> {
6156
if self.updated {
62-
Poll::Ready(Ok(data(&paths::read_bytes(&root.join(path))?)?))
57+
Poll::Ready(Ok(LoadResponse::Data {
58+
raw_data: paths::read_bytes(&root.join(path))?,
59+
index_version: None,
60+
}))
6361
} else {
6462
Poll::Pending
6563
}

src/cargo/sources/registry/mod.rs

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,20 @@ impl<'a> RegistryDependency<'a> {
411411
}
412412
}
413413

414+
pub enum LoadResponse {
415+
/// The cache is valid. The cached data should be used.
416+
CacheValid,
417+
418+
/// The cache is out of date. Returned data should be used.
419+
Data {
420+
raw_data: Vec<u8>,
421+
index_version: Option<String>,
422+
},
423+
424+
/// The requested crate was found.
425+
NotFound,
426+
}
427+
414428
/// An abstract interface to handle both a local (see `local::LocalRegistry`)
415429
/// and remote (see `remote::RemoteRegistry`) registry.
416430
///
@@ -432,15 +446,13 @@ pub trait RegistryData {
432446
///
433447
/// * `root` is the root path to the index.
434448
/// * `path` is the relative path to the package to load (like `ca/rg/cargo`).
435-
/// * `data` is a callback that will receive the raw bytes of the index JSON file.
436-
///
437-
/// If `load` returns a `Poll::Pending` then it must not have called data.
449+
/// * `index_version` is the version of the requested crate data currently in cache.
438450
fn load(
439451
&self,
440452
root: &Path,
441453
path: &Path,
442-
data: &mut dyn FnMut(&[u8]) -> CargoResult<()>,
443-
) -> Poll<CargoResult<()>>;
454+
index_version: Option<&str>,
455+
) -> Poll<CargoResult<LoadResponse>>;
444456

445457
/// Loads the `config.json` file and returns it.
446458
///
@@ -495,15 +507,6 @@ pub trait RegistryData {
495507
/// Returns the [`Path`] to the [`Filesystem`].
496508
fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path;
497509

498-
/// Returns the current "version" of the index.
499-
///
500-
/// For local registries, this returns `None` because there is no
501-
/// versioning. For remote registries, this returns the SHA hash of the
502-
/// git index on disk (or None if the index hasn't been downloaded yet).
503-
///
504-
/// This is used by index caching to check if the cache is out of date.
505-
fn current_version(&self) -> Option<InternedString>;
506-
507510
/// Block until all outstanding Poll::Pending requests are Poll::Ready.
508511
fn block_until_ready(&mut self) -> CargoResult<()>;
509512
}

0 commit comments

Comments
 (0)