Skip to content

Commit 109bfbd

Browse files
committed
Auto merge of #10482 - arlosi:refactor_load, r=Eh2406
Refactor RegistryData::load to handle management of the index cache Enables registry implementations to signal if the cache is valid on a per-request basis. Fixes a bug introduced by #10064 that caused Cargo not to update for several cases in a release build because it believed the index cache to be valid when it was not. The issue only occurred in release builds because debug builds verify that the cache contents is correct (by refreshing it). Previously `current_version` was called by the index to determine whether the cache was valid. In the new model, `RegistryData::load` is passed the current version of the cache and returns an enum to indicate the status of the cached data. r? `@eh2406` cc `@ehuss`
2 parents 1023fa5 + a4a882d commit 109bfbd

File tree

4 files changed

+148
-135
lines changed

4 files changed

+148
-135
lines changed

src/cargo/sources/registry/index.rs

Lines changed: 92 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@
6868
6969
use crate::core::dependency::Dependency;
7070
use crate::core::{PackageId, SourceId, Summary};
71-
use crate::sources::registry::{RegistryData, RegistryPackage, INDEX_V_MAX};
71+
use crate::sources::registry::{LoadResponse, RegistryData, RegistryPackage, INDEX_V_MAX};
7272
use crate::util::interning::InternedString;
7373
use crate::util::{internal, CargoResult, Config, Filesystem, OptVersionReq, ToSemver};
7474
use anyhow::bail;
@@ -247,6 +247,7 @@ pub struct IndexSummary {
247247
#[derive(Default)]
248248
struct SummariesCache<'a> {
249249
versions: Vec<(Version, &'a [u8])>,
250+
index_version: &'a str,
250251
}
251252

252253
impl<'cfg> RegistryIndex<'cfg> {
@@ -358,7 +359,6 @@ impl<'cfg> RegistryIndex<'cfg> {
358359

359360
let root = load.assert_index_locked(&self.path);
360361
let cache_root = root.join(".cache");
361-
let index_version = load.current_version();
362362

363363
// See module comment in `registry/mod.rs` for why this is structured
364364
// the way it is.
@@ -376,7 +376,6 @@ impl<'cfg> RegistryIndex<'cfg> {
376376
// along the way produce helpful "did you mean?" suggestions.
377377
for (i, path) in UncanonicalizedIter::new(&raw_path).take(1024).enumerate() {
378378
let summaries = Summaries::parse(
379-
index_version.as_deref(),
380379
root,
381380
&cache_root,
382381
path.as_ref(),
@@ -559,7 +558,6 @@ impl Summaries {
559558
/// * `load` - the actual index implementation which may be very slow to
560559
/// call. We avoid this if we can.
561560
pub fn parse(
562-
index_version: Option<&str>,
563561
root: &Path,
564562
cache_root: &Path,
565563
relative: &Path,
@@ -571,88 +569,101 @@ impl Summaries {
571569
// of reasons, but consider all of them non-fatal and just log their
572570
// occurrence in case anyone is debugging anything.
573571
let cache_path = cache_root.join(relative);
574-
let mut cache_contents = None;
575-
if let Some(index_version) = index_version {
576-
match fs::read(&cache_path) {
577-
Ok(contents) => match Summaries::parse_cache(contents, index_version) {
578-
Ok(s) => {
579-
log::debug!("fast path for registry cache of {:?}", relative);
580-
if cfg!(debug_assertions) {
581-
cache_contents = Some(s.raw_data);
582-
} else {
583-
return Poll::Ready(Ok(Some(s)));
584-
}
585-
}
586-
Err(e) => {
587-
log::debug!("failed to parse {:?} cache: {}", relative, e);
588-
}
589-
},
590-
Err(e) => log::debug!("cache missing for {:?} error: {}", relative, e),
591-
}
572+
let mut cached_summaries = None;
573+
let mut index_version = None;
574+
match fs::read(&cache_path) {
575+
Ok(contents) => match Summaries::parse_cache(contents) {
576+
Ok((s, v)) => {
577+
cached_summaries = Some(s);
578+
index_version = Some(v);
579+
}
580+
Err(e) => {
581+
log::debug!("failed to parse {:?} cache: {}", relative, e);
582+
}
583+
},
584+
Err(e) => log::debug!("cache missing for {:?} error: {}", relative, e),
592585
}
593586

594-
// This is the fallback path where we actually talk to the registry backend to load
595-
// information. Here we parse every single line in the index (as we need
596-
// to find the versions)
597-
log::debug!("slow path for {:?}", relative);
587+
let mut response = load.load(root, relative, index_version.as_deref())?;
588+
// In debug builds, perform a second load without the cache so that
589+
// we can validate that the cache is correct.
590+
if cfg!(debug_assertions) && matches!(response, Poll::Ready(LoadResponse::CacheValid)) {
591+
response = load.load(root, relative, None)?;
592+
}
593+
let response = match response {
594+
Poll::Pending => return Poll::Pending,
595+
Poll::Ready(response) => response,
596+
};
597+
598+
let mut bytes_to_cache = None;
599+
let mut version_to_cache = None;
598600
let mut ret = Summaries::default();
599-
let mut hit_closure = false;
600-
let mut cache_bytes = None;
601-
let result = load.load(root, relative, &mut |contents| {
602-
ret.raw_data = contents.to_vec();
603-
let mut cache = SummariesCache::default();
604-
hit_closure = true;
605-
for line in split(contents, b'\n') {
606-
// Attempt forwards-compatibility on the index by ignoring
607-
// everything that we ourselves don't understand, that should
608-
// allow future cargo implementations to break the
609-
// interpretation of each line here and older cargo will simply
610-
// ignore the new lines.
611-
let summary = match IndexSummary::parse(config, line, source_id) {
612-
Ok(summary) => summary,
613-
Err(e) => {
614-
// This should only happen when there is an index
615-
// entry from a future version of cargo that this
616-
// version doesn't understand. Hopefully, those future
617-
// versions of cargo correctly set INDEX_V_MAX and
618-
// CURRENT_CACHE_VERSION, otherwise this will skip
619-
// entries in the cache preventing those newer
620-
// versions from reading them (that is, until the
621-
// cache is rebuilt).
622-
log::info!("failed to parse {:?} registry package: {}", relative, e);
623-
continue;
624-
}
625-
};
626-
let version = summary.summary.package_id().version().clone();
627-
cache.versions.push((version.clone(), line));
628-
ret.versions.insert(version, summary.into());
601+
match response {
602+
LoadResponse::CacheValid => {
603+
log::debug!("fast path for registry cache of {:?}", relative);
604+
return Poll::Ready(Ok(cached_summaries));
629605
}
630-
if let Some(index_version) = index_version {
631-
cache_bytes = Some(cache.serialize(index_version));
606+
LoadResponse::NotFound => {
607+
debug_assert!(cached_summaries.is_none());
608+
return Poll::Ready(Ok(None));
609+
}
610+
LoadResponse::Data {
611+
raw_data,
612+
index_version,
613+
} => {
614+
// This is the fallback path where we actually talk to the registry backend to load
615+
// information. Here we parse every single line in the index (as we need
616+
// to find the versions)
617+
log::debug!("slow path for {:?}", relative);
618+
let mut cache = SummariesCache::default();
619+
ret.raw_data = raw_data;
620+
for line in split(&ret.raw_data, b'\n') {
621+
// Attempt forwards-compatibility on the index by ignoring
622+
// everything that we ourselves don't understand, that should
623+
// allow future cargo implementations to break the
624+
// interpretation of each line here and older cargo will simply
625+
// ignore the new lines.
626+
let summary = match IndexSummary::parse(config, line, source_id) {
627+
Ok(summary) => summary,
628+
Err(e) => {
629+
// This should only happen when there is an index
630+
// entry from a future version of cargo that this
631+
// version doesn't understand. Hopefully, those future
632+
// versions of cargo correctly set INDEX_V_MAX and
633+
// CURRENT_CACHE_VERSION, otherwise this will skip
634+
// entries in the cache preventing those newer
635+
// versions from reading them (that is, until the
636+
// cache is rebuilt).
637+
log::info!("failed to parse {:?} registry package: {}", relative, e);
638+
continue;
639+
}
640+
};
641+
let version = summary.summary.package_id().version().clone();
642+
cache.versions.push((version.clone(), line));
643+
ret.versions.insert(version, summary.into());
644+
}
645+
if let Some(index_version) = index_version {
646+
bytes_to_cache = Some(cache.serialize(index_version.as_str()));
647+
version_to_cache = Some(index_version);
648+
}
632649
}
633-
Ok(())
634-
});
635-
636-
if result?.is_pending() {
637-
assert!(!hit_closure);
638-
return Poll::Pending;
639-
}
640-
641-
if !hit_closure {
642-
debug_assert!(cache_contents.is_none());
643-
return Poll::Ready(Ok(None));
644650
}
645651

646652
// If we've got debug assertions enabled and the cache was previously
647653
// present and considered fresh this is where the debug assertions
648654
// actually happens to verify that our cache is indeed fresh and
649655
// computes exactly the same value as before.
650-
if cfg!(debug_assertions) && cache_contents.is_some() && cache_bytes != cache_contents {
656+
let cache_contents = cached_summaries.as_ref().map(|s| &s.raw_data);
657+
if cfg!(debug_assertions)
658+
&& index_version.as_deref() == version_to_cache.as_deref()
659+
&& cached_summaries.is_some()
660+
&& bytes_to_cache.as_ref() != cache_contents
661+
{
651662
panic!(
652663
"original cache contents:\n{:?}\n\
653664
does not equal new cache contents:\n{:?}\n",
654665
cache_contents.as_ref().map(|s| String::from_utf8_lossy(s)),
655-
cache_bytes.as_ref().map(|s| String::from_utf8_lossy(s)),
666+
bytes_to_cache.as_ref().map(|s| String::from_utf8_lossy(s)),
656667
);
657668
}
658669

@@ -662,7 +673,7 @@ impl Summaries {
662673
//
663674
// This is opportunistic so we ignore failure here but are sure to log
664675
// something in case of error.
665-
if let Some(cache_bytes) = cache_bytes {
676+
if let Some(cache_bytes) = bytes_to_cache {
666677
if paths::create_dir_all(cache_path.parent().unwrap()).is_ok() {
667678
let path = Filesystem::new(cache_path.clone());
668679
config.assert_package_cache_locked(&path);
@@ -677,16 +688,17 @@ impl Summaries {
677688

678689
/// Parses an open `File` which represents information previously cached by
679690
/// Cargo.
680-
pub fn parse_cache(contents: Vec<u8>, last_index_update: &str) -> CargoResult<Summaries> {
681-
let cache = SummariesCache::parse(&contents, last_index_update)?;
691+
pub fn parse_cache(contents: Vec<u8>) -> CargoResult<(Summaries, InternedString)> {
692+
let cache = SummariesCache::parse(&contents)?;
693+
let index_version = InternedString::new(cache.index_version);
682694
let mut ret = Summaries::default();
683695
for (version, summary) in cache.versions {
684696
let (start, end) = subslice_bounds(&contents, summary);
685697
ret.versions
686698
.insert(version, MaybeIndexSummary::Unparsed { start, end });
687699
}
688700
ret.raw_data = contents;
689-
return Ok(ret);
701+
return Ok((ret, index_version));
690702

691703
// Returns the start/end offsets of `inner` with `outer`. Asserts that
692704
// `inner` is a subslice of `outer`.
@@ -742,7 +754,7 @@ impl Summaries {
742754
const CURRENT_CACHE_VERSION: u8 = 3;
743755

744756
impl<'a> SummariesCache<'a> {
745-
fn parse(data: &'a [u8], last_index_update: &str) -> CargoResult<SummariesCache<'a>> {
757+
fn parse(data: &'a [u8]) -> CargoResult<SummariesCache<'a>> {
746758
// NB: keep this method in sync with `serialize` below
747759
let (first_byte, rest) = data
748760
.split_first()
@@ -764,18 +776,13 @@ impl<'a> SummariesCache<'a> {
764776
let rest = &rest[4..];
765777

766778
let mut iter = split(rest, 0);
767-
if let Some(update) = iter.next() {
768-
if update != last_index_update.as_bytes() {
769-
bail!(
770-
"cache out of date: current index ({}) != cache ({})",
771-
last_index_update,
772-
str::from_utf8(update)?,
773-
)
774-
}
779+
let last_index_update = if let Some(update) = iter.next() {
780+
str::from_utf8(update)?
775781
} else {
776782
bail!("malformed file");
777-
}
783+
};
778784
let mut ret = SummariesCache::default();
785+
ret.index_version = last_index_update;
779786
while let Some(version) = iter.next() {
780787
let version = str::from_utf8(version)?;
781788
let version = Version::parse(version)?;

src/cargo/sources/registry/local.rs

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
use crate::core::PackageId;
2-
use crate::sources::registry::{MaybeLock, RegistryConfig, RegistryData};
2+
use crate::sources::registry::{LoadResponse, MaybeLock, RegistryConfig, RegistryData};
33
use crate::util::errors::CargoResult;
4-
use crate::util::interning::InternedString;
54
use crate::util::{Config, Filesystem};
65
use cargo_util::{paths, Sha256};
76
use std::fs::File;
@@ -48,18 +47,17 @@ impl<'cfg> RegistryData for LocalRegistry<'cfg> {
4847
path.as_path_unlocked()
4948
}
5049

51-
fn current_version(&self) -> Option<InternedString> {
52-
None
53-
}
54-
5550
fn load(
5651
&self,
5752
root: &Path,
5853
path: &Path,
59-
data: &mut dyn FnMut(&[u8]) -> CargoResult<()>,
60-
) -> Poll<CargoResult<()>> {
54+
_index_version: Option<&str>,
55+
) -> Poll<CargoResult<LoadResponse>> {
6156
if self.updated {
62-
Poll::Ready(Ok(data(&paths::read_bytes(&root.join(path))?)?))
57+
Poll::Ready(Ok(LoadResponse::Data {
58+
raw_data: paths::read_bytes(&root.join(path))?,
59+
index_version: None,
60+
}))
6361
} else {
6462
Poll::Pending
6563
}

src/cargo/sources/registry/mod.rs

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,20 @@ impl<'a> RegistryDependency<'a> {
411411
}
412412
}
413413

414+
pub enum LoadResponse {
415+
/// The cache is valid. The cached data should be used.
416+
CacheValid,
417+
418+
/// The cache is out of date. Returned data should be used.
419+
Data {
420+
raw_data: Vec<u8>,
421+
index_version: Option<String>,
422+
},
423+
424+
/// The requested crate was found.
425+
NotFound,
426+
}
427+
414428
/// An abstract interface to handle both a local (see `local::LocalRegistry`)
415429
/// and remote (see `remote::RemoteRegistry`) registry.
416430
///
@@ -432,15 +446,13 @@ pub trait RegistryData {
432446
///
433447
/// * `root` is the root path to the index.
434448
/// * `path` is the relative path to the package to load (like `ca/rg/cargo`).
435-
/// * `data` is a callback that will receive the raw bytes of the index JSON file.
436-
///
437-
/// If `load` returns a `Poll::Pending` then it must not have called data.
449+
/// * `index_version` is the version of the requested crate data currently in cache.
438450
fn load(
439451
&self,
440452
root: &Path,
441453
path: &Path,
442-
data: &mut dyn FnMut(&[u8]) -> CargoResult<()>,
443-
) -> Poll<CargoResult<()>>;
454+
index_version: Option<&str>,
455+
) -> Poll<CargoResult<LoadResponse>>;
444456

445457
/// Loads the `config.json` file and returns it.
446458
///
@@ -495,15 +507,6 @@ pub trait RegistryData {
495507
/// Returns the [`Path`] to the [`Filesystem`].
496508
fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path;
497509

498-
/// Returns the current "version" of the index.
499-
///
500-
/// For local registries, this returns `None` because there is no
501-
/// versioning. For remote registries, this returns the SHA hash of the
502-
/// git index on disk (or None if the index hasn't been downloaded yet).
503-
///
504-
/// This is used by index caching to check if the cache is out of date.
505-
fn current_version(&self) -> Option<InternedString>;
506-
507510
/// Block until all outstanding Poll::Pending requests are Poll::Ready.
508511
fn block_until_ready(&mut self) -> CargoResult<()>;
509512
}

0 commit comments

Comments
 (0)