Skip to content

Commit f2d5a7d

Browse files
committed
feat(gc): record workspace manifest and target dir in global cache tracker
1 parent bd1cf58 commit f2d5a7d

File tree

2 files changed

+207
-3
lines changed

2 files changed

+207
-3
lines changed

src/cargo/core/global_cache_tracker.rs

Lines changed: 198 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,13 +131,15 @@ use std::time::{Duration, SystemTime};
131131
use tracing::{debug, trace};
132132

133133
/// The filename of the database.
134-
const GLOBAL_CACHE_FILENAME: &str = ".global-cache";
134+
const GLOBAL_CACHE_FILENAME: &str = ".global-cache.sqlite";
135135

136136
const REGISTRY_INDEX_TABLE: &str = "registry_index";
137137
const REGISTRY_CRATE_TABLE: &str = "registry_crate";
138138
const REGISTRY_SRC_TABLE: &str = "registry_src";
139139
const GIT_DB_TABLE: &str = "git_db";
140140
const GIT_CO_TABLE: &str = "git_checkout";
141+
const WORKSPACE_MANIFEST_TABLE: &str = "workspace_manifest_index";
142+
const TARGET_DIR_TABLE: &str = "target_dir_index";
141143

142144
/// How often timestamps will be updated.
143145
///
@@ -209,6 +211,27 @@ pub struct GitCheckout {
209211
pub size: Option<u64>,
210212
}
211213

214+
/// The key for a workspace manifest entry stored in the database.
215+
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
216+
pub struct WorkspaceManifestIndex {
217+
/// A unique name of the workspace manifest.
218+
pub encoded_workspace_manifest_name: InternedString,
219+
}
220+
221+
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
222+
pub struct TargetDirIndex {
223+
/// A unique name of the target directory.
224+
pub encoded_target_dir_name: InternedString,
225+
}
226+
227+
/// The key for a workspace entry stored in the database.
228+
#[derive(Clone, Debug, Hash, Eq, PartialEq)]
229+
pub struct WorkspaceSrc {
230+
pub encoded_workspace_manifest_name: InternedString,
231+
pub encoded_target_dir_name: InternedString,
232+
233+
}
234+
212235
/// Filesystem paths in the global cache.
213236
///
214237
/// Accessing these assumes a lock has already been acquired.
@@ -303,6 +326,30 @@ fn migrations() -> Vec<Migration> {
303326
)?;
304327
Ok(())
305328
}),
329+
basic_migration(
330+
"CREATE TABLE workspace_manifest_index (
331+
id INTEGER PRIMARY KEY AUTOINCREMENT,
332+
name TEXT UNIQUE NOT NULL,
333+
timestamp INTEGER NOT NULL
334+
)",
335+
),
336+
basic_migration(
337+
"CREATE TABLE target_dir_index (
338+
id INTEGER PRIMARY KEY AUTOINCREMENT,
339+
name TEXT UNIQUE NOT NULL,
340+
timestamp INTEGER NOT NULL
341+
)",
342+
),
343+
basic_migration(
344+
"CREATE TABLE workspace_src (
345+
workspace_id INTEGER NOT NULL,
346+
target_dir_id INTEGER NOT NULL,
347+
timestamp INTEGER NOT NULL,
348+
PRIMARY KEY (workspace_id, target_dir_id),
349+
FOREIGN KEY (workspace_id) REFERENCES workspace_manifest_index (id) ON DELETE CASCADE,
350+
FOREIGN KEY (target_dir_id) REFERENCES target_dir_index (id) ON DELETE CASCADE
351+
)",
352+
)
306353
]
307354
}
308355

@@ -348,6 +395,7 @@ impl GlobalCacheTracker {
348395
/// [`CacheLockMode::DownloadExclusive`] before calling this.
349396
pub fn new(gctx: &GlobalContext) -> CargoResult<GlobalCacheTracker> {
350397
let db_path = Self::db_path(gctx);
398+
println!("db_path: {:?}", db_path);
351399
// A package cache lock is required to ensure only one cargo is
352400
// accessing at the same time. If there is concurrent access, we
353401
// want to rely on cargo's own "Blocking" system (which can
@@ -1413,7 +1461,16 @@ pub struct DeferredGlobalLastUse {
14131461
/// The key is the git db name (which is its directory name) and the value
14141462
/// is the `id` in the `git_db` table.
14151463
git_keys: HashMap<InternedString, ParentId>,
1416-
1464+
/// Cache of workspace manifest keys, used for faster fetching.
1465+
///
1466+
/// The key is the workspace manifest path and the value
1467+
/// is the `id` in the `workspace_manifest_index` table.
1468+
workspace_manifest_keys: HashMap<InternedString, ParentId>,
1469+
/// Cache of target dir keys, used for faster fetching.
1470+
///
1471+
/// The key is the target dir path and the value
1472+
/// is the `id` in the `target_dir_index` table.
1473+
target_dir_keys: HashMap<InternedString, ParentId>,
14171474
/// New registry index entries to insert.
14181475
registry_index_timestamps: HashMap<RegistryIndex, Timestamp>,
14191476
/// New registry `.crate` entries to insert.
@@ -1424,6 +1481,12 @@ pub struct DeferredGlobalLastUse {
14241481
git_db_timestamps: HashMap<GitDb, Timestamp>,
14251482
/// New git checkout entries to insert.
14261483
git_checkout_timestamps: HashMap<GitCheckout, Timestamp>,
1484+
/// New workspace manifest entries to insert.
1485+
workspace_db_timestamps: HashMap<WorkspaceManifestIndex, Timestamp>,
1486+
/// New target dir entries to insert.
1487+
target_dir_db_timestamps: HashMap<TargetDirIndex, Timestamp>,
1488+
/// New workspace src entries to insert.
1489+
workspace_src_timestamps: HashMap<WorkspaceSrc, Timestamp>,
14271490
/// This is used so that a warning about failing to update the database is
14281491
/// only displayed once.
14291492
save_err_has_warned: bool,
@@ -1437,11 +1500,16 @@ impl DeferredGlobalLastUse {
14371500
DeferredGlobalLastUse {
14381501
registry_keys: HashMap::new(),
14391502
git_keys: HashMap::new(),
1503+
workspace_manifest_keys: HashMap::new(),
1504+
target_dir_keys: HashMap::new(),
14401505
registry_index_timestamps: HashMap::new(),
14411506
registry_crate_timestamps: HashMap::new(),
14421507
registry_src_timestamps: HashMap::new(),
14431508
git_db_timestamps: HashMap::new(),
14441509
git_checkout_timestamps: HashMap::new(),
1510+
target_dir_db_timestamps: HashMap::new(),
1511+
workspace_db_timestamps: HashMap::new(),
1512+
workspace_src_timestamps: HashMap::new(),
14451513
save_err_has_warned: false,
14461514
now: now(),
14471515
}
@@ -1453,6 +1521,9 @@ impl DeferredGlobalLastUse {
14531521
&& self.registry_src_timestamps.is_empty()
14541522
&& self.git_db_timestamps.is_empty()
14551523
&& self.git_checkout_timestamps.is_empty()
1524+
&& self.target_dir_db_timestamps.is_empty()
1525+
&& self.workspace_db_timestamps.is_empty()
1526+
&& self.workspace_src_timestamps.is_empty()
14561527
}
14571528

14581529
fn clear(&mut self) {
@@ -1461,6 +1532,9 @@ impl DeferredGlobalLastUse {
14611532
self.registry_src_timestamps.clear();
14621533
self.git_db_timestamps.clear();
14631534
self.git_checkout_timestamps.clear();
1535+
self.target_dir_db_timestamps.clear();
1536+
self.workspace_db_timestamps.clear();
1537+
self.workspace_src_timestamps.clear();
14641538
}
14651539

14661540
/// Indicates the given [`RegistryIndex`] has been used right now.
@@ -1489,6 +1563,13 @@ impl DeferredGlobalLastUse {
14891563
self.mark_git_checkout_used_stamp(git_checkout, None);
14901564
}
14911565

1566+
/// Indicates the given [`WorkspaceManifest`] has been used right now.
1567+
///
1568+
/// Also implicitly marks the workspace manifest used, too.
1569+
pub fn mark_workspace_src_used(&mut self, workspace_src: WorkspaceSrc) {
1570+
self.mark_workspace_src_used_stamp(workspace_src, None);
1571+
}
1572+
14921573
/// Indicates the given [`RegistryIndex`] has been used with the given
14931574
/// time (or "now" if `None`).
14941575
pub fn mark_registry_index_used_stamp(
@@ -1553,6 +1634,24 @@ impl DeferredGlobalLastUse {
15531634
self.git_checkout_timestamps.insert(git_checkout, timestamp);
15541635
}
15551636

1637+
pub fn mark_workspace_src_used_stamp(
1638+
&mut self,
1639+
workspace_src: WorkspaceSrc,
1640+
timestamp: Option<&SystemTime>,
1641+
) {
1642+
let timestamp = timestamp.map_or(self.now, to_timestamp);
1643+
let workspace_db = WorkspaceManifestIndex {
1644+
encoded_workspace_manifest_name: workspace_src.encoded_workspace_manifest_name,
1645+
};
1646+
let target_dir_db = TargetDirIndex {
1647+
encoded_target_dir_name: workspace_src.encoded_target_dir_name,
1648+
};
1649+
self.target_dir_db_timestamps.insert(target_dir_db, timestamp);
1650+
self.workspace_db_timestamps.insert(workspace_db, timestamp);
1651+
self.workspace_src_timestamps
1652+
.insert(workspace_src, timestamp);
1653+
}
1654+
15561655
/// Saves all of the deferred information to the database.
15571656
///
15581657
/// This will also clear the state of `self`.
@@ -1566,9 +1665,13 @@ impl DeferredGlobalLastUse {
15661665
// These must run before the ones that refer to their IDs.
15671666
self.insert_registry_index_from_cache(&tx)?;
15681667
self.insert_git_db_from_cache(&tx)?;
1668+
self.insert_target_dir_index_from_cache(&tx)?;
1669+
self.insert_workspace_manifest_index_from_cache(&tx)?;
1670+
15691671
self.insert_registry_crate_from_cache(&tx)?;
15701672
self.insert_registry_src_from_cache(&tx)?;
15711673
self.insert_git_checkout_from_cache(&tx)?;
1674+
self.insert_workspace_src_from_cache(&tx)?;
15721675
tx.commit()?;
15731676
trace!(target: "gc", "last-use save complete");
15741677
Ok(())
@@ -1632,6 +1735,32 @@ impl DeferredGlobalLastUse {
16321735
);
16331736
}
16341737

1738+
// Flushes all of the `target_dir_db_timestamps` to the database,
1739+
// clearing `target_dir_index_timestamps`.
1740+
fn insert_target_dir_index_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
1741+
insert_or_update_parent!(
1742+
self,
1743+
conn,
1744+
"target_dir_index",
1745+
target_dir_db_timestamps,
1746+
target_dir_keys,
1747+
encoded_target_dir_name
1748+
);
1749+
}
1750+
1751+
// Flushes all of the `workspace_db_timestamps` to the database,
1752+
// clearing `workspace_manifest_index_timestamps`.
1753+
fn insert_workspace_manifest_index_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
1754+
insert_or_update_parent!(
1755+
self,
1756+
conn,
1757+
"workspace_manifest_index",
1758+
workspace_db_timestamps,
1759+
workspace_manifest_keys,
1760+
encoded_workspace_manifest_name
1761+
);
1762+
}
1763+
16351764
/// Flushes all of the `registry_crate_timestamps` to the database,
16361765
/// clearing `registry_index_timestamps`.
16371766
fn insert_registry_crate_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
@@ -1707,6 +1836,73 @@ impl DeferredGlobalLastUse {
17071836
Ok(())
17081837
}
17091838

1839+
// Flushes all of the `workspace_src_timestamps` to the database,
1840+
// clearing `workspace_src_timestamps`.
1841+
fn insert_workspace_src_from_cache(&mut self, conn: &Connection) -> CargoResult<()> {
1842+
let workspace_src_timestamps = std::mem::take(&mut self.workspace_src_timestamps);
1843+
for (workspace_src, timestamp) in workspace_src_timestamps {
1844+
let workspace_id = self.workspace_id(conn, workspace_src.encoded_workspace_manifest_name)?;
1845+
let target_dir_id = self.target_dir_id(conn, workspace_src.encoded_target_dir_name)?;
1846+
let mut stmt = conn.prepare_cached(
1847+
"INSERT INTO workspace_src (workspace_id, target_dir_id, timestamp)
1848+
VALUES (?1, ?2, ?3)
1849+
ON CONFLICT DO UPDATE SET timestamp=excluded.timestamp
1850+
WHERE timestamp < ?4",
1851+
)?;
1852+
stmt.execute(params![
1853+
workspace_id,
1854+
target_dir_id,
1855+
timestamp,
1856+
timestamp - UPDATE_RESOLUTION
1857+
])?;
1858+
}
1859+
Ok(())
1860+
}
1861+
1862+
fn workspace_id(
1863+
&mut self,
1864+
conn: &Connection,
1865+
encoded_workspace_manifest_name: InternedString,
1866+
) -> CargoResult<ParentId> {
1867+
match self.workspace_manifest_keys.get(&encoded_workspace_manifest_name) {
1868+
Some(i) => Ok(*i),
1869+
None => {
1870+
let Some(id) = GlobalCacheTracker::id_from_name(
1871+
conn,
1872+
WORKSPACE_MANIFEST_TABLE,
1873+
&encoded_workspace_manifest_name,
1874+
)?
1875+
else {
1876+
bail!("expected workspace_manifest {encoded_workspace_manifest_name} to exist, but wasn't found");
1877+
};
1878+
self.workspace_manifest_keys.insert(encoded_workspace_manifest_name, id);
1879+
Ok(id)
1880+
}
1881+
}
1882+
}
1883+
1884+
fn target_dir_id(
1885+
&mut self,
1886+
conn: &Connection,
1887+
encoded_target_dir_name: InternedString,
1888+
) -> CargoResult<ParentId> {
1889+
match self.target_dir_keys.get(&encoded_target_dir_name) {
1890+
Some(i) => Ok(*i),
1891+
None => {
1892+
let Some(id) = GlobalCacheTracker::id_from_name(
1893+
conn,
1894+
TARGET_DIR_TABLE,
1895+
&encoded_target_dir_name,
1896+
)?
1897+
else {
1898+
bail!("expected target_dir {encoded_target_dir_name} to exist, but wasn't found");
1899+
};
1900+
self.target_dir_keys.insert(encoded_target_dir_name, id);
1901+
Ok(id)
1902+
}
1903+
}
1904+
}
1905+
17101906
/// Returns the numeric ID of the registry, either fetching from the local
17111907
/// cache, or getting it from the database.
17121908
///

src/cargo/ops/cargo_compile/mod.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ use crate::core::compiler::{DefaultExecutor, Executor, UnitInterner};
4949
use crate::core::profiles::Profiles;
5050
use crate::core::resolver::features::{self, CliFeatures, FeaturesFor};
5151
use crate::core::resolver::{HasDevUnits, Resolve};
52-
use crate::core::{PackageId, PackageSet, SourceId, TargetKind, Workspace};
52+
use crate::core::{global_cache_tracker, PackageId, PackageSet, SourceId, TargetKind, Workspace};
5353
use crate::drop_println;
5454
use crate::ops;
5555
use crate::ops::resolve::WorkspaceResolve;
@@ -264,6 +264,14 @@ pub fn create_bcx<'a, 'gctx>(
264264
HasDevUnits::No
265265
}
266266
};
267+
let _ = &gctx
268+
.deferred_global_last_use()?
269+
.mark_workspace_src_used(global_cache_tracker::WorkspaceSrc {
270+
encoded_workspace_manifest_name: InternedString::new(
271+
ws.root_manifest().to_str().unwrap(),
272+
),
273+
encoded_target_dir_name: InternedString::new(ws.target_dir().as_path_unlocked().to_str().unwrap()),
274+
});
267275
let resolve = ops::resolve_ws_with_opts(
268276
ws,
269277
&mut target_data,

0 commit comments

Comments
 (0)