Skip to content

Commit 8fde4e3

Browse files
committed
Auto merge of #5121 - Eh2406:string_interning, r=alexcrichton
String interning This builds on the work from #5118. This interns the strings in the part of resolver that gets cloned a lot. In a test on #4810 (comment) Before we got to 1700000 ticks in ~(63 to 67) sec from #5118 After we got to 1700000 ticks in ~(42 to 45) sec The interning code itself would be much better with a `leak` function that converts a `String` to a `&'static str`. Something like: ```rust pub fn leek(s: String) -> &'static str { let ptr = s.as_ptr(); let len = s.len(); mem::forget(s); unsafe { let slice = slice::from_raw_parts(ptr, len); str::from_utf8(slice).unwrap() } } ``` but "there is no unsafe in Cargo", and I am not the best at unsafe. So I just `to_string` and lived with the extra copy. Is there a better way to hand out references? I assumed that `InternedString::new` world start appearing in profile result, and that we would want `PackageId`, and `Summary`, Et Al. to store the `InternedString`. That is why I put the interner in a shared folder. So far it is just used in the resolver. It may make sense for a lot more of the Strings to be interned, but with the extra copy... I have not explored it yet.
2 parents 879f483 + 98480e8 commit 8fde4e3

File tree

5 files changed

+85
-15
lines changed

5 files changed

+85
-15
lines changed

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ glob = "0.2"
3434
hex = "0.3"
3535
home = "0.3"
3636
ignore = "0.4"
37+
lazy_static = "1.0.0"
3738
jobserver = "0.1.9"
3839
lazycell = "0.6"
3940
libc = "0.2"

src/cargo/core/interning.rs

+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
use std::sync::RwLock;
2+
use std::collections::HashSet;
3+
use std::slice;
4+
use std::str;
5+
use std::mem;
6+
use std::cmp::Ordering;
7+
use std::ops::Deref;
8+
9+
pub fn leek(s: String) -> &'static str {
10+
let boxed = s.into_boxed_str();
11+
let ptr = boxed.as_ptr();
12+
let len = boxed.len();
13+
mem::forget(boxed);
14+
unsafe {
15+
let slice = slice::from_raw_parts(ptr, len);
16+
str::from_utf8_unchecked(slice)
17+
}
18+
}
19+
20+
lazy_static! {
21+
static ref STRING_CASHE: RwLock<HashSet<&'static str>> =
22+
RwLock::new(HashSet::new());
23+
}
24+
25+
#[derive(Eq, PartialEq, Hash, Clone, Copy)]
26+
pub struct InternedString {
27+
ptr: *const u8,
28+
len: usize,
29+
}
30+
31+
impl InternedString {
32+
pub fn new(str: &str) -> InternedString {
33+
let mut cache = STRING_CASHE.write().unwrap();
34+
if let Some(&s) = cache.get(str) {
35+
return InternedString { ptr: s.as_ptr(), len: s.len() };
36+
}
37+
let s = leek(str.to_string());
38+
cache.insert(s);
39+
InternedString { ptr: s.as_ptr(), len: s.len() }
40+
}
41+
}
42+
43+
impl Deref for InternedString {
44+
type Target = str;
45+
46+
fn deref(&self) -> &'static str {
47+
unsafe {
48+
let slice = slice::from_raw_parts(self.ptr, self.len);
49+
&str::from_utf8_unchecked(slice)
50+
}
51+
}
52+
}
53+
54+
impl Ord for InternedString {
55+
fn cmp(&self, other: &InternedString) -> Ordering {
56+
let str: &str = &*self;
57+
str.cmp(&*other)
58+
}
59+
}
60+
61+
impl PartialOrd for InternedString {
62+
fn partial_cmp(&self, other: &InternedString) -> Option<Ordering> {
63+
Some(self.cmp(other))
64+
}
65+
}
66+
67+
unsafe impl Send for InternedString {}
68+
unsafe impl Sync for InternedString {}

src/cargo/core/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ pub mod resolver;
2121
pub mod summary;
2222
pub mod shell;
2323
pub mod registry;
24+
mod interning;
2425
mod package_id_spec;
2526
mod workspace;
2627
mod features;

src/cargo/core/resolver/mod.rs

+14-15
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ use url::Url;
6060

6161
use core::{PackageId, Registry, SourceId, Summary, Dependency};
6262
use core::PackageIdSpec;
63+
use core::interning::InternedString;
6364
use util::config::Config;
6465
use util::Graph;
6566
use util::errors::{CargoResult, CargoError};
@@ -343,8 +344,8 @@ struct Context {
343344
// switch to persistent hash maps if we can at some point or otherwise
344345
// make these much cheaper to clone in general.
345346
activations: Activations,
346-
resolve_features: HashMap<PackageId, HashSet<String>>,
347-
links: HashMap<String, PackageId>,
347+
resolve_features: HashMap<PackageId, HashSet<InternedString>>,
348+
links: HashMap<InternedString, PackageId>,
348349

349350
// These are two cheaply-cloneable lists (O(1) clone) which are effectively
350351
// hash maps but are built up as "construction lists". We'll iterate these
@@ -356,7 +357,7 @@ struct Context {
356357
warnings: RcList<String>,
357358
}
358359

359-
type Activations = HashMap<String, HashMap<SourceId, Rc<Vec<Summary>>>>;
360+
type Activations = HashMap<InternedString, HashMap<SourceId, Rc<Vec<Summary>>>>;
360361

361362
/// Builds the list of all packages required to build the first argument.
362363
pub fn resolve(summaries: &[(Summary, Method)],
@@ -382,7 +383,7 @@ pub fn resolve(summaries: &[(Summary, Method)],
382383
metadata: BTreeMap::new(),
383384
replacements: cx.resolve_replacements(),
384385
features: cx.resolve_features.iter().map(|(k, v)| {
385-
(k.clone(), v.clone())
386+
(k.clone(), v.iter().map(|x| x.to_string()).collect())
386387
}).collect(),
387388
unused_patches: Vec::new(),
388389
};
@@ -717,7 +718,7 @@ impl RemainingCandidates {
717718
fn next(
718719
&mut self,
719720
prev_active: &[Summary],
720-
links: &HashMap<String, PackageId>,
721+
links: &HashMap<InternedString, PackageId>,
721722
) -> Result<(Candidate, bool), HashMap<PackageId, ConflictReason>> {
722723
// Filter the set of candidates based on the previously activated
723724
// versions for this dependency. We can actually use a version if it
@@ -734,7 +735,7 @@ impl RemainingCandidates {
734735
use std::mem::replace;
735736
for (_, b) in self.remaining.by_ref() {
736737
if let Some(link) = b.summary.links() {
737-
if let Some(a) = links.get(link) {
738+
if let Some(a) = links.get(&InternedString::new(link)) {
738739
if a != b.summary.package_id() {
739740
self.conflicting_prev_active
740741
.entry(a.clone())
@@ -1304,14 +1305,14 @@ impl Context {
13041305
method: &Method) -> CargoResult<bool> {
13051306
let id = summary.package_id();
13061307
let prev = self.activations
1307-
.entry(id.name().to_string())
1308+
.entry(InternedString::new(id.name()))
13081309
.or_insert_with(HashMap::new)
13091310
.entry(id.source_id().clone())
13101311
.or_insert_with(||Rc::new(Vec::new()));
13111312
if !prev.iter().any(|c| c == summary) {
13121313
self.resolve_graph.push(GraphNode::Add(id.clone()));
13131314
if let Some(link) = summary.links() {
1314-
ensure!(self.links.insert(link.to_owned(), id.clone()).is_none(),
1315+
ensure!(self.links.insert(InternedString::new(link), id.clone()).is_none(),
13151316
"Attempting to resolve a with more then one crate with the links={}. \n\
13161317
This will not build as is. Consider rebuilding the .lock file.", link);
13171318
}
@@ -1332,8 +1333,8 @@ impl Context {
13321333
let has_default_feature = summary.features().contains_key("default");
13331334
Ok(match self.resolve_features.get(id) {
13341335
Some(prev) => {
1335-
features.iter().all(|f| prev.contains(f)) &&
1336-
(!use_default || prev.contains("default") ||
1336+
features.iter().all(|f| prev.contains(&InternedString::new(f))) &&
1337+
(!use_default || prev.contains(&InternedString::new("default")) ||
13371338
!has_default_feature)
13381339
}
13391340
None => features.is_empty() && (!use_default || !has_default_feature)
@@ -1367,14 +1368,14 @@ impl Context {
13671368
}
13681369

13691370
fn prev_active(&self, dep: &Dependency) -> &[Summary] {
1370-
self.activations.get(dep.name())
1371+
self.activations.get(&InternedString::new(dep.name()))
13711372
.and_then(|v| v.get(dep.source_id()))
13721373
.map(|v| &v[..])
13731374
.unwrap_or(&[])
13741375
}
13751376

13761377
fn is_active(&self, id: &PackageId) -> bool {
1377-
self.activations.get(id.name())
1378+
self.activations.get(&InternedString::new(id.name()))
13781379
.and_then(|v| v.get(id.source_id()))
13791380
.map(|v| v.iter().any(|s| s.package_id() == id))
13801381
.unwrap_or(false)
@@ -1448,9 +1449,7 @@ impl Context {
14481449
let set = self.resolve_features.entry(pkgid.clone())
14491450
.or_insert_with(HashSet::new);
14501451
for feature in reqs.used {
1451-
if !set.contains(feature) {
1452-
set.insert(feature.to_string());
1453-
}
1452+
set.insert(InternedString::new(feature));
14541453
}
14551454
}
14561455

src/cargo/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ extern crate home;
2727
extern crate ignore;
2828
extern crate jobserver;
2929
extern crate lazycell;
30+
#[macro_use] extern crate lazy_static;
3031
extern crate libc;
3132
extern crate libgit2_sys;
3233
extern crate num_cpus;

0 commit comments

Comments
 (0)