Skip to content

Commit 1292a6d

Browse files
committed
Auto merge of #6283 - Eh2406:slow-case, r=alexcrichton
Use a ad hoc trie to avoid slow cases This adds a test for the pure case described [here](#6258 (comment)). When added, this test will time out in debug (~20 sec release) with an `N` of 3. Looking with a profiler, it is spending most of its time in [`Vec.contains`](https://github.com/rust-lang/cargo/blob/d08fd375db0014f097ee3fbdc1c595b632669f6a/src/cargo/core/resolver/conflict_cache.rs#L84). Keeping a deduplicated list with contains is `O(n^2)`, so let's change things to be `Ord` so we can use a btree to deduplicate. Now the profiler claimed that we are spending our time [searching that `Vec`](https://github.com/rust-lang/cargo/blob/d08fd375db0014f097ee3fbdc1c595b632669f6a/src/cargo/core/resolver/conflict_cache.rs#L66). So time to work on that "vaporware", the simplest thing I could think of that lets us check for is_active in better then `O(n)` is nested hashmaps. Each hashmap has keys of a shared Id, and values of hashmaps representing all the conflicts that use that Id. When searching if the key is not active then we need not look at any of its descendants. There are lots of ways to make this better but even this much gets the test to pass with `N` of 3. So maybe we can justify the improvements with `N` of 4? No, eavan in debug `N` of 4 hits the [50k limit](https://github.com/rust-lang/cargo/blob/d08fd375db0014f097ee3fbdc1c595b632669f6a/src/cargo/core/resolver/types.rs#L55), so the that is as far as we need to go on the conflict_cache.
2 parents 241fac0 + 178ee0b commit 1292a6d

File tree

5 files changed

+169
-62
lines changed

5 files changed

+169
-62
lines changed

src/cargo/core/resolver/conflict_cache.rs

+117-38
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,125 @@
1-
use std::collections::{HashMap, HashSet};
1+
use std::collections::{BTreeMap, HashMap, HashSet};
22

33
use super::types::ConflictReason;
44
use core::resolver::Context;
55
use core::{Dependency, PackageId};
66

7+
/// This is a Trie for storing a large number of Sets designed to
8+
/// efficiently see if any of the stored Sets are a subset of a search Set.
9+
enum ConflictStoreTrie {
10+
/// a Leaf is one of the stored Sets.
11+
Leaf(BTreeMap<PackageId, ConflictReason>),
12+
/// a Node is a map from an element to a subTrie where
13+
/// all the Sets in the subTrie contains that element.
14+
Node(HashMap<PackageId, ConflictStoreTrie>),
15+
}
16+
17+
impl ConflictStoreTrie {
18+
/// Finds any known set of conflicts, if any,
19+
/// which are activated in `cx` and pass the `filter` specified?
20+
fn find_conflicting<F>(
21+
&self,
22+
cx: &Context,
23+
filter: &F,
24+
) -> Option<&BTreeMap<PackageId, ConflictReason>>
25+
where
26+
for<'r> F: Fn(&'r &BTreeMap<PackageId, ConflictReason>) -> bool,
27+
{
28+
match self {
29+
ConflictStoreTrie::Leaf(c) => {
30+
if filter(&c) {
31+
// is_conflicting checks that all the elements are active,
32+
// but we have checked each one by the recursion of this function.
33+
debug_assert!(cx.is_conflicting(None, c));
34+
Some(c)
35+
} else {
36+
None
37+
}
38+
}
39+
ConflictStoreTrie::Node(m) => {
40+
for (pid, store) in m {
41+
// if the key is active then we need to check all of the corresponding subTrie.
42+
if cx.is_active(pid) {
43+
if let Some(o) = store.find_conflicting(cx, filter) {
44+
return Some(o);
45+
}
46+
} // else, if it is not active then there is no way any of the corresponding
47+
// subTrie will be conflicting.
48+
}
49+
None
50+
}
51+
}
52+
}
53+
54+
fn insert<'a>(
55+
&mut self,
56+
mut iter: impl Iterator<Item = &'a PackageId>,
57+
con: BTreeMap<PackageId, ConflictReason>,
58+
) {
59+
if let Some(pid) = iter.next() {
60+
if let ConflictStoreTrie::Node(p) = self {
61+
p.entry(pid.clone())
62+
.or_insert_with(|| ConflictStoreTrie::Node(HashMap::new()))
63+
.insert(iter, con);
64+
} // else, We already have a subset of this in the ConflictStore
65+
} else {
66+
// we are at the end of the set we are adding, there are 3 cases for what to do next:
67+
// 1. self is a empty dummy Node inserted by `or_insert_with`
68+
// in witch case we should replace it with `Leaf(con)`.
69+
// 2. self is a Node because we previously inserted a superset of
70+
// the thing we are working on (I don't know if this happens in practice)
71+
// but the subset that we are working on will
72+
// always match any time the larger set would have
73+
// in witch case we can replace it with `Leaf(con)`.
74+
// 3. self is a Leaf that is in the same spot in the structure as
75+
// the thing we are working on. So it is equivalent.
76+
// We can replace it with `Leaf(con)`.
77+
if cfg!(debug_assertions) {
78+
if let ConflictStoreTrie::Leaf(c) = self {
79+
let a: Vec<_> = con.keys().collect();
80+
let b: Vec<_> = c.keys().collect();
81+
assert_eq!(a, b);
82+
}
83+
}
84+
*self = ConflictStoreTrie::Leaf(con)
85+
}
86+
}
87+
}
88+
789
pub(super) struct ConflictCache {
890
// `con_from_dep` is a cache of the reasons for each time we
991
// backtrack. For example after several backtracks we may have:
1092
//
11-
// con_from_dep[`foo = "^1.0.2"`] = vec![
12-
// map!{`foo=1.0.1`: Semver},
13-
// map!{`foo=1.0.0`: Semver},
14-
// ];
93+
// con_from_dep[`foo = "^1.0.2"`] = map!{
94+
// `foo=1.0.1`: map!{`foo=1.0.1`: Semver},
95+
// `foo=1.0.0`: map!{`foo=1.0.0`: Semver},
96+
// };
1597
//
1698
// This can be read as "we cannot find a candidate for dep `foo = "^1.0.2"`
1799
// if either `foo=1.0.1` OR `foo=1.0.0` are activated".
18100
//
19101
// Another example after several backtracks we may have:
20102
//
21-
// con_from_dep[`foo = ">=0.8.2, <=0.9.3"`] = vec![
22-
// map!{`foo=0.8.1`: Semver, `foo=0.9.4`: Semver},
23-
// ];
103+
// con_from_dep[`foo = ">=0.8.2, <=0.9.3"`] = map!{
104+
// `foo=0.8.1`: map!{
105+
// `foo=0.9.4`: map!{`foo=0.8.1`: Semver, `foo=0.9.4`: Semver},
106+
// }
107+
// };
24108
//
25109
// This can be read as "we cannot find a candidate for dep `foo = ">=0.8.2,
26110
// <=0.9.3"` if both `foo=0.8.1` AND `foo=0.9.4` are activated".
27111
//
28112
// This is used to make sure we don't queue work we know will fail. See the
29113
// discussion in https://github.com/rust-lang/cargo/pull/5168 for why this
30-
// is so important, and there can probably be a better data structure here
31-
// but for now this works well enough!
114+
// is so important. The nested HashMaps act as a kind of btree, that lets us
115+
// look up which entries are still active without
116+
// linearly scanning through the full list.
32117
//
33118
// Also, as a final note, this map is *not* ever removed from. This remains
34119
// as a global cache which we never delete from. Any entry in this map is
35120
// unconditionally true regardless of our resolution history of how we got
36121
// here.
37-
con_from_dep: HashMap<Dependency, Vec<HashMap<PackageId, ConflictReason>>>,
122+
con_from_dep: HashMap<Dependency, ConflictStoreTrie>,
38123
// `dep_from_pid` is an inverse-index of `con_from_dep`.
39124
// For every `PackageId` this lists the `Dependency`s that mention it in `dep_from_pid`.
40125
dep_from_pid: HashMap<PackageId, HashSet<Dependency>>,
@@ -54,47 +139,41 @@ impl ConflictCache {
54139
cx: &Context,
55140
dep: &Dependency,
56141
filter: F,
57-
) -> Option<&HashMap<PackageId, ConflictReason>>
142+
) -> Option<&BTreeMap<PackageId, ConflictReason>>
58143
where
59-
for<'r> F: FnMut(&'r &HashMap<PackageId, ConflictReason>) -> bool,
144+
for<'r> F: Fn(&'r &BTreeMap<PackageId, ConflictReason>) -> bool,
60145
{
61-
self.con_from_dep
62-
.get(dep)?
63-
.iter()
64-
.rev() // more general cases are normally found letter. So start the search there.
65-
.filter(filter)
66-
.find(|conflicting| cx.is_conflicting(None, conflicting))
146+
self.con_from_dep.get(dep)?.find_conflicting(cx, &filter)
67147
}
68148
pub fn conflicting(
69149
&self,
70150
cx: &Context,
71151
dep: &Dependency,
72-
) -> Option<&HashMap<PackageId, ConflictReason>> {
152+
) -> Option<&BTreeMap<PackageId, ConflictReason>> {
73153
self.find_conflicting(cx, dep, |_| true)
74154
}
75155

76156
/// Add to the cache a conflict of the form:
77157
/// `dep` is known to be unresolvable if
78158
/// all the `PackageId` entries are activated
79-
pub fn insert(&mut self, dep: &Dependency, con: &HashMap<PackageId, ConflictReason>) {
80-
let past = self
81-
.con_from_dep
159+
pub fn insert(&mut self, dep: &Dependency, con: &BTreeMap<PackageId, ConflictReason>) {
160+
self.con_from_dep
82161
.entry(dep.clone())
83-
.or_insert_with(Vec::new);
84-
if !past.contains(con) {
85-
trace!(
86-
"{} = \"{}\" adding a skip {:?}",
87-
dep.package_name(),
88-
dep.version_req(),
89-
con
90-
);
91-
past.push(con.clone());
92-
for c in con.keys() {
93-
self.dep_from_pid
94-
.entry(c.clone())
95-
.or_insert_with(HashSet::new)
96-
.insert(dep.clone());
97-
}
162+
.or_insert_with(|| ConflictStoreTrie::Node(HashMap::new()))
163+
.insert(con.keys(), con.clone());
164+
165+
trace!(
166+
"{} = \"{}\" adding a skip {:?}",
167+
dep.package_name(),
168+
dep.version_req(),
169+
con
170+
);
171+
172+
for c in con.keys() {
173+
self.dep_from_pid
174+
.entry(c.clone())
175+
.or_insert_with(HashSet::new)
176+
.insert(dep.clone());
98177
}
99178
}
100179
pub fn dependencies_conflicting_with(&self, pid: &PackageId) -> Option<&HashSet<Dependency>> {

src/cargo/core/resolver/context.rs

+9-8
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
1-
use std::collections::{HashMap, HashSet};
1+
use std::collections::{BTreeMap, HashMap, HashSet};
22
use std::rc::Rc;
33

44
use core::interning::InternedString;
55
use core::{Dependency, FeatureValue, PackageId, SourceId, Summary};
66
use util::CargoResult;
77
use util::Graph;
88

9-
use super::types::{ConflictReason, DepInfo, GraphNode, Method, RcList, RegistryQueryer};
109
use super::errors::ActivateResult;
10+
use super::types::{ConflictReason, DepInfo, GraphNode, Method, RcList, RegistryQueryer};
1111

1212
pub use super::encode::{EncodableDependency, EncodablePackageId, EncodableResolve};
1313
pub use super::encode::{Metadata, WorkspaceResolve};
@@ -133,7 +133,7 @@ impl Context {
133133
.unwrap_or(&[])
134134
}
135135

136-
fn is_active(&self, id: &PackageId) -> bool {
136+
pub fn is_active(&self, id: &PackageId) -> bool {
137137
self.activations
138138
.get(&(id.name(), id.source_id().clone()))
139139
.map(|v| v.iter().any(|s| s.package_id() == id))
@@ -145,7 +145,7 @@ impl Context {
145145
pub fn is_conflicting(
146146
&self,
147147
parent: Option<&PackageId>,
148-
conflicting_activations: &HashMap<PackageId, ConflictReason>,
148+
conflicting_activations: &BTreeMap<PackageId, ConflictReason>,
149149
) -> bool {
150150
conflicting_activations
151151
.keys()
@@ -186,10 +186,11 @@ impl Context {
186186
// name.
187187
let base = reqs.deps.get(&dep.name_in_toml()).unwrap_or(&default_dep);
188188
used_features.insert(dep.name_in_toml());
189-
let always_required = !dep.is_optional() && !s
190-
.dependencies()
191-
.iter()
192-
.any(|d| d.is_optional() && d.name_in_toml() == dep.name_in_toml());
189+
let always_required = !dep.is_optional()
190+
&& !s
191+
.dependencies()
192+
.iter()
193+
.any(|d| d.is_optional() && d.name_in_toml() == dep.name_in_toml());
193194
if always_required && base.0 {
194195
self.warnings.push(format!(
195196
"Package `{}` does not have feature `{}`. It has a required dependency \

src/cargo/core/resolver/errors.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::collections::HashMap;
1+
use std::collections::BTreeMap;
22
use std::fmt;
33

44
use core::{Dependency, PackageId, Registry, Summary};
@@ -73,7 +73,7 @@ pub(super) fn activation_error(
7373
registry: &mut Registry,
7474
parent: &Summary,
7575
dep: &Dependency,
76-
conflicting_activations: &HashMap<PackageId, ConflictReason>,
76+
conflicting_activations: &BTreeMap<PackageId, ConflictReason>,
7777
candidates: &[Candidate],
7878
config: Option<&Config>,
7979
) -> ResolveError {

src/cargo/core/resolver/mod.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ fn activate_deps_loop(
247247
//
248248
// This is a map of package id to a reason why that packaged caused a
249249
// conflict for us.
250-
let mut conflicting_activations = HashMap::new();
250+
let mut conflicting_activations = BTreeMap::new();
251251

252252
// When backtracking we don't fully update `conflicting_activations`
253253
// especially for the cases that we didn't make a backtrack frame in the
@@ -641,7 +641,7 @@ struct BacktrackFrame {
641641
parent: Summary,
642642
dep: Dependency,
643643
features: Rc<Vec<InternedString>>,
644-
conflicting_activations: HashMap<PackageId, ConflictReason>,
644+
conflicting_activations: BTreeMap<PackageId, ConflictReason>,
645645
}
646646

647647
/// A helper "iterator" used to extract candidates within a current `Context` of
@@ -688,7 +688,7 @@ impl RemainingCandidates {
688688
/// original list for the reason listed.
689689
fn next(
690690
&mut self,
691-
conflicting_prev_active: &mut HashMap<PackageId, ConflictReason>,
691+
conflicting_prev_active: &mut BTreeMap<PackageId, ConflictReason>,
692692
cx: &Context,
693693
dep: &Dependency,
694694
) -> Option<(Candidate, bool)> {
@@ -781,7 +781,7 @@ fn find_candidate(
781781
backtrack_stack: &mut Vec<BacktrackFrame>,
782782
parent: &Summary,
783783
backtracked: bool,
784-
conflicting_activations: &HashMap<PackageId, ConflictReason>,
784+
conflicting_activations: &BTreeMap<PackageId, ConflictReason>,
785785
) -> Option<(Candidate, bool, BacktrackFrame)> {
786786
while let Some(mut frame) = backtrack_stack.pop() {
787787
let next = frame.remaining_candidates.next(

tests/testsuite/resolve.rs

+37-10
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ use cargo::util::Config;
77
use support::project;
88
use support::registry::Package;
99
use support::resolver::{
10-
assert_contains, assert_same, dep, dep_kind, dep_loc, dep_req, loc_names, names,
11-
pkg, pkg_dep, pkg_id, pkg_loc, registry, registry_strategy, resolve, resolve_and_validated,
10+
assert_contains, assert_same, dep, dep_kind, dep_loc, dep_req, loc_names, names, pkg, pkg_dep,
11+
pkg_id, pkg_loc, registry, registry_strategy, resolve, resolve_and_validated,
1212
resolve_with_config, PrettyPrintRegistry, ToDep, ToPkgId,
1313
};
1414

@@ -433,14 +433,12 @@ fn resolving_incompat_versions() {
433433
pkg!("bar" => [dep_req("foo", "=1.0.2")]),
434434
]);
435435

436-
assert!(
437-
resolve(
438-
&pkg_id("root"),
439-
vec![dep_req("foo", "=1.0.1"), dep("bar")],
440-
&reg
441-
)
442-
.is_err()
443-
);
436+
assert!(resolve(
437+
&pkg_id("root"),
438+
vec![dep_req("foo", "=1.0.1"), dep("bar")],
439+
&reg
440+
)
441+
.is_err());
444442
}
445443

446444
#[test]
@@ -1174,3 +1172,32 @@ fn hard_equality() {
11741172
&names(&[("root", "1.0.0"), ("foo", "1.0.0"), ("bar", "1.0.0")]),
11751173
);
11761174
}
1175+
1176+
#[test]
1177+
fn large_conflict_cache() {
1178+
let mut input = vec![
1179+
pkg!(("last", "0.0.0") => [dep("bad")]), // just to make sure last is less constrained
1180+
];
1181+
let mut root_deps = vec![dep("last")];
1182+
const NUM_VERSIONS: u8 = 3;
1183+
for name in 0..=NUM_VERSIONS {
1184+
// a large number of conflicts can easily be generated by a sys crate.
1185+
let sys_name = format!("{}-sys", (b'a' + name) as char);
1186+
let in_len = input.len();
1187+
input.push(pkg!(("last", format!("{}.0.0", in_len)) => [dep_req(&sys_name, "=0.0.0")]));
1188+
root_deps.push(dep_req(&sys_name, ">= 0.0.1"));
1189+
1190+
// a large number of conflicts can also easily be generated by a major release version.
1191+
let plane_name = format!("{}", (b'a' + name) as char);
1192+
let in_len = input.len();
1193+
input.push(pkg!(("last", format!("{}.0.0", in_len)) => [dep_req(&plane_name, "=1.0.0")]));
1194+
root_deps.push(dep_req(&plane_name, ">= 1.0.1"));
1195+
1196+
for i in 0..=NUM_VERSIONS {
1197+
input.push(pkg!((&sys_name, format!("{}.0.0", i))));
1198+
input.push(pkg!((&plane_name, format!("1.0.{}", i))));
1199+
}
1200+
}
1201+
let reg = registry(input);
1202+
let _ = resolve(&pkg_id("root"), root_deps, &reg);
1203+
}

0 commit comments

Comments
 (0)