Skip to content

Commit 93b0b12

Browse files
committed
Auto merge of #5213 - Eh2406:faster_resolver, r=alexcrichton
Faster resolver: use a inverse-index to not activate the causes of conflict This adds a test for #4810 (comment) with two extensions that make it harder. It is the last reproducible and in the wild exponentially slow resolution (that I have found). The problem in the test is `backtrack_trap0 = "*"` is a lot of ways of saying `constrained = ">=1.1.0, <=2.0.0"` but `constrained= "2.0.1"` is already picked. Only then to try and solve `constrained= "~1.0.0"` which is incompatible. Our parent knows that we have been poisoned, and wont try to activate us again. Because of the order we evaluate deps we end up backtracking to where `constrained: 1.1.0` is set instead of our parent. And so the poisoning does not help. This is harder then #4810 (comment) because: 1. Having multiple semver compatible versions of constrained in play makes for a lot more bookkeeping. Specifically bookkeeping I forgot when I first submitted this PR. 2. The problematic dependencies are added deep in a combinatorial explosion of possibilities. So if we don't correctly handle caching that `backtrack_trap0 = "*"` is doomed then we will never finish looking thru the different possibilities for `level0 = "*"` This PR also includes a proof of concept solution for the test, which proves that it does solve #4810 (comment). The added code is tricky to read. It also adds a `O(remaining_deps)` job to every activation on the happy path, slower if the `past_conflicting_activations` is not empty. I'd like some brainstorming on better solutions.
2 parents bcd0300 + a7a1341 commit 93b0b12

File tree

3 files changed

+269
-57
lines changed

3 files changed

+269
-57
lines changed
+96
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
use std::collections::{HashMap, HashSet};
2+
3+
use core::{Dependency, PackageId};
4+
use core::resolver::{ConflictReason, Context};
5+
6+
pub(super) struct ConflictCache {
7+
// `con_from_dep` is a cache of the reasons for each time we
8+
// backtrack. For example after several backtracks we may have:
9+
//
10+
// con_from_dep[`foo = "^1.0.2"`] = vec![
11+
// map!{`foo=1.0.1`: Semver},
12+
// map!{`foo=1.0.0`: Semver},
13+
// ];
14+
//
15+
// This can be read as "we cannot find a candidate for dep `foo = "^1.0.2"`
16+
// if either `foo=1.0.1` OR `foo=1.0.0` are activated".
17+
//
18+
// Another example after several backtracks we may have:
19+
//
20+
// con_from_dep[`foo = ">=0.8.2, <=0.9.3"`] = vec![
21+
// map!{`foo=0.8.1`: Semver, `foo=0.9.4`: Semver},
22+
// ];
23+
//
24+
// This can be read as "we cannot find a candidate for dep `foo = ">=0.8.2,
25+
// <=0.9.3"` if both `foo=0.8.1` AND `foo=0.9.4` are activated".
26+
//
27+
// This is used to make sure we don't queue work we know will fail. See the
28+
// discussion in https://github.com/rust-lang/cargo/pull/5168 for why this
29+
// is so important, and there can probably be a better data structure here
30+
// but for now this works well enough!
31+
//
32+
// Also, as a final note, this map is *not* ever removed from. This remains
33+
// as a global cache which we never delete from. Any entry in this map is
34+
// unconditionally true regardless of our resolution history of how we got
35+
// here.
36+
con_from_dep: HashMap<Dependency, Vec<HashMap<PackageId, ConflictReason>>>,
37+
// `past_conflict_triggers` is an
38+
// of `past_conflicting_activations`.
39+
// For every `PackageId` this lists the `Dependency`s that mention it in `past_conflicting_activations`.
40+
dep_from_pid: HashMap<PackageId, HashSet<Dependency>>,
41+
}
42+
43+
impl ConflictCache {
44+
pub fn new() -> ConflictCache {
45+
ConflictCache {
46+
con_from_dep: HashMap::new(),
47+
dep_from_pid: HashMap::new(),
48+
}
49+
}
50+
/// Finds any known set of conflicts, if any,
51+
/// which are activated in `cx` and pass the `filter` specified?
52+
pub fn find_conflicting<F>(
53+
&self,
54+
cx: &Context,
55+
dep: &Dependency,
56+
filter: F,
57+
) -> Option<&HashMap<PackageId, ConflictReason>>
58+
where
59+
for<'r> F: FnMut(&'r &HashMap<PackageId, ConflictReason>) -> bool,
60+
{
61+
self.con_from_dep
62+
.get(dep)?
63+
.iter()
64+
.filter(filter)
65+
.find(|conflicting| cx.is_conflicting(None, conflicting))
66+
}
67+
pub fn conflicting(
68+
&self,
69+
cx: &Context,
70+
dep: &Dependency,
71+
) -> Option<&HashMap<PackageId, ConflictReason>> {
72+
self.find_conflicting(cx, dep, |_| true)
73+
}
74+
75+
/// Add to the cache a conflict of the form:
76+
/// `dep` is known to be unresolvable if
77+
/// all the `PackageId` entries are activated
78+
pub fn insert(&mut self, dep: &Dependency, con: &HashMap<PackageId, ConflictReason>) {
79+
let past = self.con_from_dep
80+
.entry(dep.clone())
81+
.or_insert_with(Vec::new);
82+
if !past.contains(con) {
83+
trace!("{} adding a skip {:?}", dep.name(), con);
84+
past.push(con.clone());
85+
for c in con.keys() {
86+
self.dep_from_pid
87+
.entry(c.clone())
88+
.or_insert_with(HashSet::new)
89+
.insert(dep.clone());
90+
}
91+
}
92+
}
93+
pub fn dependencies_conflicting_with(&self, pid: &PackageId) -> Option<&HashSet<Dependency>> {
94+
self.dep_from_pid.get(pid)
95+
}
96+
}

src/cargo/core/resolver/mod.rs

+80-57
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ pub use self::encode::{EncodableDependency, EncodablePackageId, EncodableResolve
7272
pub use self::encode::{Metadata, WorkspaceResolve};
7373

7474
mod encode;
75+
mod conflict_cache;
7576

7677
/// Represents a fully resolved package dependency graph. Each node in the graph
7778
/// is a package and edges represent dependencies between packages.
@@ -594,6 +595,15 @@ impl DepsFrame {
594595
.map(|(_, (_, candidates, _))| candidates.len())
595596
.unwrap_or(0)
596597
}
598+
599+
fn flatten<'s>(&'s self) -> Box<Iterator<Item = (&PackageId, Dependency)> + 's> {
600+
// TODO: with impl Trait the Box can be removed
601+
Box::new(
602+
self.remaining_siblings
603+
.clone()
604+
.map(move |(_, (d, _, _))| (self.parent.package_id(), d)),
605+
)
606+
}
597607
}
598608

599609
impl PartialEq for DepsFrame {
@@ -974,39 +984,9 @@ fn activate_deps_loop(
974984
let mut backtrack_stack = Vec::new();
975985
let mut remaining_deps = BinaryHeap::new();
976986

977-
// `past_conflicting_activations`is a cache of the reasons for each time we
978-
// backtrack. For example after several backtracks we may have:
979-
//
980-
// past_conflicting_activations[`foo = "^1.0.2"`] = vec![
981-
// map!{`foo=1.0.1`: Semver},
982-
// map!{`foo=1.0.0`: Semver},
983-
// ];
984-
//
985-
// This can be read as "we cannot find a candidate for dep `foo = "^1.0.2"`
986-
// if either `foo=1.0.1` OR `foo=1.0.0` are activated".
987-
//
988-
// Another example after several backtracks we may have:
989-
//
990-
// past_conflicting_activations[`foo = ">=0.8.2, <=0.9.3"`] = vec![
991-
// map!{`foo=0.8.1`: Semver, `foo=0.9.4`: Semver},
992-
// ];
993-
//
994-
// This can be read as "we cannot find a candidate for dep `foo = ">=0.8.2,
995-
// <=0.9.3"` if both `foo=0.8.1` AND `foo=0.9.4` are activated".
996-
//
997-
// This is used to make sure we don't queue work we know will fail. See the
998-
// discussion in https://github.com/rust-lang/cargo/pull/5168 for why this
999-
// is so important, and there can probably be a better data structure here
1000-
// but for now this works well enough!
1001-
//
1002-
// Also, as a final note, this map is *not* ever removed from. This remains
1003-
// as a global cache which we never delete from. Any entry in this map is
1004-
// unconditionally true regardless of our resolution history of how we got
1005-
// here.
1006-
let mut past_conflicting_activations: HashMap<
1007-
Dependency,
1008-
Vec<HashMap<PackageId, ConflictReason>>,
1009-
> = HashMap::new();
987+
// `past_conflicting_activations` is a cache of the reasons for each time we
988+
// backtrack.
989+
let mut past_conflicting_activations = conflict_cache::ConflictCache::new();
1010990

1011991
// Activate all the initial summaries to kick off some work.
1012992
for &(ref summary, ref method) in summaries {
@@ -1096,12 +1076,7 @@ fn activate_deps_loop(
10961076

10971077
let just_here_for_the_error_messages = just_here_for_the_error_messages
10981078
&& past_conflicting_activations
1099-
.get(&dep)
1100-
.and_then(|past_bad| {
1101-
past_bad
1102-
.iter()
1103-
.find(|conflicting| cx.is_conflicting(None, conflicting))
1104-
})
1079+
.conflicting(&cx, &dep)
11051080
.is_some();
11061081

11071082
let mut remaining_candidates = RemainingCandidates::new(&candidates);
@@ -1153,19 +1128,7 @@ fn activate_deps_loop(
11531128
// local is set to `true` then our `conflicting_activations` may
11541129
// not be right, so we can't push into our global cache.
11551130
if !just_here_for_the_error_messages && !backtracked {
1156-
let past = past_conflicting_activations
1157-
.entry(dep.clone())
1158-
.or_insert_with(Vec::new);
1159-
if !past.contains(&conflicting_activations) {
1160-
trace!(
1161-
"{}[{}]>{} adding a skip {:?}",
1162-
parent.name(),
1163-
cur,
1164-
dep.name(),
1165-
conflicting_activations
1166-
);
1167-
past.push(conflicting_activations.clone());
1168-
}
1131+
past_conflicting_activations.insert(&dep, &conflicting_activations);
11691132
}
11701133

11711134
match find_candidate(&mut backtrack_stack, &parent, &conflicting_activations) {
@@ -1270,16 +1233,76 @@ fn activate_deps_loop(
12701233
if let Some(conflicting) = frame
12711234
.remaining_siblings
12721235
.clone()
1273-
.filter_map(|(_, (new_dep, _, _))| {
1274-
past_conflicting_activations.get(&new_dep)
1236+
.filter_map(|(_, (ref new_dep, _, _))| {
1237+
past_conflicting_activations.conflicting(&cx, &new_dep)
12751238
})
1276-
.flat_map(|x| x)
1277-
.find(|con| cx.is_conflicting(None, con))
1239+
.next()
12781240
{
1279-
conflicting_activations.extend(conflicting.clone());
1241+
// If one of our deps is known unresolvable
1242+
// then we will not succeed.
1243+
// How ever if we are part of the reason that
1244+
// one of our deps conflicts then
1245+
// we can make a stronger statement
1246+
// because we will definitely be activated when
1247+
// we try our dep.
1248+
conflicting_activations.extend(
1249+
conflicting
1250+
.iter()
1251+
.filter(|&(p, _)| p != &pid)
1252+
.map(|(p, r)| (p.clone(), r.clone())),
1253+
);
1254+
12801255
has_past_conflicting_dep = true;
12811256
}
12821257
}
1258+
// If any of `remaining_deps` are known unresolvable with
1259+
// us activated, then we extend our own set of
1260+
// conflicting activations with theirs and its parent. We can do this
1261+
// because the set of conflicts we found implies the
1262+
// dependency can't be activated which implies that we
1263+
// ourselves are incompatible with that dep, so we know that deps
1264+
// parent conflict with us.
1265+
if !has_past_conflicting_dep {
1266+
if let Some(known_related_bad_deps) =
1267+
past_conflicting_activations.dependencies_conflicting_with(&pid)
1268+
{
1269+
if let Some((other_parent, conflict)) = remaining_deps
1270+
.iter()
1271+
.flat_map(|other| other.flatten())
1272+
// for deps related to us
1273+
.filter(|&(_, ref other_dep)|
1274+
known_related_bad_deps.contains(other_dep))
1275+
.filter_map(|(other_parent, other_dep)| {
1276+
past_conflicting_activations
1277+
.find_conflicting(
1278+
&cx,
1279+
&other_dep,
1280+
|con| con.contains_key(&pid)
1281+
)
1282+
.map(|con| (other_parent, con))
1283+
})
1284+
.next()
1285+
{
1286+
let rel = conflict.get(&pid).unwrap().clone();
1287+
1288+
// The conflict we found is
1289+
// "other dep will not succeed if we are activated."
1290+
// We want to add
1291+
// "our dep will not succeed if other dep is in remaining_deps"
1292+
// but that is not how the cache is set up.
1293+
// So we add the less general but much faster,
1294+
// "our dep will not succeed if other dep's parent is activated".
1295+
conflicting_activations.extend(
1296+
conflict
1297+
.iter()
1298+
.filter(|&(p, _)| p != &pid)
1299+
.map(|(p, r)| (p.clone(), r.clone())),
1300+
);
1301+
conflicting_activations.insert(other_parent.clone(), rel);
1302+
has_past_conflicting_dep = true;
1303+
}
1304+
}
1305+
}
12831306

12841307
// Ok if we're in a "known failure" state for this frame we
12851308
// may want to skip it altogether though. We don't want to

tests/testsuite/resolve.rs

+93
Original file line numberDiff line numberDiff line change
@@ -757,6 +757,99 @@ fn resolving_with_deep_traps() {
757757
assert!(res.is_err());
758758
}
759759

760+
#[test]
761+
fn resolving_with_constrained_cousins_backtrack() {
762+
let mut reglist = Vec::new();
763+
764+
const DEPTH: usize = 100;
765+
const BRANCHING_FACTOR: usize = 50;
766+
767+
// Each backtrack_trap depends on the next.
768+
// The last depends on a specific ver of constrained.
769+
for l in 0..DEPTH {
770+
let name = format!("backtrack_trap{}", l);
771+
let next = format!("backtrack_trap{}", l + 1);
772+
for i in 1..BRANCHING_FACTOR {
773+
let vsn = format!("1.0.{}", i);
774+
reglist.push(pkg!((name.as_str(), vsn.as_str()) => [dep_req(next.as_str(), "*")]));
775+
}
776+
}
777+
{
778+
let name = format!("backtrack_trap{}", DEPTH);
779+
for i in 1..BRANCHING_FACTOR {
780+
let vsn = format!("1.0.{}", i);
781+
reglist.push(
782+
pkg!((name.as_str(), vsn.as_str()) => [dep_req("constrained", ">=1.1.0, <=2.0.0")]),
783+
);
784+
}
785+
}
786+
{
787+
// slightly less constrained to make sure `constrained` gets picked last.
788+
for i in 0..(BRANCHING_FACTOR + 10) {
789+
let vsn = format!("1.0.{}", i);
790+
reglist.push(pkg!(("constrained", vsn.as_str())));
791+
}
792+
reglist.push(pkg!(("constrained", "1.1.0")));
793+
reglist.push(pkg!(("constrained", "2.0.0")));
794+
reglist.push(pkg!(("constrained", "2.0.1")));
795+
}
796+
reglist.push(pkg!(("cloaking", "1.0.0") => [dep_req("constrained", "~1.0.0")]));
797+
798+
let reg = registry(reglist.clone());
799+
800+
// `backtrack_trap0 = "*"` is a lot of ways of saying `constrained = ">=1.1.0, <=2.0.0"`
801+
// but `constrained= "2.0.1"` is already picked.
802+
// Only then to try and solve `constrained= "~1.0.0"` which is incompatible.
803+
let res = resolve(
804+
&pkg_id("root"),
805+
vec![
806+
dep_req("backtrack_trap0", "*"),
807+
dep_req("constrained", "2.0.1"),
808+
dep_req("cloaking", "*"),
809+
],
810+
&reg,
811+
);
812+
813+
assert!(res.is_err());
814+
815+
// Each level depends on the next but the last depends on incompatible deps.
816+
// Let's make sure that we can cache that a dep has incompatible deps.
817+
for l in 0..DEPTH {
818+
let name = format!("level{}", l);
819+
let next = format!("level{}", l + 1);
820+
for i in 1..BRANCHING_FACTOR {
821+
let vsn = format!("1.0.{}", i);
822+
reglist.push(pkg!((name.as_str(), vsn.as_str()) => [dep_req(next.as_str(), "*")]));
823+
}
824+
}
825+
reglist.push(
826+
pkg!((format!("level{}", DEPTH).as_str(), "1.0.0") => [dep_req("backtrack_trap0", "*"),
827+
dep_req("cloaking", "*")
828+
]),
829+
);
830+
831+
let reg = registry(reglist.clone());
832+
833+
let res = resolve(
834+
&pkg_id("root"),
835+
vec![dep_req("level0", "*"), dep_req("constrained", "2.0.1")],
836+
&reg,
837+
);
838+
839+
assert!(res.is_err());
840+
841+
let res = resolve(
842+
&pkg_id("root"),
843+
vec![dep_req("level0", "*"), dep_req("constrained", "2.0.0")],
844+
&reg,
845+
).unwrap();
846+
847+
assert_that(
848+
&res,
849+
contains(names(&[("constrained", "2.0.0"), ("cloaking", "1.0.0")])),
850+
);
851+
}
852+
760853
#[test]
761854
fn resolving_with_constrained_sibling_backtrack_activation() {
762855
// It makes sense to resolve most-constrained deps first, but

0 commit comments

Comments
 (0)