Auto merge of #5213 - Eh2406:faster_resolver, r=alexcrichton

bors · bors · commit 93b0b1253e7c · 2018-03-24T16:59:59.000Z
Faster resolver: use a inverse-index to not activate the causes of conflict This adds a test for #4810 (comment) with two extensions that make it harder. It is the last reproducible and in the wild exponentially slow resolution (that I have found). The problem in the test is `backtrack_trap0 = "*"` is a lot of ways of saying `constrained = ">=1.1.0, <=2.0.0"` but `constrained= "2.0.1"` is already picked. Only then to try and solve `constrained= "~1.0.0"` which is incompatible. Our parent knows that we have been poisoned, and wont try to activate us again. Because of the order we evaluate deps we end up backtracking to where `constrained: 1.1.0` is set instead of our parent. And so the poisoning does not help. This is harder then #4810 (comment) because: 1. Having multiple semver compatible versions of constrained in play makes for a lot more bookkeeping. Specifically bookkeeping I forgot when I first submitted this PR. 2. The problematic dependencies are added deep in a combinatorial explosion of possibilities. So if we don't correctly handle caching that `backtrack_trap0 = "*"` is doomed then we will never finish looking thru the different possibilities for `level0 = "*"` This PR also includes a proof of concept solution for the test, which proves that it does solve #4810 (comment). The added code is tricky to read. It also adds a `O(remaining_deps)` job to every activation on the happy path, slower if the `past_conflicting_activations` is not empty. I'd like some brainstorming on better solutions.
diff --git a/src/cargo/core/resolver/conflict_cache.rs b/src/cargo/core/resolver/conflict_cache.rs
@@ -0,0 +1,96 @@
+use std::collections::{HashMap, HashSet};
+
+use core::{Dependency, PackageId};
+use core::resolver::{ConflictReason, Context};
+
+pub(super) struct ConflictCache {
+    // `con_from_dep` is a cache of the reasons for each time we
+    // backtrack. For example after several backtracks we may have:
+    //
+    //  con_from_dep[`foo = "^1.0.2"`] = vec![
+    //      map!{`foo=1.0.1`: Semver},
+    //      map!{`foo=1.0.0`: Semver},
+    //  ];
+    //
+    // This can be read as "we cannot find a candidate for dep `foo = "^1.0.2"`
+    // if either `foo=1.0.1` OR `foo=1.0.0` are activated".
+    //
+    // Another example after several backtracks we may have:
+    //
+    //  con_from_dep[`foo = ">=0.8.2, <=0.9.3"`] = vec![
+    //      map!{`foo=0.8.1`: Semver, `foo=0.9.4`: Semver},
+    //  ];
+    //
+    // This can be read as "we cannot find a candidate for dep `foo = ">=0.8.2,
+    // <=0.9.3"` if both `foo=0.8.1` AND `foo=0.9.4` are activated".
+    //
+    // This is used to make sure we don't queue work we know will fail. See the
+    // discussion in https://github.com/rust-lang/cargo/pull/5168 for why this
+    // is so important, and there can probably be a better data structure here
+    // but for now this works well enough!
+    //
+    // Also, as a final note, this map is *not* ever removed from. This remains
+    // as a global cache which we never delete from. Any entry in this map is
+    // unconditionally true regardless of our resolution history of how we got
+    // here.
+    con_from_dep: HashMap<Dependency, Vec<HashMap<PackageId, ConflictReason>>>,
+    // `past_conflict_triggers` is an
+    // of `past_conflicting_activations`.
+    // For every `PackageId` this lists the `Dependency`s that mention it in `past_conflicting_activations`.
+    dep_from_pid: HashMap<PackageId, HashSet<Dependency>>,
+}
+
+impl ConflictCache {
+    pub fn new() -> ConflictCache {
+        ConflictCache {
+            con_from_dep: HashMap::new(),
+            dep_from_pid: HashMap::new(),
+        }
+    }
+    /// Finds any known set of conflicts, if any,
+    /// which are activated in `cx` and pass the `filter` specified?
+    pub fn find_conflicting<F>(
+        &self,
+        cx: &Context,
+        dep: &Dependency,
+        filter: F,
+    ) -> Option<&HashMap<PackageId, ConflictReason>>
+    where
+        for<'r> F: FnMut(&'r &HashMap<PackageId, ConflictReason>) -> bool,
+    {
+        self.con_from_dep
+            .get(dep)?
+            .iter()
+            .filter(filter)
+            .find(|conflicting| cx.is_conflicting(None, conflicting))
+    }
+    pub fn conflicting(
+        &self,
+        cx: &Context,
+        dep: &Dependency,
+    ) -> Option<&HashMap<PackageId, ConflictReason>> {
+        self.find_conflicting(cx, dep, |_| true)
+    }
+
+    /// Add to the cache a conflict of the form:
+    /// `dep` is known to be unresolvable if
+    /// all the `PackageId` entries are activated
+    pub fn insert(&mut self, dep: &Dependency, con: &HashMap<PackageId, ConflictReason>) {
+        let past = self.con_from_dep
+            .entry(dep.clone())
+            .or_insert_with(Vec::new);
+        if !past.contains(con) {
+            trace!("{} adding a skip {:?}", dep.name(), con);
+            past.push(con.clone());
+            for c in con.keys() {
+                self.dep_from_pid
+                    .entry(c.clone())
+                    .or_insert_with(HashSet::new)
+                    .insert(dep.clone());
+            }
+        }
+    }
+    pub fn dependencies_conflicting_with(&self, pid: &PackageId) -> Option<&HashSet<Dependency>> {
+        self.dep_from_pid.get(pid)
+    }
+}
diff --git a/src/cargo/core/resolver/mod.rs b/src/cargo/core/resolver/mod.rs
@@ -72,6 +72,7 @@ pub use self::encode::{EncodableDependency, EncodablePackageId, EncodableResolve
 pub use self::encode::{Metadata, WorkspaceResolve};
 
 mod encode;
+mod conflict_cache;
 
 /// Represents a fully resolved package dependency graph. Each node in the graph
 /// is a package and edges represent dependencies between packages.
@@ -594,6 +595,15 @@ impl DepsFrame {
             .map(|(_, (_, candidates, _))| candidates.len())
             .unwrap_or(0)
     }
+
+    fn flatten<'s>(&'s self) -> Box<Iterator<Item = (&PackageId, Dependency)> + 's> {
+        // TODO: with impl Trait the Box can be removed
+        Box::new(
+            self.remaining_siblings
+                .clone()
+                .map(move |(_, (d, _, _))| (self.parent.package_id(), d)),
+        )
+    }
 }
 
 impl PartialEq for DepsFrame {
@@ -974,39 +984,9 @@ fn activate_deps_loop(
     let mut backtrack_stack = Vec::new();
     let mut remaining_deps = BinaryHeap::new();
 
-    // `past_conflicting_activations`is a cache of the reasons for each time we
-    // backtrack. For example after several backtracks we may have:
-    //
-    //  past_conflicting_activations[`foo = "^1.0.2"`] = vec![
-    //      map!{`foo=1.0.1`: Semver},
-    //      map!{`foo=1.0.0`: Semver},
-    //  ];
-    //
-    // This can be read as "we cannot find a candidate for dep `foo = "^1.0.2"`
-    // if either `foo=1.0.1` OR `foo=1.0.0` are activated".
-    //
-    // Another example after several backtracks we may have:
-    //
-    //  past_conflicting_activations[`foo = ">=0.8.2, <=0.9.3"`] = vec![
-    //      map!{`foo=0.8.1`: Semver, `foo=0.9.4`: Semver},
-    //  ];
-    //
-    // This can be read as "we cannot find a candidate for dep `foo = ">=0.8.2,
-    // <=0.9.3"` if both `foo=0.8.1` AND `foo=0.9.4` are activated".
-    //
-    // This is used to make sure we don't queue work we know will fail. See the
-    // discussion in https://github.com/rust-lang/cargo/pull/5168 for why this
-    // is so important, and there can probably be a better data structure here
-    // but for now this works well enough!
-    //
-    // Also, as a final note, this map is *not* ever removed from. This remains
-    // as a global cache which we never delete from. Any entry in this map is
-    // unconditionally true regardless of our resolution history of how we got
-    // here.
-    let mut past_conflicting_activations: HashMap<
-        Dependency,
-        Vec<HashMap<PackageId, ConflictReason>>,
-    > = HashMap::new();
+    // `past_conflicting_activations` is a cache of the reasons for each time we
+    // backtrack.
+    let mut past_conflicting_activations = conflict_cache::ConflictCache::new();
 
     // Activate all the initial summaries to kick off some work.
     for &(ref summary, ref method) in summaries {
@@ -1096,12 +1076,7 @@ fn activate_deps_loop(
 
         let just_here_for_the_error_messages = just_here_for_the_error_messages
             && past_conflicting_activations
-                .get(&dep)
-                .and_then(|past_bad| {
-                    past_bad
-                        .iter()
-                        .find(|conflicting| cx.is_conflicting(None, conflicting))
-                })
+                .conflicting(&cx, &dep)
                 .is_some();
 
         let mut remaining_candidates = RemainingCandidates::new(&candidates);
@@ -1153,19 +1128,7 @@ fn activate_deps_loop(
                 // local is set to `true` then our `conflicting_activations` may
                 // not be right, so we can't push into our global cache.
                 if !just_here_for_the_error_messages && !backtracked {
-                    let past = past_conflicting_activations
-                        .entry(dep.clone())
-                        .or_insert_with(Vec::new);
-                    if !past.contains(&conflicting_activations) {
-                        trace!(
-                            "{}[{}]>{} adding a skip {:?}",
-                            parent.name(),
-                            cur,
-                            dep.name(),
-                            conflicting_activations
-                        );
-                        past.push(conflicting_activations.clone());
-                    }
+                    past_conflicting_activations.insert(&dep, &conflicting_activations);
                 }
 
                 match find_candidate(&mut backtrack_stack, &parent, &conflicting_activations) {
@@ -1270,16 +1233,76 @@ fn activate_deps_loop(
                         if let Some(conflicting) = frame
                             .remaining_siblings
                             .clone()
-                            .filter_map(|(_, (new_dep, _, _))| {
-                                past_conflicting_activations.get(&new_dep)
+                            .filter_map(|(_, (ref new_dep, _, _))| {
+                                past_conflicting_activations.conflicting(&cx, &new_dep)
                             })
-                            .flat_map(|x| x)
-                            .find(|con| cx.is_conflicting(None, con))
+                            .next()
                         {
-                            conflicting_activations.extend(conflicting.clone());
+                            // If one of our deps is known unresolvable
+                            // then we will not succeed.
+                            // How ever if we are part of the reason that
+                            // one of our deps conflicts then
+                            // we can make a stronger statement
+                            // because we will definitely be activated when
+                            // we try our dep.
+                            conflicting_activations.extend(
+                                conflicting
+                                    .iter()
+                                    .filter(|&(p, _)| p != &pid)
+                                    .map(|(p, r)| (p.clone(), r.clone())),
+                            );
+
                             has_past_conflicting_dep = true;
                         }
                     }
+                    // If any of `remaining_deps` are known unresolvable with
+                    // us activated, then we extend our own set of
+                    // conflicting activations with theirs and its parent. We can do this
+                    // because the set of conflicts we found implies the
+                    // dependency can't be activated which implies that we
+                    // ourselves are incompatible with that dep, so we know that deps
+                    // parent conflict with us.
+                    if !has_past_conflicting_dep {
+                        if let Some(known_related_bad_deps) =
+                            past_conflicting_activations.dependencies_conflicting_with(&pid)
+                        {
+                            if let Some((other_parent, conflict)) = remaining_deps
+                                .iter()
+                                .flat_map(|other| other.flatten())
+                                // for deps related to us
+                                .filter(|&(_, ref other_dep)|
+                                        known_related_bad_deps.contains(other_dep))
+                                .filter_map(|(other_parent, other_dep)| {
+                                    past_conflicting_activations
+                                        .find_conflicting(
+                                            &cx,
+                                            &other_dep,
+                                            |con| con.contains_key(&pid)
+                                        )
+                                        .map(|con| (other_parent, con))
+                                })
+                                .next()
+                            {
+                                let rel = conflict.get(&pid).unwrap().clone();
+
+                                // The conflict we found is
+                                // "other dep will not succeed if we are activated."
+                                // We want to add
+                                // "our dep will not succeed if other dep is in remaining_deps"
+                                // but that is not how the cache is set up.
+                                // So we add the less general but much faster,
+                                // "our dep will not succeed if other dep's parent is activated".
+                                conflicting_activations.extend(
+                                    conflict
+                                        .iter()
+                                        .filter(|&(p, _)| p != &pid)
+                                        .map(|(p, r)| (p.clone(), r.clone())),
+                                );
+                                conflicting_activations.insert(other_parent.clone(), rel);
+                                has_past_conflicting_dep = true;
+                            }
+                        }
+                    }
 
                     // Ok if we're in a "known failure" state for this frame we
                     // may want to skip it altogether though. We don't want to
diff --git a/tests/testsuite/resolve.rs b/tests/testsuite/resolve.rs
@@ -757,6 +757,99 @@ fn resolving_with_deep_traps() {
     assert!(res.is_err());
 }
 
+#[test]
+fn resolving_with_constrained_cousins_backtrack() {
+    let mut reglist = Vec::new();
+
+    const DEPTH: usize = 100;
+    const BRANCHING_FACTOR: usize = 50;
+
+    // Each backtrack_trap depends on the next.
+    // The last depends on a specific ver of constrained.
+    for l in 0..DEPTH {
+        let name = format!("backtrack_trap{}", l);
+        let next = format!("backtrack_trap{}", l + 1);
+        for i in 1..BRANCHING_FACTOR {
+            let vsn = format!("1.0.{}", i);
+            reglist.push(pkg!((name.as_str(), vsn.as_str()) => [dep_req(next.as_str(), "*")]));
+        }
+    }
+    {
+        let name = format!("backtrack_trap{}", DEPTH);
+        for i in 1..BRANCHING_FACTOR {
+            let vsn = format!("1.0.{}", i);
+            reglist.push(
+                pkg!((name.as_str(), vsn.as_str()) => [dep_req("constrained", ">=1.1.0, <=2.0.0")]),
+            );
+        }
+    }
+    {
+        // slightly less constrained to make sure `constrained` gets picked last.
+        for i in 0..(BRANCHING_FACTOR + 10) {
+            let vsn = format!("1.0.{}", i);
+            reglist.push(pkg!(("constrained", vsn.as_str())));
+        }
+        reglist.push(pkg!(("constrained", "1.1.0")));
+        reglist.push(pkg!(("constrained", "2.0.0")));
+        reglist.push(pkg!(("constrained", "2.0.1")));
+    }
+    reglist.push(pkg!(("cloaking", "1.0.0") => [dep_req("constrained", "~1.0.0")]));
+
+    let reg = registry(reglist.clone());
+
+    // `backtrack_trap0 = "*"` is a lot of ways of saying `constrained = ">=1.1.0, <=2.0.0"`
+    // but `constrained= "2.0.1"` is already picked.
+    // Only then to try and solve `constrained= "~1.0.0"` which is incompatible.
+    let res = resolve(
+        &pkg_id("root"),
+        vec![
+            dep_req("backtrack_trap0", "*"),
+            dep_req("constrained", "2.0.1"),
+            dep_req("cloaking", "*"),
+        ],
+        &reg,
+    );
+
+    assert!(res.is_err());
+
+    // Each level depends on the next but the last depends on incompatible deps.
+    // Let's make sure that we can cache that a dep has incompatible deps.
+    for l in 0..DEPTH {
+        let name = format!("level{}", l);
+        let next = format!("level{}", l + 1);
+        for i in 1..BRANCHING_FACTOR {
+            let vsn = format!("1.0.{}", i);
+            reglist.push(pkg!((name.as_str(), vsn.as_str()) => [dep_req(next.as_str(), "*")]));
+        }
+    }
+    reglist.push(
+        pkg!((format!("level{}", DEPTH).as_str(), "1.0.0") => [dep_req("backtrack_trap0", "*"),
+            dep_req("cloaking", "*")
+        ]),
+    );
+
+    let reg = registry(reglist.clone());
+
+    let res = resolve(
+        &pkg_id("root"),
+        vec![dep_req("level0", "*"), dep_req("constrained", "2.0.1")],
+        &reg,
+    );
+
+    assert!(res.is_err());
+
+    let res = resolve(
+        &pkg_id("root"),
+        vec![dep_req("level0", "*"), dep_req("constrained", "2.0.0")],
+        &reg,
+    ).unwrap();
+
+    assert_that(
+        &res,
+        contains(names(&[("constrained", "2.0.0"), ("cloaking", "1.0.0")])),
+    );
+}
+
 #[test]
 fn resolving_with_constrained_sibling_backtrack_activation() {
     // It makes sense to resolve most-constrained deps first, but