@@ -171,12 +171,17 @@ def get_installation_order(
171
171
get installed one-by-one.
172
172
173
173
The current implementation creates a topological ordering of the
174
- dependency graph, while breaking any cycles in the graph at arbitrary
175
- points. We make no guarantees about where the cycle would be broken,
176
- other than they would be broken.
174
+ dependency graph, giving more weight to packages with fewer
175
+ or no dependencies, while breaking any cycles in the graph at
176
+ arbitrary points. We make no guarantees about where the cycle
177
+ would be broken, other than it *would* be broken.
177
178
"""
178
179
assert self ._result is not None , "must call resolve() first"
179
180
181
+ if not req_set .requirements :
182
+ # Nothing is left to install, so we do not need an order.
183
+ return []
184
+
180
185
graph = self ._result .graph
181
186
weights = get_topological_weights (
182
187
graph ,
@@ -199,13 +204,19 @@ def get_topological_weights(
199
204
This implementation may change at any point in the future without prior
200
205
notice.
201
206
202
- We take the length for the longest path to any node from root, ignoring any
203
- paths that contain a single node twice (i.e. cycles). This is done through
204
- a depth-first search through the graph, while keeping track of the path to
205
- the node.
207
+ We first simplify the dependency graph by pruning any leaves and giving them
208
+ the highest weight: a package without any dependencies should be installed
209
+ first. This is done again and again in the same way, giving ever less weight
210
+ to the newly found leaves. The loop stops when no leaves are left: all
211
+ remaining packages have at least one dependency left in the graph.
212
+
213
+ Then we continue with the remaining graph, by taking the length for the
214
+ longest path to any node from root, ignoring any paths that contain a single
215
+ node twice (i.e. cycles). This is done through a depth-first search through
216
+ the graph, while keeping track of the path to the node.
206
217
207
218
Cycles in the graph would result in a node being revisited while also
208
- being it's own path. In this case, take no action. This helps ensure we
219
+ being on its own path. In this case, take no action. This helps ensure we
209
220
don't get stuck in a cycle.
210
221
211
222
When assigning weight, the longer path (i.e. larger length) is preferred.
@@ -227,6 +238,34 @@ def visit(node: Optional[str]) -> None:
227
238
last_known_parent_count = weights .get (node , 0 )
228
239
weights [node ] = max (last_known_parent_count , len (path ))
229
240
241
+ # Simplify the graph, pruning leaves that have no dependencies.
242
+ # This is needed for large graphs (say over 200 packages) because the
243
+ # `visit` function becomes exponentially slower on them, taking minutes.
244
+ # See https://github.com/pypa/pip/issues/10557
245
+ # We will loop until we explicitly break the loop.
246
+ while True :
247
+ leaves = set ()
248
+ for key in graph :
249
+ if key is None :
250
+ continue
251
+ for _child in graph .iter_children (key ):
252
+ # This means we have at least one child
253
+ break
254
+ else :
255
+ # No child.
256
+ leaves .add (key )
257
+ if not leaves :
258
+ # We are done simplifying.
259
+ break
260
+ # Calculate the weight for the leaves.
261
+ weight = len (graph ) - 1
262
+ for leaf in leaves :
263
+ weights [leaf ] = weight
264
+ # Remove the leaves from the graph, making it simpler.
265
+ for leaf in leaves :
266
+ graph .remove (leaf )
267
+
268
+ # Visit the remaining graph.
230
269
# `None` is guaranteed to be the root node by resolvelib.
231
270
visit (None )
232
271
0 commit comments