|
1 | 1 | from __future__ import annotations |
2 | 2 |
|
3 | 3 | import dataclasses |
| 4 | +import graphlib |
4 | 5 | import json |
5 | 6 | import logging |
6 | 7 | import pathlib |
| 8 | +import threading |
7 | 9 | import typing |
8 | 10 |
|
9 | 11 | from packaging.requirements import Requirement |
@@ -379,3 +381,137 @@ def _depth_first_traversal( |
379 | 381 | yield from self._depth_first_traversal( |
380 | 382 | edge.destination_node.children, visited, match_dep_types |
381 | 383 | ) |
| 384 | + |
| 385 | + |
| 386 | +class TrackingTopologicalSorter: |
| 387 | + """A thread-safe topological sorter that tracks nodes in progress |
| 388 | +
|
| 389 | + ``TopologicalSorter.get_ready()`` returns each node only once. The |
| 390 | + tracking topological sorter keeps track which nodes are marked as done. |
| 391 | + The ``get_available()`` method returns nodes again and again, until |
| 392 | + they are marked as done. The graph is active until all nodes are marked |
| 393 | + as done. |
| 394 | +
|
| 395 | + Individual nodes can be marked as exclusive nodes. ``get_available`` |
| 396 | + treats exclusive nodes special and returns: |
| 397 | +
|
| 398 | + 1. one or more non-exclusive nodes |
| 399 | + 2. exactly one exclusive node that is a predecessor of another node |
| 400 | + 3. exactly one exclusive node |
| 401 | +
|
| 402 | + The class uses a lock for ``is_active`, ``get_available`, and ``done``, |
| 403 | + so the methods can be used from threading pool and future callback. |
| 404 | + """ |
| 405 | + |
| 406 | + __slots__ = ( |
| 407 | + "_dep_nodes", |
| 408 | + "_exclusive_nodes", |
| 409 | + "_in_progress_nodes", |
| 410 | + "_lock", |
| 411 | + "_topo", |
| 412 | + ) |
| 413 | + |
| 414 | + def __init__( |
| 415 | + self, |
| 416 | + graph: typing.Mapping[DependencyNode, typing.Iterable[DependencyNode]] |
| 417 | + | None = None, |
| 418 | + ) -> None: |
| 419 | + self._topo: graphlib.TopologicalSorter[DependencyNode] = ( |
| 420 | + graphlib.TopologicalSorter() |
| 421 | + ) |
| 422 | + # set of nodes that are not done, yet |
| 423 | + self._in_progress_nodes: set[DependencyNode] = set() |
| 424 | + # set of nodes that are predecessors of other nodes |
| 425 | + self._dep_nodes: set[DependencyNode] = set() |
| 426 | + # dict of nodes -> priority; dependency: -1, leaf: +1 |
| 427 | + self._exclusive_nodes: dict[DependencyNode, int] = {} |
| 428 | + self._lock = threading.Lock() |
| 429 | + if graph is not None: |
| 430 | + for node, predecessors in graph.items(): |
| 431 | + self.add(node, *predecessors) |
| 432 | + |
| 433 | + @property |
| 434 | + def dependency_nodes(self) -> set[DependencyNode]: |
| 435 | + """Nodes that other nodes depend on""" |
| 436 | + return self._dep_nodes.copy() |
| 437 | + |
| 438 | + @property |
| 439 | + def exclusive_nodes(self) -> set[DependencyNode]: |
| 440 | + """Nodes that are marked as exclusive""" |
| 441 | + return set(self._exclusive_nodes) |
| 442 | + |
| 443 | + def add( |
| 444 | + self, |
| 445 | + node: DependencyNode, |
| 446 | + *predecessors: DependencyNode, |
| 447 | + exclusive: bool = False, |
| 448 | + ) -> None: |
| 449 | + """Add new node |
| 450 | +
|
| 451 | + Can be called multiple times for a node to add more predecessors or |
| 452 | + to mark a node as exclusive. Exclusive nodes cannot be unmarked. |
| 453 | + """ |
| 454 | + self._topo.add(node, *predecessors) |
| 455 | + self._dep_nodes.update(predecessors) |
| 456 | + if exclusive: |
| 457 | + self._exclusive_nodes[node] = 1 |
| 458 | + |
| 459 | + def prepare(self) -> None: |
| 460 | + """Prepare and check for cyclic dependencies""" |
| 461 | + self._topo.prepare() |
| 462 | + for node in self._exclusive_nodes: |
| 463 | + if node in self._dep_nodes: |
| 464 | + # give dependency nodes a higher priority |
| 465 | + self._exclusive_nodes[node] = -1 |
| 466 | + |
| 467 | + def is_active(self) -> bool: |
| 468 | + with self._lock: |
| 469 | + return bool(self._in_progress_nodes) or self._topo.is_active() |
| 470 | + |
| 471 | + def __bool__(self) -> bool: |
| 472 | + return self.is_active() |
| 473 | + |
| 474 | + def get_available(self) -> set[DependencyNode]: |
| 475 | + """Get available nodes |
| 476 | +
|
| 477 | + A node can be returned multiple times until it is marked as 'done'. |
| 478 | + """ |
| 479 | + with self._lock: |
| 480 | + # get ready nodes, update in progress nodes. |
| 481 | + ready = self._topo.get_ready() |
| 482 | + self._in_progress_nodes.update(ready) |
| 483 | + |
| 484 | + if not self._in_progress_nodes: |
| 485 | + # API misuse, user did not check "is_active" |
| 486 | + raise ValueError("topology is not active") |
| 487 | + |
| 488 | + # get and prefer non-exclusive nodes. Exclusive nodes are |
| 489 | + # 'heavy' nodes, that that a long time to build. Start with |
| 490 | + # 'light' nodes first. |
| 491 | + exclusive_nodes = self._exclusive_nodes |
| 492 | + non_exclusive = self._in_progress_nodes.difference(exclusive_nodes) |
| 493 | + if non_exclusive: |
| 494 | + # set.difference() returns a new set object |
| 495 | + return non_exclusive |
| 496 | + |
| 497 | + # return a single exclusive node, prefer nodes that are a |
| 498 | + # dependency of other nodes. |
| 499 | + exclusive = self._in_progress_nodes.intersection(exclusive_nodes) |
| 500 | + exclusive_list = sorted( |
| 501 | + exclusive, |
| 502 | + key=lambda node: (exclusive_nodes[node], node), |
| 503 | + ) |
| 504 | + return {exclusive_list[0]} |
| 505 | + |
| 506 | + def done(self, *nodes: DependencyNode) -> None: |
| 507 | + """Mark nodes as done""" |
| 508 | + with self._lock: |
| 509 | + self._in_progress_nodes.difference_update(nodes) |
| 510 | + self._topo.done(*nodes) |
| 511 | + |
| 512 | + def static_batches(self) -> typing.Iterable[set[DependencyNode]]: |
| 513 | + self.prepare() |
| 514 | + while self.is_active(): |
| 515 | + nodes = self.get_available() |
| 516 | + yield nodes |
| 517 | + self.done(*nodes) |
0 commit comments