mapped_queue.py 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. """Priority queue class with updatable priorities.
  2. """
  3. import heapq
  4. __all__ = ["MappedQueue"]
  5. class _HeapElement:
  6. """This proxy class separates the heap element from its priority.
  7. The idea is that using a 2-tuple (priority, element) works
  8. for sorting, but not for dict lookup because priorities are
  9. often floating point values so round-off can mess up equality.
  10. So, we need inequalities to look at the priority (for sorting)
  11. and equality (and hash) to look at the element to enable
  12. updates to the priority.
  13. Unfortunately, this class can be tricky to work with if you forget that
  14. `__lt__` compares the priority while `__eq__` compares the element.
  15. In `greedy_modularity_communities()` the following code is
  16. used to check that two _HeapElements differ in either element or priority:
  17. if d_oldmax != row_max or d_oldmax.priority != row_max.priority:
  18. If the priorities are the same, this implementation uses the element
  19. as a tiebreaker. This provides compatibility with older systems that
  20. use tuples to combine priority and elements.
  21. """
  22. __slots__ = ["priority", "element", "_hash"]
  23. def __init__(self, priority, element):
  24. self.priority = priority
  25. self.element = element
  26. self._hash = hash(element)
  27. def __lt__(self, other):
  28. try:
  29. other_priority = other.priority
  30. except AttributeError:
  31. return self.priority < other
  32. # assume comparing to another _HeapElement
  33. if self.priority == other_priority:
  34. try:
  35. return self.element < other.element
  36. except TypeError as err:
  37. raise TypeError(
  38. "Consider using a tuple, with a priority value that can be compared."
  39. )
  40. return self.priority < other_priority
  41. def __gt__(self, other):
  42. try:
  43. other_priority = other.priority
  44. except AttributeError:
  45. return self.priority > other
  46. # assume comparing to another _HeapElement
  47. if self.priority == other_priority:
  48. try:
  49. return self.element > other.element
  50. except TypeError as err:
  51. raise TypeError(
  52. "Consider using a tuple, with a priority value that can be compared."
  53. )
  54. return self.priority > other_priority
  55. def __eq__(self, other):
  56. try:
  57. return self.element == other.element
  58. except AttributeError:
  59. return self.element == other
  60. def __hash__(self):
  61. return self._hash
  62. def __getitem__(self, indx):
  63. return self.priority if indx == 0 else self.element[indx - 1]
  64. def __iter__(self):
  65. yield self.priority
  66. try:
  67. yield from self.element
  68. except TypeError:
  69. yield self.element
  70. def __repr__(self):
  71. return f"_HeapElement({self.priority}, {self.element})"
  72. class MappedQueue:
  73. """The MappedQueue class implements a min-heap with removal and update-priority.
  74. The min heap uses heapq as well as custom written _siftup and _siftdown
  75. methods to allow the heap positions to be tracked by an additional dict
  76. keyed by element to position. The smallest element can be popped in O(1) time,
  77. new elements can be pushed in O(log n) time, and any element can be removed
  78. or updated in O(log n) time. The queue cannot contain duplicate elements
  79. and an attempt to push an element already in the queue will have no effect.
  80. MappedQueue complements the heapq package from the python standard
  81. library. While MappedQueue is designed for maximum compatibility with
  82. heapq, it adds element removal, lookup, and priority update.
  83. Parameters
  84. ----------
  85. data : dict or iterable
  86. Examples
  87. --------
  88. A `MappedQueue` can be created empty, or optionally, given a dictionary
  89. of initial elements and priorities. The methods `push`, `pop`,
  90. `remove`, and `update` operate on the queue.
  91. >>> colors_nm = {'red':665, 'blue': 470, 'green': 550}
  92. >>> q = MappedQueue(colors_nm)
  93. >>> q.remove('red')
  94. >>> q.update('green', 'violet', 400)
  95. >>> q.push('indigo', 425)
  96. True
  97. >>> [q.pop().element for i in range(len(q.heap))]
  98. ['violet', 'indigo', 'blue']
  99. A `MappedQueue` can also be initialized with a list or other iterable. The priority is assumed
  100. to be the sort order of the items in the list.
  101. >>> q = MappedQueue([916, 50, 4609, 493, 237])
  102. >>> q.remove(493)
  103. >>> q.update(237, 1117)
  104. >>> [q.pop() for i in range(len(q.heap))]
  105. [50, 916, 1117, 4609]
  106. An exception is raised if the elements are not comparable.
  107. >>> q = MappedQueue([100, 'a'])
  108. Traceback (most recent call last):
  109. ...
  110. TypeError: '<' not supported between instances of 'int' and 'str'
  111. To avoid the exception, use a dictionary to assign priorities to the elements.
  112. >>> q = MappedQueue({100: 0, 'a': 1 })
  113. References
  114. ----------
  115. .. [1] Cormen, T. H., Leiserson, C. E., Rivest, R. L., & Stein, C. (2001).
  116. Introduction to algorithms second edition.
  117. .. [2] Knuth, D. E. (1997). The art of computer programming (Vol. 3).
  118. Pearson Education.
  119. """
  120. def __init__(self, data=None):
  121. """Priority queue class with updatable priorities."""
  122. if data is None:
  123. self.heap = []
  124. elif isinstance(data, dict):
  125. self.heap = [_HeapElement(v, k) for k, v in data.items()]
  126. else:
  127. self.heap = list(data)
  128. self.position = {}
  129. self._heapify()
  130. def _heapify(self):
  131. """Restore heap invariant and recalculate map."""
  132. heapq.heapify(self.heap)
  133. self.position = {elt: pos for pos, elt in enumerate(self.heap)}
  134. if len(self.heap) != len(self.position):
  135. raise AssertionError("Heap contains duplicate elements")
  136. def __len__(self):
  137. return len(self.heap)
  138. def push(self, elt, priority=None):
  139. """Add an element to the queue."""
  140. if priority is not None:
  141. elt = _HeapElement(priority, elt)
  142. # If element is already in queue, do nothing
  143. if elt in self.position:
  144. return False
  145. # Add element to heap and dict
  146. pos = len(self.heap)
  147. self.heap.append(elt)
  148. self.position[elt] = pos
  149. # Restore invariant by sifting down
  150. self._siftdown(0, pos)
  151. return True
  152. def pop(self):
  153. """Remove and return the smallest element in the queue."""
  154. # Remove smallest element
  155. elt = self.heap[0]
  156. del self.position[elt]
  157. # If elt is last item, remove and return
  158. if len(self.heap) == 1:
  159. self.heap.pop()
  160. return elt
  161. # Replace root with last element
  162. last = self.heap.pop()
  163. self.heap[0] = last
  164. self.position[last] = 0
  165. # Restore invariant by sifting up
  166. self._siftup(0)
  167. # Return smallest element
  168. return elt
  169. def update(self, elt, new, priority=None):
  170. """Replace an element in the queue with a new one."""
  171. if priority is not None:
  172. new = _HeapElement(priority, new)
  173. # Replace
  174. pos = self.position[elt]
  175. self.heap[pos] = new
  176. del self.position[elt]
  177. self.position[new] = pos
  178. # Restore invariant by sifting up
  179. self._siftup(pos)
  180. def remove(self, elt):
  181. """Remove an element from the queue."""
  182. # Find and remove element
  183. try:
  184. pos = self.position[elt]
  185. del self.position[elt]
  186. except KeyError:
  187. # Not in queue
  188. raise
  189. # If elt is last item, remove and return
  190. if pos == len(self.heap) - 1:
  191. self.heap.pop()
  192. return
  193. # Replace elt with last element
  194. last = self.heap.pop()
  195. self.heap[pos] = last
  196. self.position[last] = pos
  197. # Restore invariant by sifting up
  198. self._siftup(pos)
  199. def _siftup(self, pos):
  200. """Move smaller child up until hitting a leaf.
  201. Built to mimic code for heapq._siftup
  202. only updating position dict too.
  203. """
  204. heap, position = self.heap, self.position
  205. end_pos = len(heap)
  206. startpos = pos
  207. newitem = heap[pos]
  208. # Shift up the smaller child until hitting a leaf
  209. child_pos = (pos << 1) + 1 # start with leftmost child position
  210. while child_pos < end_pos:
  211. # Set child_pos to index of smaller child.
  212. child = heap[child_pos]
  213. right_pos = child_pos + 1
  214. if right_pos < end_pos:
  215. right = heap[right_pos]
  216. if not child < right:
  217. child = right
  218. child_pos = right_pos
  219. # Move the smaller child up.
  220. heap[pos] = child
  221. position[child] = pos
  222. pos = child_pos
  223. child_pos = (pos << 1) + 1
  224. # pos is a leaf position. Put newitem there, and bubble it up
  225. # to its final resting place (by sifting its parents down).
  226. while pos > 0:
  227. parent_pos = (pos - 1) >> 1
  228. parent = heap[parent_pos]
  229. if not newitem < parent:
  230. break
  231. heap[pos] = parent
  232. position[parent] = pos
  233. pos = parent_pos
  234. heap[pos] = newitem
  235. position[newitem] = pos
  236. def _siftdown(self, start_pos, pos):
  237. """Restore invariant. keep swapping with parent until smaller.
  238. Built to mimic code for heapq._siftdown
  239. only updating position dict too.
  240. """
  241. heap, position = self.heap, self.position
  242. newitem = heap[pos]
  243. # Follow the path to the root, moving parents down until finding a place
  244. # newitem fits.
  245. while pos > start_pos:
  246. parent_pos = (pos - 1) >> 1
  247. parent = heap[parent_pos]
  248. if not newitem < parent:
  249. break
  250. heap[pos] = parent
  251. position[parent] = pos
  252. pos = parent_pos
  253. heap[pos] = newitem
  254. position[newitem] = pos