blockpartition.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. # -*- coding: utf-8 -*-
  2. # Copyright 2019 Kakao Brain
  3. #
  4. # Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
  5. #
  6. # This source code is licensed under the BSD license found in the
  7. # LICENSE file in the root directory of this source tree.
  8. """Implements "Block Partitions of Sequences" by Imre Bárány et al.
  9. Paper: https://arxiv.org/pdf/1308.2452.pdf
  10. """
  11. from typing import Iterator, List, Tuple
  12. __all__ = ["solve"]
  13. def solve(sequence: List[int], partitions: int = 1) -> List[List[int]]:
  14. """Splits a sequence into several partitions to minimize variance for each
  15. partition.
  16. The result might not be optimal. However, it can be done only in O(kn³),
  17. where k is the number of partitions and n is the length of the sequence.
  18. """
  19. if partitions < 1:
  20. raise ValueError(f"partitions must be a positive integer ({partitions} < 1)")
  21. n = len(sequence)
  22. if n < partitions:
  23. raise ValueError(f"sequence is shorter than intended partitions ({n} < {partitions})")
  24. # Normalize the sequence in [0, 1].
  25. minimum = min(sequence)
  26. maximum = max(sequence) - minimum
  27. normal_sequence: List[float]
  28. if maximum == 0:
  29. normal_sequence = [0 for _ in sequence]
  30. else:
  31. normal_sequence = [(x - minimum) / maximum for x in sequence]
  32. splits = [n // partitions * (x + 1) for x in range(partitions - 1)] + [n]
  33. def block_size(i: int) -> float:
  34. start = splits[i - 1] if i > 0 else 0
  35. stop = splits[i]
  36. return sum(normal_sequence[start:stop])
  37. def leaderboard() -> Iterator[Tuple[float, int]]:
  38. return ((block_size(i), i) for i in range(partitions))
  39. while True:
  40. """
  41. (1) Fix p ∈ [k] with M(P) = bp. So Bp is a maximal block of P.
  42. """
  43. # max_size: M(P)
  44. max_size, p = max(leaderboard())
  45. while True:
  46. """
  47. (2) If M(P) ≤ m(P) + 1, then stop.
  48. """
  49. # min_size: m(P)
  50. min_size, q = min(leaderboard())
  51. if max_size <= min_size + 1:
  52. return [sequence[i:j] for i, j in zip([0] + splits[:-1], splits)]
  53. """
  54. (3) If M(P) > m(P) + 1, then let m(P) = bq for the q ∈ [k] which is
  55. closest to p (ties broken arbitrarily). Thus Bq is a minimal block
  56. of P. Let Bh be the block next to Bq between Bp and Bq. (Note that
  57. Bh is a non-empty block: if it were, then m(P) = 0 and we should
  58. have chosen Bh instead of Bq.)
  59. """
  60. if p < q:
  61. """
  62. So either p < q and then h = q−1 and we define P ∗ by moving
  63. the last element from Bh = Bq−1 to Bq,
  64. """
  65. h = q - 1
  66. splits[h] -= 1
  67. else:
  68. """
  69. or q < p, and then h = q + 1 and P ∗ is obtained by moving the
  70. first element of Bh = Bq+1 to Bq.
  71. """
  72. h = q + 1
  73. splits[q] += 1
  74. """
  75. Set P = P ∗ . If p = h, then go to (1), else go to (2).
  76. """
  77. if p == h:
  78. break