random_sequence.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. """
  2. Utilities for generating random numbers, random sequences, and
  3. random selections.
  4. """
  5. import networkx as nx
  6. from networkx.utils import py_random_state
  7. __all__ = [
  8. "powerlaw_sequence",
  9. "zipf_rv",
  10. "cumulative_distribution",
  11. "discrete_sequence",
  12. "random_weighted_sample",
  13. "weighted_choice",
  14. ]
  15. # The same helpers for choosing random sequences from distributions
  16. # uses Python's random module
  17. # https://docs.python.org/3/library/random.html
  18. @py_random_state(2)
  19. def powerlaw_sequence(n, exponent=2.0, seed=None):
  20. """
  21. Return sample sequence of length n from a power law distribution.
  22. """
  23. return [seed.paretovariate(exponent - 1) for i in range(n)]
  24. @py_random_state(2)
  25. def zipf_rv(alpha, xmin=1, seed=None):
  26. r"""Returns a random value chosen from the Zipf distribution.
  27. The return value is an integer drawn from the probability distribution
  28. .. math::
  29. p(x)=\frac{x^{-\alpha}}{\zeta(\alpha, x_{\min})},
  30. where $\zeta(\alpha, x_{\min})$ is the Hurwitz zeta function.
  31. Parameters
  32. ----------
  33. alpha : float
  34. Exponent value of the distribution
  35. xmin : int
  36. Minimum value
  37. seed : integer, random_state, or None (default)
  38. Indicator of random number generation state.
  39. See :ref:`Randomness<randomness>`.
  40. Returns
  41. -------
  42. x : int
  43. Random value from Zipf distribution
  44. Raises
  45. ------
  46. ValueError:
  47. If xmin < 1 or
  48. If alpha <= 1
  49. Notes
  50. -----
  51. The rejection algorithm generates random values for a the power-law
  52. distribution in uniformly bounded expected time dependent on
  53. parameters. See [1]_ for details on its operation.
  54. Examples
  55. --------
  56. >>> nx.utils.zipf_rv(alpha=2, xmin=3, seed=42)
  57. 8
  58. References
  59. ----------
  60. .. [1] Luc Devroye, Non-Uniform Random Variate Generation,
  61. Springer-Verlag, New York, 1986.
  62. """
  63. if xmin < 1:
  64. raise ValueError("xmin < 1")
  65. if alpha <= 1:
  66. raise ValueError("a <= 1.0")
  67. a1 = alpha - 1.0
  68. b = 2**a1
  69. while True:
  70. u = 1.0 - seed.random() # u in (0,1]
  71. v = seed.random() # v in [0,1)
  72. x = int(xmin * u ** -(1.0 / a1))
  73. t = (1.0 + (1.0 / x)) ** a1
  74. if v * x * (t - 1.0) / (b - 1.0) <= t / b:
  75. break
  76. return x
  77. def cumulative_distribution(distribution):
  78. """Returns normalized cumulative distribution from discrete distribution."""
  79. cdf = [0.0]
  80. psum = sum(distribution)
  81. for i in range(0, len(distribution)):
  82. cdf.append(cdf[i] + distribution[i] / psum)
  83. return cdf
  84. @py_random_state(3)
  85. def discrete_sequence(n, distribution=None, cdistribution=None, seed=None):
  86. """
  87. Return sample sequence of length n from a given discrete distribution
  88. or discrete cumulative distribution.
  89. One of the following must be specified.
  90. distribution = histogram of values, will be normalized
  91. cdistribution = normalized discrete cumulative distribution
  92. """
  93. import bisect
  94. if cdistribution is not None:
  95. cdf = cdistribution
  96. elif distribution is not None:
  97. cdf = cumulative_distribution(distribution)
  98. else:
  99. raise nx.NetworkXError(
  100. "discrete_sequence: distribution or cdistribution missing"
  101. )
  102. # get a uniform random number
  103. inputseq = [seed.random() for i in range(n)]
  104. # choose from CDF
  105. seq = [bisect.bisect_left(cdf, s) - 1 for s in inputseq]
  106. return seq
  107. @py_random_state(2)
  108. def random_weighted_sample(mapping, k, seed=None):
  109. """Returns k items without replacement from a weighted sample.
  110. The input is a dictionary of items with weights as values.
  111. """
  112. if k > len(mapping):
  113. raise ValueError("sample larger than population")
  114. sample = set()
  115. while len(sample) < k:
  116. sample.add(weighted_choice(mapping, seed))
  117. return list(sample)
  118. @py_random_state(1)
  119. def weighted_choice(mapping, seed=None):
  120. """Returns a single element from a weighted sample.
  121. The input is a dictionary of items with weights as values.
  122. """
  123. # use roulette method
  124. rnd = seed.random() * sum(mapping.values())
  125. for k, w in mapping.items():
  126. rnd -= w
  127. if rnd < 0:
  128. return k