structuralholes.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. """Functions for computing measures of structural holes."""
  2. import networkx as nx
  3. __all__ = ["constraint", "local_constraint", "effective_size"]
  4. @nx._dispatch
  5. def mutual_weight(G, u, v, weight=None):
  6. """Returns the sum of the weights of the edge from `u` to `v` and
  7. the edge from `v` to `u` in `G`.
  8. `weight` is the edge data key that represents the edge weight. If
  9. the specified key is `None` or is not in the edge data for an edge,
  10. that edge is assumed to have weight 1.
  11. Pre-conditions: `u` and `v` must both be in `G`.
  12. """
  13. try:
  14. a_uv = G[u][v].get(weight, 1)
  15. except KeyError:
  16. a_uv = 0
  17. try:
  18. a_vu = G[v][u].get(weight, 1)
  19. except KeyError:
  20. a_vu = 0
  21. return a_uv + a_vu
  22. def normalized_mutual_weight(G, u, v, norm=sum, weight=None):
  23. """Returns normalized mutual weight of the edges from `u` to `v`
  24. with respect to the mutual weights of the neighbors of `u` in `G`.
  25. `norm` specifies how the normalization factor is computed. It must
  26. be a function that takes a single argument and returns a number.
  27. The argument will be an iterable of mutual weights
  28. of pairs ``(u, w)``, where ``w`` ranges over each (in- and
  29. out-)neighbor of ``u``. Commons values for `normalization` are
  30. ``sum`` and ``max``.
  31. `weight` can be ``None`` or a string, if None, all edge weights
  32. are considered equal. Otherwise holds the name of the edge
  33. attribute used as weight.
  34. """
  35. scale = norm(mutual_weight(G, u, w, weight) for w in set(nx.all_neighbors(G, u)))
  36. return 0 if scale == 0 else mutual_weight(G, u, v, weight) / scale
  37. def effective_size(G, nodes=None, weight=None):
  38. r"""Returns the effective size of all nodes in the graph ``G``.
  39. The *effective size* of a node's ego network is based on the concept
  40. of redundancy. A person's ego network has redundancy to the extent
  41. that her contacts are connected to each other as well. The
  42. nonredundant part of a person's relationships it's the effective
  43. size of her ego network [1]_. Formally, the effective size of a
  44. node $u$, denoted $e(u)$, is defined by
  45. .. math::
  46. e(u) = \sum_{v \in N(u) \setminus \{u\}}
  47. \left(1 - \sum_{w \in N(v)} p_{uw} m_{vw}\right)
  48. where $N(u)$ is the set of neighbors of $u$ and $p_{uw}$ is the
  49. normalized mutual weight of the (directed or undirected) edges
  50. joining $u$ and $v$, for each vertex $u$ and $v$ [1]_. And $m_{vw}$
  51. is the mutual weight of $v$ and $w$ divided by $v$ highest mutual
  52. weight with any of its neighbors. The *mutual weight* of $u$ and $v$
  53. is the sum of the weights of edges joining them (edge weights are
  54. assumed to be one if the graph is unweighted).
  55. For the case of unweighted and undirected graphs, Borgatti proposed
  56. a simplified formula to compute effective size [2]_
  57. .. math::
  58. e(u) = n - \frac{2t}{n}
  59. where `t` is the number of ties in the ego network (not including
  60. ties to ego) and `n` is the number of nodes (excluding ego).
  61. Parameters
  62. ----------
  63. G : NetworkX graph
  64. The graph containing ``v``. Directed graphs are treated like
  65. undirected graphs when computing neighbors of ``v``.
  66. nodes : container, optional
  67. Container of nodes in the graph ``G`` to compute the effective size.
  68. If None, the effective size of every node is computed.
  69. weight : None or string, optional
  70. If None, all edge weights are considered equal.
  71. Otherwise holds the name of the edge attribute used as weight.
  72. Returns
  73. -------
  74. dict
  75. Dictionary with nodes as keys and the effective size of the node as values.
  76. Notes
  77. -----
  78. Burt also defined the related concept of *efficiency* of a node's ego
  79. network, which is its effective size divided by the degree of that
  80. node [1]_. So you can easily compute efficiency:
  81. >>> G = nx.DiGraph()
  82. >>> G.add_edges_from([(0, 1), (0, 2), (1, 0), (2, 1)])
  83. >>> esize = nx.effective_size(G)
  84. >>> efficiency = {n: v / G.degree(n) for n, v in esize.items()}
  85. See also
  86. --------
  87. constraint
  88. References
  89. ----------
  90. .. [1] Burt, Ronald S.
  91. *Structural Holes: The Social Structure of Competition.*
  92. Cambridge: Harvard University Press, 1995.
  93. .. [2] Borgatti, S.
  94. "Structural Holes: Unpacking Burt's Redundancy Measures"
  95. CONNECTIONS 20(1):35-38.
  96. http://www.analytictech.com/connections/v20(1)/holes.htm
  97. """
  98. def redundancy(G, u, v, weight=None):
  99. nmw = normalized_mutual_weight
  100. r = sum(
  101. nmw(G, u, w, weight=weight) * nmw(G, v, w, norm=max, weight=weight)
  102. for w in set(nx.all_neighbors(G, u))
  103. )
  104. return 1 - r
  105. effective_size = {}
  106. if nodes is None:
  107. nodes = G
  108. # Use Borgatti's simplified formula for unweighted and undirected graphs
  109. if not G.is_directed() and weight is None:
  110. for v in nodes:
  111. # Effective size is not defined for isolated nodes
  112. if len(G[v]) == 0:
  113. effective_size[v] = float("nan")
  114. continue
  115. E = nx.ego_graph(G, v, center=False, undirected=True)
  116. effective_size[v] = len(E) - (2 * E.size()) / len(E)
  117. else:
  118. for v in nodes:
  119. # Effective size is not defined for isolated nodes
  120. if len(G[v]) == 0:
  121. effective_size[v] = float("nan")
  122. continue
  123. effective_size[v] = sum(
  124. redundancy(G, v, u, weight) for u in set(nx.all_neighbors(G, v))
  125. )
  126. return effective_size
  127. def constraint(G, nodes=None, weight=None):
  128. r"""Returns the constraint on all nodes in the graph ``G``.
  129. The *constraint* is a measure of the extent to which a node *v* is
  130. invested in those nodes that are themselves invested in the
  131. neighbors of *v*. Formally, the *constraint on v*, denoted `c(v)`,
  132. is defined by
  133. .. math::
  134. c(v) = \sum_{w \in N(v) \setminus \{v\}} \ell(v, w)
  135. where $N(v)$ is the subset of the neighbors of `v` that are either
  136. predecessors or successors of `v` and $\ell(v, w)$ is the local
  137. constraint on `v` with respect to `w` [1]_. For the definition of local
  138. constraint, see :func:`local_constraint`.
  139. Parameters
  140. ----------
  141. G : NetworkX graph
  142. The graph containing ``v``. This can be either directed or undirected.
  143. nodes : container, optional
  144. Container of nodes in the graph ``G`` to compute the constraint. If
  145. None, the constraint of every node is computed.
  146. weight : None or string, optional
  147. If None, all edge weights are considered equal.
  148. Otherwise holds the name of the edge attribute used as weight.
  149. Returns
  150. -------
  151. dict
  152. Dictionary with nodes as keys and the constraint on the node as values.
  153. See also
  154. --------
  155. local_constraint
  156. References
  157. ----------
  158. .. [1] Burt, Ronald S.
  159. "Structural holes and good ideas".
  160. American Journal of Sociology (110): 349–399.
  161. """
  162. if nodes is None:
  163. nodes = G
  164. constraint = {}
  165. for v in nodes:
  166. # Constraint is not defined for isolated nodes
  167. if len(G[v]) == 0:
  168. constraint[v] = float("nan")
  169. continue
  170. constraint[v] = sum(
  171. local_constraint(G, v, n, weight) for n in set(nx.all_neighbors(G, v))
  172. )
  173. return constraint
  174. def local_constraint(G, u, v, weight=None):
  175. r"""Returns the local constraint on the node ``u`` with respect to
  176. the node ``v`` in the graph ``G``.
  177. Formally, the *local constraint on u with respect to v*, denoted
  178. $\ell(v)$, is defined by
  179. .. math::
  180. \ell(u, v) = \left(p_{uv} + \sum_{w \in N(v)} p_{uw} p_{wv}\right)^2,
  181. where $N(v)$ is the set of neighbors of $v$ and $p_{uv}$ is the
  182. normalized mutual weight of the (directed or undirected) edges
  183. joining $u$ and $v$, for each vertex $u$ and $v$ [1]_. The *mutual
  184. weight* of $u$ and $v$ is the sum of the weights of edges joining
  185. them (edge weights are assumed to be one if the graph is
  186. unweighted).
  187. Parameters
  188. ----------
  189. G : NetworkX graph
  190. The graph containing ``u`` and ``v``. This can be either
  191. directed or undirected.
  192. u : node
  193. A node in the graph ``G``.
  194. v : node
  195. A node in the graph ``G``.
  196. weight : None or string, optional
  197. If None, all edge weights are considered equal.
  198. Otherwise holds the name of the edge attribute used as weight.
  199. Returns
  200. -------
  201. float
  202. The constraint of the node ``v`` in the graph ``G``.
  203. See also
  204. --------
  205. constraint
  206. References
  207. ----------
  208. .. [1] Burt, Ronald S.
  209. "Structural holes and good ideas".
  210. American Journal of Sociology (110): 349–399.
  211. """
  212. nmw = normalized_mutual_weight
  213. direct = nmw(G, u, v, weight=weight)
  214. indirect = sum(
  215. nmw(G, u, w, weight=weight) * nmw(G, w, v, weight=weight)
  216. for w in set(nx.all_neighbors(G, u))
  217. )
  218. return (direct + indirect) ** 2