dependencies.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. import collections
  2. import dataclasses
  3. import itertools
  4. import logging
  5. import typing
  6. from typing import Callable, cast, Dict, List, Optional, Set, Tuple, Union
  7. import sympy
  8. from .codegen.common import index_prevent_reordering
  9. from .utils import (
  10. get_dtype_size,
  11. sympy_product,
  12. sympy_str,
  13. sympy_subs,
  14. sympy_symbol,
  15. VarRanges,
  16. )
  17. from .virtualized import V
  18. log = logging.getLogger(__name__)
  19. Dep = Union["MemoryDep", "StarDep", "WeakDep"]
  20. class MemoryDep(typing.NamedTuple):
  21. name: str
  22. index: sympy.Expr # type: ignore[assignment]
  23. size: Tuple[sympy.Expr, ...]
  24. def broadcast_extend_sizes(self, extra_sizes: List[sympy.Expr]) -> "MemoryDep":
  25. size = (*self.size, *[x for x in extra_sizes if x != 1])
  26. return MemoryDep(self.name, self.index, size)
  27. def maybe_swap_sizes(self) -> "MemoryDep":
  28. # swap only in simple cases where index is trivial and
  29. # there are just 2 sizes
  30. if (
  31. len(self.size) == 2
  32. and len(self.index.args) == 0
  33. and cast(sympy.Symbol, self.index).name == canonicalization_prefix() + "0"
  34. ):
  35. c = canonicalization_prefix()
  36. size = (self.size[1], self.size[0])
  37. s0 = sympy_symbol(c + "0")
  38. s1 = sympy_symbol(c + "1")
  39. index = sympy_subs(self.index, {s0: s1})
  40. return MemoryDep(self.name, index, size)
  41. else:
  42. return self
  43. def strip_last_size(self) -> "MemoryDep":
  44. nsizes = len(self.size)
  45. if not (nsizes >= 1 and len(self.index.args) <= nsizes - 1):
  46. return self
  47. # make sure last dim index is not used
  48. prefix = canonicalization_prefix()
  49. len_prefix = len(prefix)
  50. prefixes = [
  51. fs.name[:len_prefix]
  52. for fs in cast(Set[sympy.Symbol], self.index.free_symbols)
  53. ]
  54. assert (
  55. len(prefixes) == 0 or prefix in prefixes
  56. ), "index expression should contain canonicalized symbols"
  57. last_index = f"{prefix}{len(self.size)-1}"
  58. if last_index not in self.index.free_symbols:
  59. size = self.size[:-1]
  60. return MemoryDep(self.name, self.index, size)
  61. else:
  62. return self
  63. def rename(self, renames: Dict[str, str]) -> "MemoryDep":
  64. if self.name in renames:
  65. return MemoryDep(renames[self.name], self.index, self.size)
  66. return self
  67. def numbytes_hint(self):
  68. vars = set(self.index.free_symbols)
  69. size_vars_used = []
  70. for var in vars:
  71. if var.name.startswith(canonicalization_prefix()):
  72. # Sometimes with indirect indexing we have very weird symbol names
  73. assert " " not in var.name
  74. size_vars_used.append(int(var.name[len(canonicalization_prefix()) :]))
  75. return V.graph.sizevars.size_hint(
  76. sympy_product([self.size[i] for i in size_vars_used])
  77. ) * get_dtype_size(V.graph.get_dtype(self.name))
  78. def is_contiguous(self) -> bool:
  79. return isinstance(self.index, (sympy.Symbol, sympy.Integer))
  80. class StarDep(typing.NamedTuple):
  81. # depends on the entire buffer
  82. name: str
  83. def rename(self, renames: Dict[str, str]) -> "StarDep":
  84. if self.name in renames:
  85. return StarDep(renames[self.name])
  86. return self
  87. def numbytes_hint(self):
  88. from .ir import MultiOutputLayout
  89. if self.name in V.graph.name_to_buffer:
  90. buf = V.graph.name_to_buffer[self.name]
  91. elif self.name in V.graph.graph_inputs:
  92. buf = V.graph.graph_inputs[self.name]
  93. else:
  94. return 1
  95. if hasattr(buf, "layout") and isinstance(buf.layout, MultiOutputLayout):
  96. # NB: Too annoying to acquire, should only be used for instrumentation
  97. return 1
  98. return V.graph.sizevars.size_hint(
  99. sympy_product(buf.get_size())
  100. ) * get_dtype_size(buf.get_dtype())
  101. def is_contiguous(self) -> bool:
  102. return False
  103. # Used for tracking mutation ordering
  104. # if A reads a buffer and B mutates it
  105. # B must be ordered after A
  106. class WeakDep(typing.NamedTuple):
  107. name: str
  108. def rename(self, renames: Dict[str, str]) -> "WeakDep":
  109. if self.name in renames:
  110. return WeakDep(renames[self.name])
  111. return self
  112. def numbytes_hint(self):
  113. return 1 # Purely inserted for ordering, not an actual dep
  114. def is_contiguous(self) -> bool:
  115. return False
  116. class IndexExprDep(typing.NamedTuple):
  117. index: sympy.Expr # type: ignore[assignment]
  118. size: Tuple[sympy.Expr, ...]
  119. @dataclasses.dataclass
  120. class ReadWrites:
  121. reads: Set[Dep]
  122. writes: Set[Dep]
  123. index_exprs: Set[IndexExprDep]
  124. range_vars: Optional[List[sympy.Expr]] = None
  125. var_ranges: Optional[VarRanges] = None
  126. def rename(self, renames: typing.Dict[str, str]) -> "ReadWrites":
  127. return ReadWrites(
  128. {dep.rename(renames) for dep in self.reads},
  129. {dep.rename(renames) for dep in self.writes},
  130. self.index_exprs,
  131. self.range_vars,
  132. self.var_ranges,
  133. )
  134. def with_read(self, dep: Dep) -> "ReadWrites":
  135. assert isinstance(dep, (WeakDep, StarDep))
  136. return ReadWrites(
  137. set.union(self.reads, {dep}),
  138. self.writes,
  139. self.index_exprs,
  140. self.range_vars,
  141. self.var_ranges,
  142. )
  143. def merge(self, other):
  144. reads = set.union(self.reads, other.reads)
  145. writes = set.union(self.writes, other.writes)
  146. index_exprs = set.union(self.index_exprs, other.index_exprs)
  147. return ReadWrites(
  148. reads - writes,
  149. writes,
  150. index_exprs,
  151. )
  152. def remove_reads(self, rem_reads):
  153. return ReadWrites(
  154. self.reads - rem_reads,
  155. self.writes,
  156. self.index_exprs,
  157. self.range_vars,
  158. self.var_ranges,
  159. )
  160. class _RecordLoadStoreInner(V.MockHandler):
  161. def __init__(self, var_ranges: VarRanges, normalize: bool):
  162. super().__init__()
  163. self._reads: Set[MemoryDep] = set()
  164. self._writes: Set[MemoryDep] = set()
  165. self._index_exprs: Set[IndexExprDep] = set()
  166. self._var_ranges: VarRanges = var_ranges
  167. self._normalize: bool = normalize
  168. def canonicalize(
  169. self, index: sympy.Expr
  170. ) -> Tuple[sympy.Expr, Tuple[sympy.Expr, ...]]:
  171. sizes = list(self._var_ranges.values())
  172. sizes = [V.graph.sizevars.simplify(x) for x in sizes]
  173. if not self._normalize:
  174. return index, tuple([x for x in sizes if x != 1])
  175. # Try to further simplify the indexes even if simplify_loops didn't
  176. # convert it to the simpliest form because of the interference from
  177. # different indexing formulas.
  178. index_vars = list(self._var_ranges.keys())
  179. new_sizes, reindex, prune = V.graph.sizevars._simplify_loops(
  180. index_vars,
  181. sizes,
  182. index_prevent_reordering([index], index_vars, sizes),
  183. )
  184. # assign new variables each dimension to deal with numbering mismatches
  185. # d0, d1, d2 could become d0, d2 -- which won't match d0, d1
  186. _, add_var = var_builder(canonicalization_prefix())
  187. replacement = dict(zip(index_vars, reindex([add_var(x) for x in new_sizes])))
  188. index = sympy_subs(sympy.expand(index), replacement)
  189. return index, tuple(new_sizes)
  190. def load(self, name: str, index: sympy.Expr) -> str:
  191. canonicalized_index, canonicalized_size = self.canonicalize(index)
  192. self._reads.add(MemoryDep(name, canonicalized_index, canonicalized_size))
  193. return f"load({name}, {sympy_str(index)})"
  194. def store(self, name: str, index: sympy.Expr, value: str, mode=None) -> str:
  195. canonicalized_index, canonicalized_size = self.canonicalize(index)
  196. self._writes.add(MemoryDep(name, canonicalized_index, canonicalized_size))
  197. return f"store({name}, {sympy_str(index)}, {value}, {mode})"
  198. def reduction(
  199. self, name: str, dtype, src_dtype, reduction_type, index, value
  200. ) -> str:
  201. return self.store(name, index, f"reduce_{reduction_type})({value})")
  202. def index_expr(self, index: sympy.Expr, dtype) -> str:
  203. canonicalized_index, canonicalized_size = self.canonicalize(index)
  204. self._index_exprs.add(IndexExprDep(canonicalized_index, canonicalized_size))
  205. return f"index_expr({sympy_str(index)}, {dtype})"
  206. class RecordLoadStore(V.KernelFormatterHandler):
  207. def __init__(self, var_ranges: VarRanges, normalize: bool):
  208. parent_handler = _RecordLoadStoreInner(
  209. var_ranges=var_ranges, normalize=normalize
  210. )
  211. super().__init__(parent_handler=parent_handler)
  212. def var_builder(prefix: str) -> Tuple[VarRanges, Callable[[sympy.Expr], sympy.Symbol]]:
  213. cnt = itertools.count()
  214. var_ranges: VarRanges = collections.OrderedDict()
  215. def add_var(length: sympy.Expr) -> sympy.Symbol:
  216. v = sympy_symbol(f"{prefix}{next(cnt)}")
  217. var_ranges[v] = length
  218. return v
  219. return var_ranges, add_var
  220. def index_vars_no_squeeze(*argsizes: Tuple[sympy.Expr, ...], prefix: str):
  221. var_ranges, add_var = var_builder(prefix)
  222. args: List[List[sympy.Symbol]] = []
  223. for size in argsizes:
  224. args.append(list(map(add_var, size)))
  225. return args, var_ranges
  226. def index_vars_squeeze(*argsizes: Tuple[sympy.Expr, ...], prefix: str = "d"):
  227. from .ir import SqueezeView
  228. var_ranges, add_var = var_builder(prefix)
  229. args: List[List[sympy.Expr]] = []
  230. new_sizes: List[List[sympy.Expr]] = []
  231. for size in argsizes:
  232. new_size, reindex = SqueezeView.squeezer(size)
  233. new_sizes.append(new_size)
  234. args.append(reindex(list(map(add_var, new_size))))
  235. return new_sizes, args, var_ranges
  236. def extract_read_writes(
  237. fn: Callable,
  238. *argsizes: Tuple[sympy.Expr, ...],
  239. normalize: bool = False,
  240. prefix: str = "d",
  241. ):
  242. _, args, var_ranges = index_vars_squeeze(*argsizes, prefix=prefix)
  243. rw = RecordLoadStore(var_ranges, normalize=normalize)
  244. with V.set_ops_handler(rw): # type: ignore[call-arg]
  245. fn(*args)
  246. if normalize:
  247. range_vars = [] # Number of vars could differ due to normalization
  248. else:
  249. range_vars = [*itertools.chain(*args)]
  250. inner = rw.parent_handler
  251. return ReadWrites(
  252. set(inner._reads),
  253. set(inner._writes),
  254. inner._index_exprs,
  255. range_vars,
  256. var_ranges,
  257. )
  258. def canonicalization_prefix():
  259. return "c"