# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import torch
import functools
from torch import Tensor
from typing import Any, Callable, Optional, Tuple, Union, List
from torch.utils._pytree import tree_flatten, tree_unflatten, _broadcast_to_and_flatten, TreeSpec
from .pytree_hacks import tree_map_
from functools import partial
import os
import itertools

from torch._C._functorch import (
    _add_batch_dim,
    _remove_batch_dim,
    _vmap_decrement_nesting,
    _vmap_increment_nesting,
    is_batchedtensor,
)

from torch._functorch.utils import exposed_in

in_dims_t = Union[int, Tuple]
out_dims_t = Union[int, Tuple[int, ...]]


def doesnt_support_saved_tensors_hooks(f):
    message = (
        "torch.func transforms don't yet support saved tensor hooks. "
        "Please open an issue with your use case."
    )

    @functools.wraps(f)
    def fn(*args, **kwargs):
        with torch.autograd.graph.disable_saved_tensors_hooks(message):
            return f(*args, **kwargs)
    return fn


# Checks that all args-to-be-batched have the same batch dim size
def _validate_and_get_batch_size(
        flat_in_dims: List[Optional[int]],
        flat_args: List) -> int:
    batch_sizes = [arg.size(in_dim) for in_dim, arg in zip(flat_in_dims, flat_args)
                   if in_dim is not None]
    if len(batch_sizes) == 0:
        raise ValueError('vmap: Expected at least one Tensor to vmap over')
    if batch_sizes and any(size != batch_sizes[0] for size in batch_sizes):
        raise ValueError(
            f'vmap: Expected all tensors to have the same size in the mapped '
            f'dimension, got sizes {batch_sizes} for the mapped dimension')
    return batch_sizes[0]
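
# For example: with flat_in_dims=[0, None, 1] and tensors of shape [2, 3], [7]
# and [5, 2], the mapped sizes are [2, 2], so 2 is returned; mismatched mapped
# sizes raise a ValueError.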


def _num_outputs(batched_outputs: Union[Tensor, Tuple[Tensor, ...]]) -> int:
    if isinstance(batched_outputs, tuple):
        return len(batched_outputs)
    return 1


# If value is a tuple, check it has length `num_elements`.
# If value is not a tuple, make a tuple with `value` repeated `num_elements` times
def _as_tuple(value: Any, num_elements: int, error_message_lambda: Callable[[], str]) -> Tuple:
    if not isinstance(value, tuple):
        return (value,) * num_elements
    if len(value) != num_elements:
        raise ValueError(error_message_lambda())
    return value
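
# For example: _as_tuple(0, 3, msg) returns (0, 0, 0), while
# _as_tuple((0, 1), 3, msg) raises ValueError(msg()) because the lengths differ.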


def _process_batched_inputs(
    in_dims: in_dims_t, args: Tuple, func: Callable
) -> Tuple[int, List[Any], List[Any], TreeSpec]:
    if not isinstance(in_dims, int) and not isinstance(in_dims, tuple):
        raise ValueError(
            f'vmap({_get_name(func)}, in_dims={in_dims}, ...)(<inputs>): '
            f'expected `in_dims` to be int or a (potentially nested) tuple '
            f'matching the structure of inputs, got: {type(in_dims)}.')
    if len(args) == 0:
        raise ValueError(
            f'vmap({_get_name(func)})(<inputs>): got no inputs. Maybe you forgot to add '
            f'inputs, or you are trying to vmap over a function with no inputs. '
            f'The latter is unsupported.')

    flat_args, args_spec = tree_flatten(args)
    flat_in_dims = _broadcast_to_and_flatten(in_dims, args_spec)
    if flat_in_dims is None:
        raise ValueError(
            f'vmap({_get_name(func)}, in_dims={in_dims}, ...)(<inputs>): '
            f'in_dims is not compatible with the structure of `inputs`. '
            f'in_dims has structure {tree_flatten(in_dims)[1]} but inputs '
            f'has structure {args_spec}.')

    for i, (arg, in_dim) in enumerate(zip(flat_args, flat_in_dims)):
        if not isinstance(in_dim, int) and in_dim is not None:
            raise ValueError(
                f'vmap({_get_name(func)}, in_dims={in_dims}, ...)(<inputs>): '
                f'Got in_dim={in_dim} for an input but in_dim must be either '
                f'an integer dimension or None.')
        if isinstance(in_dim, int) and not isinstance(arg, Tensor):
            raise ValueError(
                f'vmap({_get_name(func)}, in_dims={in_dims}, ...)(<inputs>): '
                f'Got in_dim={in_dim} for an input but the input is of type '
                f'{type(arg)}. We cannot vmap over non-Tensor arguments, '
                f'please use None as the respective in_dim')
        if in_dim is not None and (in_dim < -arg.dim() or in_dim >= arg.dim()):
            raise ValueError(
                f'vmap({_get_name(func)}, in_dims={in_dims}, ...)(<inputs>): '
                f'Got in_dim={in_dim} for some input, but that input is a Tensor '
                f'of dimensionality {arg.dim()} so expected in_dim to satisfy '
                f'-{arg.dim()} <= in_dim < {arg.dim()}.')
        if in_dim is not None and in_dim < 0:
            flat_in_dims[i] = in_dim % arg.dim()

    return _validate_and_get_batch_size(flat_in_dims, flat_args), flat_in_dims, flat_args, args_spec
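
# For example: with args = (torch.randn(2, 3), torch.randn(3, 2)) and
# in_dims=(0, -1), the negative dim is normalized to 1 and the result is
# (2, [0, 1], list(args), <TreeSpec of a 2-tuple>).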


# Creates BatchedTensors for every Tensor in arg that should be batched.
# Returns the (potentially) batched arguments and the batch_size.
def _create_batched_inputs(
        flat_in_dims: List[Any], flat_args: List[Any], vmap_level: int, args_spec) -> Tuple:
    # See NOTE [Ignored _remove_batch_dim, _add_batch_dim]
    batched_inputs = [arg if in_dim is None else
                      _add_batch_dim(arg, in_dim, vmap_level)
                      for in_dim, arg in zip(flat_in_dims, flat_args)]
    return tree_unflatten(batched_inputs, args_spec)
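
# Illustrative sketch: with flat_in_dims=[None, 0], the first argument passes
# through unchanged while the second is wrapped via _add_batch_dim, so inside
# `func` it behaves like a single example (the batch dim is hidden at vmap_level).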


def _maybe_remove_batch_dim(name, batched_output, vmap_level, batch_size, out_dim):
    if out_dim is None:
        if isinstance(batched_output, torch.Tensor) and is_batchedtensor(batched_output):
            raise ValueError(
                f'vmap({name}, ...): `{name}` can not return a '
                f'BatchedTensor when out_dim is None'
            )
        return batched_output

    # out_dim is not None
    if not isinstance(batched_output, torch.Tensor):
        raise ValueError(f'vmap({name}, ...): `{name}` must only return '
                         f'Tensors, got type {type(batched_output)}. '
                         'Did you mean to set out_dim= to None for output?')

    return _remove_batch_dim(batched_output, vmap_level, batch_size, out_dim)


# Undoes the batching (and any batch dimensions) associated with the `vmap_level`.
def _unwrap_batched(
        batched_outputs: Union[Tensor, Tuple[Tensor, ...]],
        out_dims: out_dims_t,
        vmap_level: int, batch_size: int, func: Callable) -> Tuple:
    flat_batched_outputs, output_spec = tree_flatten(batched_outputs)

    def incompatible_error():
        raise ValueError(
            f'vmap({_get_name(func)}, ..., out_dims={out_dims})(<inputs>): '
            f'out_dims is not compatible with the structure of `outputs`. '
            f'out_dims has structure {tree_flatten(out_dims)[1]} but outputs '
            f'has structure {output_spec}.')

    if isinstance(batched_outputs, torch.Tensor):
        # Some weird edge case requires us to spell out the following
        # see test_out_dims_edge_case
        if isinstance(out_dims, int):
            flat_out_dims = [out_dims]
        elif isinstance(out_dims, tuple) and len(out_dims) == 1:
            flat_out_dims = out_dims
        elif out_dims is None:
            flat_out_dims = [out_dims]
        else:
            incompatible_error()
    else:
        flat_out_dims = _broadcast_to_and_flatten(out_dims, output_spec)
        if flat_out_dims is None:
            incompatible_error()

    flat_outputs = [
        _maybe_remove_batch_dim(_get_name(func), batched_output, vmap_level, batch_size, out_dim)
        for batched_output, out_dim in zip(flat_batched_outputs, flat_out_dims)
    ]
    return tree_unflatten(flat_outputs, output_spec)
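
# For example: with batch_size=2 and a per-example output of shape [5],
# out_dims=0 yields a [2, 5] result while out_dims=1 yields a [5, 2] result
# (the same behavior as the out_dims example in the vmap docstring below).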


def _check_int_or_none(x, func, out_dims):
    if isinstance(x, int):
        return
    if x is None:
        return
    raise ValueError(
        f'vmap({_get_name(func)}, ..., out_dims={out_dims}): `out_dims` must be '
        f'an int, None or a python collection of ints representing where in the outputs the '
        f'vmapped dimension should appear.')


def _check_out_dims_is_int_or_int_pytree(out_dims: out_dims_t, func: Callable) -> None:
    if isinstance(out_dims, int):
        return
    tree_map_(partial(_check_int_or_none, func=func, out_dims=out_dims), out_dims)


def _get_name(func: Callable):
    if hasattr(func, '__name__'):
        return func.__name__

    # Not all callables have __name__, in fact, only static functions/methods do.
    # A callable created via functools.partial or an nn.Module, to name some
    # examples, don't have a __name__.
    return repr(func)


DECOMPOSITIONS_LOADED = False
VMAP_DECOMPOSITIONS_LIB = None


# torch.package, Python 3.11, and torch.jit-less environments are unhappy with
# decompositions. Only load them when needed if possible.
def lazy_load_decompositions():
    global DECOMPOSITIONS_LOADED
    if DECOMPOSITIONS_LOADED:
        return
    DECOMPOSITIONS_LOADED = True

    if not (os.environ.get("PYTORCH_JIT", "1") == "1" and __debug__):
        return

    # use an alternate way to register an operator into the decomposition table
    # _register_jit_decomposition doesn't work for some operators, e.g. addr,
    # because the Tensor types generated cannot be unioned by torchscript
    # decomp should be type OpOverload
    global VMAP_DECOMPOSITIONS_LIB
    VMAP_DECOMPOSITIONS_LIB = torch.library.Library("aten", "IMPL", "FuncTorchBatched")

    from torch._decomp import decomposition_table

    def _register_python_decomposition_vmap(decomp):
        if decomp in decomposition_table:
            VMAP_DECOMPOSITIONS_LIB.impl(decomp, decomposition_table[decomp])
        else:
            raise RuntimeError(f"could not find decomposition for {decomp}")

    _register_python_decomposition_vmap(torch.ops.aten.mse_loss_backward.default)
    _register_python_decomposition_vmap(torch.ops.aten.addr.default)


# vmap(func)(inputs) wraps all Tensor inputs to be batched in BatchedTensors,
# sends those into func, and then unwraps the output BatchedTensors. Operations
# on BatchedTensors perform the batched operations that the user is asking for.
#
# vmap's randomness behavior differs from JAX's, which would require a PRNG key
# to be passed everywhere.
@exposed_in('torch.func')
def vmap(
        func: Callable,
        in_dims: in_dims_t = 0,
        out_dims: out_dims_t = 0,
        randomness: str = 'error',
        *,
        chunk_size=None) -> Callable:
  216. """
  217. vmap is the vectorizing map; ``vmap(func)`` returns a new function that
  218. maps ``func`` over some dimension of the inputs. Semantically, vmap
  219. pushes the map into PyTorch operations called by ``func``, effectively
  220. vectorizing those operations.
  221. vmap is useful for handling batch dimensions: one can write a function
  222. ``func`` that runs on examples and then lift it to a function that can
  223. take batches of examples with ``vmap(func)``. vmap can also be used to
  224. compute batched gradients when composed with autograd.
  225. .. note::
  226. :func:`torch.vmap` is aliased to :func:`torch.func.vmap` for
  227. convenience. Use whichever one you'd like.
  228. Args:
  229. func (function): A Python function that takes one or more arguments.
  230. Must return one or more Tensors.
  231. in_dims (int or nested structure): Specifies which dimension of the
  232. inputs should be mapped over. ``in_dims`` should have a
  233. structure like the inputs. If the ``in_dim`` for a particular
  234. input is None, then that indicates there is no map dimension.
  235. Default: 0.
  236. out_dims (int or Tuple[int]): Specifies where the mapped dimension
  237. should appear in the outputs. If ``out_dims`` is a Tuple, then
  238. it should have one element per output. Default: 0.
  239. randomness (str): Specifies whether the randomness in this
  240. vmap should be the same or different across batches. If 'different',
  241. the randomness for each batch will be different. If 'same', the
  242. randomness will be the same across batches. If 'error', any calls to
  243. random functions will error. Default: 'error'. WARNING: this flag
  244. only applies to random PyTorch operations and does not apply to
  245. Python's random module or numpy randomness.
  246. chunk_size (None or int): If None (default), apply a single vmap over inputs.
  247. If not None, then compute the vmap :attr:`chunk_size` samples at a time.
  248. Note that :attr:`chunk_size=1` is equivalent to computing the vmap with a for-loop.
  249. If you run into memory issues computing the vmap, please try a non-None chunk_size.
  250. Returns:
  251. Returns a new "batched" function. It takes the same inputs as
  252. ``func``, except each input has an extra dimension at the index
  253. specified by ``in_dims``. It takes returns the same outputs as
  254. ``func``, except each output has an extra dimension at the index
  255. specified by ``out_dims``.
  256. .. warning:
  257. :func:`vmap` works best with functional-style code. Please do not
  258. perform any side-effects in ``func``, with the exception of
  259. in-place PyTorch operations. Examples of side-effects include mutating
  260. Python data structures and assigning values to variables not captured
  261. in ``func``.

    One example of using :func:`vmap` is to compute batched dot products. PyTorch
    doesn't provide a batched ``torch.dot`` API; instead of unsuccessfully
    rummaging through docs, use :func:`vmap` to construct a new function.

        >>> torch.dot                            # [D], [D] -> []
        >>> batched_dot = torch.func.vmap(torch.dot)  # [N, D], [N, D] -> [N]
        >>> x, y = torch.randn(2, 5), torch.randn(2, 5)
        >>> batched_dot(x, y)

    :func:`vmap` can be helpful in hiding batch dimensions, leading to a simpler
    model authoring experience.

        >>> batch_size, feature_size = 3, 5
        >>> weights = torch.randn(feature_size, requires_grad=True)
        >>>
        >>> def model(feature_vec):
        >>>     # Very simple linear model with activation
        >>>     return feature_vec.dot(weights).relu()
        >>>
        >>> examples = torch.randn(batch_size, feature_size)
        >>> result = torch.vmap(model)(examples)

    :func:`vmap` can also help vectorize computations that were previously difficult
    or impossible to batch. One example is higher-order gradient computation.
    The PyTorch autograd engine computes vjps (vector-Jacobian products).
    Computing a full Jacobian matrix for some function f: R^N -> R^N usually
    requires N calls to ``autograd.grad``, one per Jacobian row. Using :func:`vmap`,
    we can vectorize the whole computation, computing the Jacobian in a single
    call to ``autograd.grad``.

        >>> # Setup
        >>> N = 5
        >>> f = lambda x: x ** 2
        >>> x = torch.randn(N, requires_grad=True)
        >>> y = f(x)
        >>> I_N = torch.eye(N)
        >>>
        >>> # Sequential approach
        >>> jacobian_rows = [torch.autograd.grad(y, x, v, retain_graph=True)[0]
        >>>                  for v in I_N.unbind()]
        >>> jacobian = torch.stack(jacobian_rows)
        >>>
        >>> # vectorized gradient computation
        >>> def get_vjp(v):
        >>>     return torch.autograd.grad(y, x, v)
        >>> jacobian = torch.vmap(get_vjp)(I_N)

    :func:`vmap` can also be nested, producing an output with multiple batched dimensions

        >>> torch.dot                            # [D], [D] -> []
        >>> batched_dot = torch.vmap(torch.vmap(torch.dot))  # [N1, N0, D], [N1, N0, D] -> [N1, N0]
        >>> x, y = torch.randn(2, 3, 5), torch.randn(2, 3, 5)
        >>> batched_dot(x, y)  # tensor of size [2, 3]

    If the inputs are not batched along the first dimension, ``in_dims`` specifies
    the dimension that each input is batched along as

        >>> torch.dot                            # [N], [N] -> []
        >>> batched_dot = torch.vmap(torch.dot, in_dims=1)  # [N, D], [N, D] -> [D]
        >>> x, y = torch.randn(2, 5), torch.randn(2, 5)
        >>> batched_dot(x, y)  # output is [5] instead of [2] if batched along the 0th dimension

    If there are multiple inputs each of which is batched along different dimensions,
    ``in_dims`` must be a tuple with the batch dimension for each input as

        >>> torch.dot                            # [D], [D] -> []
        >>> batched_dot = torch.vmap(torch.dot, in_dims=(0, None))  # [N, D], [D] -> [N]
        >>> x, y = torch.randn(2, 5), torch.randn(5)
        >>> batched_dot(x, y)  # second arg doesn't have a batch dim because in_dim[1] was None

    If the input is a Python struct, ``in_dims`` must be a tuple containing a struct
    matching the shape of the input:

        >>> f = lambda dict: torch.dot(dict['x'], dict['y'])
        >>> x, y = torch.randn(2, 5), torch.randn(5)
        >>> input = {'x': x, 'y': y}
        >>> batched_dot = torch.vmap(f, in_dims=({'x': 0, 'y': None},))
        >>> batched_dot(input)

    By default, the output is batched along the first dimension. However, it can be batched
    along any dimension by using ``out_dims``

        >>> f = lambda x: x ** 2
        >>> x = torch.randn(2, 5)
        >>> batched_pow = torch.vmap(f, out_dims=1)
        >>> batched_pow(x)  # [5, 2]

    For any function that uses kwargs, the returned function will not batch the kwargs but will
    accept kwargs

        >>> x = torch.randn([2, 5])
        >>> def fn(x, scale=4.):
        >>>     return x * scale
        >>>
        >>> batched_pow = torch.vmap(fn)
        >>> assert torch.allclose(batched_pow(x), x * 4)
        >>> batched_pow(x, scale=x)  # scale is not batched, output has shape [2, 2, 5]

    .. note::
        vmap does not provide general autobatching or handle variable-length
        sequences out of the box.
    """
    _check_randomness_arg(randomness)
    if not (chunk_size is None or chunk_size > 0):
        raise ValueError(f"vmap: chunk_size should be None or greater than 0. (got {chunk_size})")

    @functools.wraps(func)
    def wrapped(*args, **kwargs):
        lazy_load_decompositions()
        _check_out_dims_is_int_or_int_pytree(out_dims, func)
        batch_size, flat_in_dims, flat_args, args_spec = _process_batched_inputs(in_dims, args, func)

        if chunk_size is not None:
            chunks_flat_args = _get_chunked_inputs(flat_args, flat_in_dims, batch_size, chunk_size)
            return _chunked_vmap(func, flat_in_dims, chunks_flat_args,
                                 args_spec, out_dims, randomness, **kwargs)

        # If chunk_size is not specified.
        return _flat_vmap(
            func, batch_size, flat_in_dims, flat_args, args_spec, out_dims, randomness, **kwargs
        )

    return wrapped
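
# For example (illustrative): passing chunk_size processes the batch in pieces and
# concatenates the per-chunk results along out_dims, trading speed for peak memory:
# >>> x = torch.randn(1000, 5)
# >>> out = torch.vmap(torch.sum, chunk_size=256)(x)  # same values as chunk_size=None, shape [1000]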


def get_chunk_sizes(total_elems, chunk_size):
    n_chunks = total_elems // chunk_size
    chunk_sizes = [chunk_size] * n_chunks
    # remainder chunk
    remainder = total_elems % chunk_size
    if remainder != 0:
        chunk_sizes.append(remainder)
    return chunk_sizes
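
# For example: get_chunk_sizes(7, 3) == [3, 3, 1] and get_chunk_sizes(6, 3) == [3, 3].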


def _get_chunked_inputs(flat_args, flat_in_dims, batch_size, chunk_size):
    split_idxs = (batch_size,)
    if chunk_size is not None:
        chunk_sizes = get_chunk_sizes(batch_size, chunk_size)
        split_idxs = tuple(itertools.accumulate(chunk_sizes))

    flat_args_chunks = tuple(
        t.tensor_split(split_idxs, dim=in_dim) if in_dim is not None else [t, ] * len(split_idxs)
        for t, in_dim in zip(flat_args, flat_in_dims)
    )
    # transpose chunk dim and flatten structure
    # chunks_flat_args is a list of flattened args, one entry per chunk
    chunks_flat_args = zip(*flat_args_chunks)
    return chunks_flat_args
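
# For example: with batch_size=5 and chunk_size=2, chunk_sizes is [2, 2, 1] and
# split_idxs is (2, 4, 5); tensor_split then yields pieces of size 2, 2, 1 plus a
# trailing empty piece, which _chunked_vmap skips via its batch_size == 0 check.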


def _flatten_chunks_output(chunks_output_):
    # chunks_output is a list of chunked outputs
    # flatten chunked outputs:
    flat_chunks_output = []
    arg_spec = None
    for output in chunks_output_:
        flat_output, arg_specs = tree_flatten(output)
        flat_chunks_output.append(flat_output)
        if arg_spec is None:
            arg_spec = arg_specs

    # transpose chunk dim and flatten structure
    # flat_output_chunks is flat list of chunks
    flat_output_chunks = list(zip(*flat_chunks_output))
    return flat_output_chunks, arg_spec
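
# For example: chunks_output_ = [(a0, b0), (a1, b1)] flattens to [[a0, b0], [a1, b1]]
# and transposes to [(a0, a1), (b0, b1)], grouping the chunks of each output together.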


def _concat_chunked_outputs(out_dims, arg_spec, flat_output_chunks):
    # concat chunks on out_dim
    flat_out_dims = _broadcast_to_and_flatten(out_dims, arg_spec)
    assert len(flat_out_dims) == len(flat_output_chunks)
    flat_output = []
    for idx, out_dim in enumerate(flat_out_dims):
        flat_output.append(torch.cat(flat_output_chunks[idx], dim=out_dim))
        # release tensors
        flat_output_chunks[idx] = None

    return flat_output


# Applies vmap on chunked_input and returns concatenated output over the chunks.
def _chunked_vmap(func, flat_in_dims, chunks_flat_args, args_spec, out_dims, randomness, **kwargs):
    chunks_output = []
    rs = torch.get_rng_state() if randomness == "same" else None
    for flat_args in chunks_flat_args:
        batch_size = _validate_and_get_batch_size(flat_in_dims, flat_args)

        # Given the way we split the input in `_get_chunked_inputs`,
        # we may get a tensor with a `0` batch-size. We skip any computation
        # in that case.
        # Eg.
        # >>> chunk_size = 1
        # >>> batch_size = 6
        # >>> t = torch.zeros(batch_size, 1)
        # >>> t.tensor_split([1, 2, 3, 4, 5, 6])
        # (tensor([[0.]]), tensor([[0.]]), tensor([[0.]]), tensor([[0.]]),
        #  tensor([[0.]]), tensor([[0.]]), tensor([], size=(0, 1)))
        if batch_size == 0:
            continue

        if rs is not None:
            torch.set_rng_state(rs)
        chunks_output.append(
            _flat_vmap(
                func, batch_size, flat_in_dims, flat_args, args_spec, out_dims, randomness, **kwargs
            )
        )

    flat_output_chunks, arg_spec = _flatten_chunks_output(chunks_output)

    # chunked output tensors are held by both `flat_output_chunks` and `chunks_output`.
    # eagerly remove the reference from `chunks_output`.
    del chunks_output

    # concat chunks on out_dim
    flat_output = _concat_chunked_outputs(out_dims, arg_spec, flat_output_chunks)

    # finally unflatten the output
    return tree_unflatten(flat_output, arg_spec)


def chunk_vmap(
        func: Callable,
        in_dims: in_dims_t = 0,
        out_dims: out_dims_t = 0,
        randomness: str = 'error',
        chunks=2) -> Callable:
    """
    chunk_vmap is the vectorizing map (vmap) using chunks of input data. It is a mix of vmap (which vectorizes
    everything) and map (which executes things sequentially). ``chunk_vmap`` splits the input into the given
    number of chunks and vectorizes over one chunk at a time. For more details about vectorizing map, see :func:`vmap`.

    .. note::
        Please use :func:`vmap` with ``chunk_size`` argument instead of this API.

    Args:
        func (function): A Python function that takes one or more arguments.
            Must return one or more Tensors.
        in_dims (int or nested structure): Specifies which dimension of the
            inputs should be mapped over. ``in_dims`` should have a
            structure like the inputs. If the ``in_dim`` for a particular
            input is None, then that indicates there is no map dimension.
            Default: 0.
        out_dims (int or Tuple[int]): Specifies where the mapped dimension
            should appear in the outputs. If ``out_dims`` is a Tuple, then
            it should have one element per output. Default: 0.
        randomness (str): Specifies whether the randomness in this
            vmap should be the same or different across batches. If 'different',
            the randomness for each batch will be different. If 'same', the
            randomness will be the same across batches. If 'error', any calls to
            random functions will error. Default: 'error'. WARNING: this flag
            only applies to random PyTorch operations and does not apply to
            Python's random module or numpy randomness.
        chunks (int): Number of chunks to use to split the input data. Default is 2.
            If equal to 1, then :func:`vmap` is called.

    Returns:
        Returns a new "batched" function. It takes the same inputs as
        ``func``, except each input has an extra dimension at the index
        specified by ``in_dims``. It returns the same outputs as
        ``func``, except each output has an extra dimension at the index
        specified by ``out_dims``.
    """
    _check_randomness_arg(randomness)

    if chunks == 1:
        return vmap(func, in_dims=in_dims, out_dims=out_dims, randomness=randomness)

    def _get_chunk_flat_args(flat_args_, flat_in_dims_, chunks_):
        flat_args_chunks = tuple(
            t.chunk(chunks_, dim=in_dim) if in_dim is not None else [t, ] * chunks_
            for t, in_dim in zip(flat_args_, flat_in_dims_)
        )
        # transpose chunk dim and flatten structure
        # chunks_flat_args is a list of flattened args, one entry per chunk
        chunks_flat_args = zip(*flat_args_chunks)
        return chunks_flat_args

    @functools.wraps(func)
    def wrapped_with_chunks(*args, **kwargs):
        _check_out_dims_is_int_or_int_pytree(out_dims, func)
        _, flat_in_dims, flat_args, args_spec = _process_batched_inputs(in_dims, args, func)
        # Chunk flat arguments
        chunks_flat_args = _get_chunk_flat_args(flat_args, flat_in_dims, chunks)

        # Apply vmap on chunks
        return _chunked_vmap(func, flat_in_dims, chunks_flat_args, args_spec, out_dims, randomness, **kwargs)

    return wrapped_with_chunks
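
# For example (illustrative): chunk_vmap(torch.sum, chunks=4)(torch.randn(8, 3))
# runs four vmaps, each over a chunk of 2 rows, and concatenates the results
# into a tensor of shape [8].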


# Vmap refactored helper functions:
def _check_randomness_arg(randomness):
    if randomness not in ['error', 'different', 'same']:
        raise RuntimeError(f"Only allowed values for randomness are 'error', 'different', or 'same'. Got {randomness}")


@doesnt_support_saved_tensors_hooks
def _flat_vmap(func, batch_size, flat_in_dims, flat_args, args_spec, out_dims, randomness, **kwargs):
    vmap_level = _vmap_increment_nesting(batch_size, randomness)
    try:
        batched_inputs = _create_batched_inputs(flat_in_dims, flat_args, vmap_level, args_spec)
        batched_outputs = func(*batched_inputs, **kwargs)
        return _unwrap_batched(batched_outputs, out_dims, vmap_level, batch_size, func)
    finally:
        _vmap_decrement_nesting()


# `restore_vmap` is a private helper function. It is vmap but has the following
# differences:
# - instead of returning outputs, it returns an (outputs, out_dims) tuple.
#   out_dims is a pytree of same shape as outputs and contains Optional[int]
#   specifying where the vmapped dimension, if it exists, is in the corresponding output.
# - does no validation on in_dims or inputs (vmap expects at least one Tensor to be vmapped).
#   restore_vmap allows for no inputs to have the vmap dimension
# - does no validation on outputs (vmap expects only Tensor outputs)
#   restore_vmap allows for return of arbitrary outputs (not just Tensors)
#
# The TL;DR is that restore_vmap is more general than vmap and has a slightly
# different API. The relaxations are so that we can "pause" vmap in the middle
# of its execution and then "restore" it later (this is what we do in
# the generate_vmap_rule=True implementation of autograd.Function).
#
# restore_vmap can technically be used in the implementation of vmap, but doing
# that refactor is a bit technically challenging because:
# - vmap couples the tensor-wrapping code with error checking
# - vmap's tensor unwrapping code is in C++; we would need to rewrite part of it
#   in python because it overlaps with unwrap_batched
@doesnt_support_saved_tensors_hooks
def restore_vmap(func, in_dims, batch_size, randomness):
    def inner(*args, **kwargs):
        vmap_level = _vmap_increment_nesting(batch_size, randomness)
        try:
            batched_inputs = wrap_batched(args, in_dims, vmap_level)
            batched_outputs = func(*batched_inputs, **kwargs)
            return unwrap_batched(batched_outputs, vmap_level)
        finally:
            _vmap_decrement_nesting()
    return inner
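
# Illustrative sketch: unlike vmap, the returned function gives back (outputs, out_dims).
# >>> f = lambda x, y: (x * y, y)
# >>> outputs, out_dims = restore_vmap(f, (0, None), 3, 'error')(torch.randn(3, 5), torch.randn(5))
# Here outputs[0] carries a vmapped dim (out_dims[0] is an int) while outputs[1]
# was never batched, so out_dims[1] is None.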


def wrap_batched(args, bdims, level):
    flat_args, spec = tree_flatten(args)
    flat_bdims = _broadcast_to_and_flatten(bdims, spec)
    assert flat_bdims is not None
    result = _create_batched_inputs(flat_bdims, flat_args, level, spec)
    return result


def unwrap_batched(args, level):
    flat_args, spec = tree_flatten(args)
    if len(flat_args) == 0:
        return args, ()
    result = [torch._C._functorch._unwrap_batched(arg, level) if isinstance(arg, torch.Tensor)
              else (arg, None) for arg in flat_args]
    output, bdims = zip(*result)
    return tree_unflatten(output, spec), tree_unflatten(bdims, spec)
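
# For example (illustrative): unwrap_batched works on arbitrary pytrees; non-Tensor
# leaves pass through unchanged and every leaf is paired with an Optional[int] bdim,
# e.g. {'a': batched_tensor, 'b': 3} -> ({'a': tensor, 'b': 3}, {'a': <int bdim>, 'b': None}).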