logging_tensor.py

import torch
from torch.utils._pytree import tree_map
from typing import Iterator, List
import logging
import contextlib
import itertools
from torch.utils._python_dispatch import TorchDispatchMode

# How the chain of calls works for LoggingTensor:
# 1. Call torch.sin
# 2. Attempt __torch_function__. In LoggingTensor torch function is disabled so we bypass it entirely
# 3. Enter dispatcher, wind your way through Autograd
# 4. Hit Python dispatch key, call __torch_dispatch__

# This Tensor can work with autograd in two ways:
#  - The wrapped Tensor does not require gradients. In that case, the LoggingTensor
#    can require gradients if the user asks for it as a constructor kwarg.
#  - The wrapped Tensor can require gradients. In that case autograd will be tracked
#    for the wrapped Tensor and the LoggingTensor itself cannot require gradients.
#
# WARNING: We allow these two possibilities for testing purposes. You should NEVER use both in a single
# test or you might get surprising behavior.

# TODO: TensorBase should work
class LoggingTensor(torch.Tensor):
    elem: torch.Tensor

    __slots__ = ['elem']

    context = contextlib.nullcontext

    __torch_function__ = torch._C._disabled_torch_function_impl

    @staticmethod
    def __new__(cls, elem, *args, **kwargs):
        # The wrapping tensor (LoggingTensor) shouldn't hold any
        # memory for the class in question, but it should still
        # advertise the same device as before
        r = torch.Tensor._make_wrapper_subclass(  # type: ignore[attr-defined]
            cls, elem.size(),
            strides=elem.stride(), storage_offset=elem.storage_offset(),
            # TODO: clone storage aliasing
            dtype=elem.dtype, layout=elem.layout,
            device=elem.device, requires_grad=kwargs.get("requires_grad", False)
        )
        # ...the real tensor is held as an element on the tensor.
        r.elem = elem.detach() if r.requires_grad else elem
        return r

    def __repr__(self):
        return super().__repr__(tensor_contents=f"{self.elem}")

    @classmethod
    def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
        def unwrap(e):
            return e.elem if isinstance(e, cls) else e

        def wrap(e):
            return cls(e) if isinstance(e, torch.Tensor) else e

        with cls.context():
            rs = tree_map(wrap, func(*tree_map(unwrap, args), **tree_map(unwrap, kwargs)))
        logging.getLogger("LoggingTensor").info(f"{func.__module__}.{func.__name__}", args, kwargs, rs)
        return rs
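
# Illustrative usage sketch (not part of the original file): wrapping a plain
# tensor in a LoggingTensor sends every op through __torch_dispatch__ above, so
# the underlying aten call is logged and the result comes back re-wrapped. The
# helper name below is hypothetical; it relies on capture_logs and log_input,
# which are defined further down in this file, and the log strings shown in
# the comments are indicative only.
def _example_logging_tensor_usage():
    x = LoggingTensor(torch.randn(3))
    with capture_logs() as logs:
        log_input("x", x)
        y = torch.sin(x)  # dispatched as an aten op on x.elem, then re-wrapped
    # logs now holds entries along the lines of:
    #   $0 = input('x')
    #   $1 = torch._ops.aten.sin.default($0)
    return y, logs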

class LoggingTensorMode(TorchDispatchMode):
    def __torch_dispatch__(self, func, types, args=(), kwargs=None):
        if kwargs is None:
            kwargs = {}
        rs = func(*args, **kwargs)
        logging.getLogger("LoggingTensor").info(f"{func.__module__}.{func.__name__}", args, kwargs, rs)
        return rs
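
# Illustrative usage sketch (not part of the original file): pushing
# LoggingTensorMode logs dispatcher calls for plain torch.Tensors, with no
# subclass wrapping needed; capture_logs(True) makes the handler assign short
# ids to every Tensor, not just LoggingTensors. The helper name is
# hypothetical and the sample log line is indicative only.
def _example_logging_mode_usage():
    with LoggingTensorMode(), capture_logs(True) as logs:
        x = torch.ones(3)
        y = x + x
    # logs records each aten op hit while the mode was active, e.g. an entry
    # resembling: $1 = torch._ops.aten.add.Tensor($0, $0)
    return y, logs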

class LoggingTensorReentrant(LoggingTensor):
    context = torch.overrides.enable_reentrant_dispatch

# https://stackoverflow.com/questions/36408496/python-logging-handler-to-append-to-list
class LoggingTensorHandler(logging.Handler):
    log_list: List[str]
    next_shortid: int

    def __init__(self, log_list: List[str], use_shortid_for_all_tensors: bool) -> None:
        logging.Handler.__init__(self)
        self.log_list = log_list
        self.next_shortid = 0
        self.use_shortid_for_all_tensors = use_shortid_for_all_tensors

    # WARNING: not deterministic over multiple threads, this matters for
    # autograd
    def _shortid(self, o: object) -> int:
        if not hasattr(o, '_shortid'):
            o._shortid = self.next_shortid  # type: ignore[attr-defined]
            self.next_shortid += 1
        return o._shortid  # type: ignore[attr-defined]

    def _fmt(self, a: object) -> str:
        cond_cls = torch.Tensor if self.use_shortid_for_all_tensors else LoggingTensor
        return f'${self._shortid(a)}' if isinstance(a, cond_cls) else repr(a)

    def emit(self, record):
        fmt_args = ", ".join(itertools.chain(
            (self._fmt(a) for a in record.args[0]),
            (f"{k}={self._fmt(v)}" for k, v in record.args[1].items())
        ))
        fmt_rets = ", ".join(self._fmt(a) for a in record.args[2]) \
            if isinstance(record.args[2], (list, tuple)) else self._fmt(record.args[2])
        self.log_list.append(f'{fmt_rets} = {record.msg}({fmt_args})')

def log_input(name: str, var: object):
    logging.getLogger("LoggingTensor").info("input", (name,), {}, (var,))

@contextlib.contextmanager
def capture_logs(is_mode=False) -> Iterator[List[str]]:
    logger = logging.getLogger("LoggingTensor")
    log_list: List[str] = []
    handler = LoggingTensorHandler(log_list, use_shortid_for_all_tensors=is_mode)
    logger.addHandler(handler)
    logger.setLevel(logging.INFO)
    logger.propagate = False
    try:
        yield log_list
    finally:
        logger.removeHandler(handler)

@contextlib.contextmanager
def capture_logs_with_logging_tensor_mode():
    with LoggingTensorMode(), capture_logs(True) as logs:
        yield logs
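
# End-to-end sketch (not part of the original file): the convenience wrapper
# above combines the mode with log capture, so ordinary tensor code can be
# traced without any manual wrapping. Output lines are indicative only.
if __name__ == "__main__":
    with capture_logs_with_logging_tensor_mode() as logs:
        a = torch.ones(2)
        b = a * 2
    print("\n".join(logs))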