bytecode_analysis.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. import dataclasses
  2. import dis
  3. import sys
  4. from numbers import Real
  5. TERMINAL_OPCODES = {
  6. dis.opmap["RETURN_VALUE"],
  7. dis.opmap["JUMP_FORWARD"],
  8. dis.opmap["RAISE_VARARGS"],
  9. # TODO(jansel): double check exception handling
  10. }
  11. if sys.version_info >= (3, 9):
  12. TERMINAL_OPCODES.add(dis.opmap["RERAISE"])
  13. if sys.version_info >= (3, 11):
  14. TERMINAL_OPCODES.add(dis.opmap["JUMP_BACKWARD"])
  15. else:
  16. TERMINAL_OPCODES.add(dis.opmap["JUMP_ABSOLUTE"])
  17. JUMP_OPCODES = set(dis.hasjrel + dis.hasjabs)
  18. JUMP_OPNAMES = {dis.opname[opcode] for opcode in JUMP_OPCODES}
  19. HASLOCAL = set(dis.haslocal)
  20. HASFREE = set(dis.hasfree)
  21. stack_effect = dis.stack_effect
  22. def remove_dead_code(instructions):
  23. """Dead code elimination"""
  24. indexof = {id(inst): i for i, inst in enumerate(instructions)}
  25. live_code = set()
  26. def find_live_code(start):
  27. for i in range(start, len(instructions)):
  28. if i in live_code:
  29. return
  30. live_code.add(i)
  31. inst = instructions[i]
  32. if inst.opcode in JUMP_OPCODES:
  33. find_live_code(indexof[id(inst.target)])
  34. if inst.opcode in TERMINAL_OPCODES:
  35. return
  36. find_live_code(0)
  37. return [inst for i, inst in enumerate(instructions) if i in live_code]
  38. def remove_pointless_jumps(instructions):
  39. """Eliminate jumps to the next instruction"""
  40. pointless_jumps = {
  41. id(a)
  42. for a, b in zip(instructions, instructions[1:])
  43. if a.opname == "JUMP_ABSOLUTE" and a.target is b
  44. }
  45. return [inst for inst in instructions if id(inst) not in pointless_jumps]
  46. def propagate_line_nums(instructions):
  47. """Ensure every instruction has line number set in case some are removed"""
  48. cur_line_no = None
  49. def populate_line_num(inst):
  50. nonlocal cur_line_no
  51. if inst.starts_line:
  52. cur_line_no = inst.starts_line
  53. inst.starts_line = cur_line_no
  54. for inst in instructions:
  55. populate_line_num(inst)
  56. def remove_extra_line_nums(instructions):
  57. """Remove extra starts line properties before packing bytecode"""
  58. cur_line_no = None
  59. def remove_line_num(inst):
  60. nonlocal cur_line_no
  61. if inst.starts_line is None:
  62. return
  63. elif inst.starts_line == cur_line_no:
  64. inst.starts_line = None
  65. else:
  66. cur_line_no = inst.starts_line
  67. for inst in instructions:
  68. remove_line_num(inst)
  69. @dataclasses.dataclass
  70. class ReadsWrites:
  71. reads: set
  72. writes: set
  73. visited: set
  74. def livevars_analysis(instructions, instruction):
  75. indexof = {id(inst): i for i, inst in enumerate(instructions)}
  76. must = ReadsWrites(set(), set(), set())
  77. may = ReadsWrites(set(), set(), set())
  78. def walk(state, start):
  79. if start in state.visited:
  80. return
  81. state.visited.add(start)
  82. for i in range(start, len(instructions)):
  83. inst = instructions[i]
  84. if inst.opcode in HASLOCAL or inst.opcode in HASFREE:
  85. if "LOAD" in inst.opname or "DELETE" in inst.opname:
  86. if inst.argval not in must.writes:
  87. state.reads.add(inst.argval)
  88. elif "STORE" in inst.opname:
  89. state.writes.add(inst.argval)
  90. else:
  91. raise NotImplementedError(f"unhandled {inst.opname}")
  92. if inst.opcode in JUMP_OPCODES:
  93. walk(may, indexof[id(inst.target)])
  94. state = may
  95. if inst.opcode in TERMINAL_OPCODES:
  96. return
  97. walk(must, indexof[id(instruction)])
  98. return must.reads | may.reads
  99. @dataclasses.dataclass
  100. class FixedPointBox:
  101. value: bool = True
  102. @dataclasses.dataclass
  103. class StackSize:
  104. low: Real
  105. high: Real
  106. fixed_point: FixedPointBox
  107. def zero(self):
  108. self.low = 0
  109. self.high = 0
  110. self.fixed_point.value = False
  111. def offset_of(self, other, n):
  112. prior = (self.low, self.high)
  113. self.low = min(self.low, other.low + n)
  114. self.high = max(self.high, other.high + n)
  115. if (self.low, self.high) != prior:
  116. self.fixed_point.value = False
  117. def stacksize_analysis(instructions):
  118. assert instructions
  119. fixed_point = FixedPointBox()
  120. stack_sizes = {
  121. inst: StackSize(float("inf"), float("-inf"), fixed_point)
  122. for inst in instructions
  123. }
  124. stack_sizes[instructions[0]].zero()
  125. for _ in range(100):
  126. if fixed_point.value:
  127. break
  128. fixed_point.value = True
  129. for inst, next_inst in zip(instructions, instructions[1:] + [None]):
  130. stack_size = stack_sizes[inst]
  131. if inst.opcode not in TERMINAL_OPCODES:
  132. assert next_inst is not None, f"missing next inst: {inst}"
  133. stack_sizes[next_inst].offset_of(
  134. stack_size, stack_effect(inst.opcode, inst.arg, jump=False)
  135. )
  136. if inst.opcode in JUMP_OPCODES:
  137. stack_sizes[inst.target].offset_of(
  138. stack_size, stack_effect(inst.opcode, inst.arg, jump=True)
  139. )
  140. if False:
  141. for inst in instructions:
  142. stack_size = stack_sizes[inst]
  143. print(stack_size.low, stack_size.high, inst)
  144. low = min([x.low for x in stack_sizes.values()])
  145. high = max([x.high for x in stack_sizes.values()])
  146. assert fixed_point.value, "failed to reach fixed point"
  147. assert low >= 0
  148. return high