relocate.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
  1. """Helper script to package wheels and relocate binaries."""
  2. import glob
  3. import hashlib
  4. # Standard library imports
  5. import os
  6. import os.path as osp
  7. import platform
  8. import shutil
  9. import subprocess
  10. import sys
  11. import zipfile
  12. from base64 import urlsafe_b64encode
  13. # Third party imports
  14. if sys.platform == "linux":
  15. from auditwheel.lddtree import lddtree
# Shared objects that relocate_elf_library never copies into the wheel
# (glibc/gcc/system libraries); they are looked up by exact file name.
ALLOWLIST = {
    "libgcc_s.so.1",
    "libstdc++.so.6",
    "libm.so.6",
    "libdl.so.2",
    "librt.so.1",
    "libc.so.6",
    "libnsl.so.1",
    "libutil.so.1",
    "libpthread.so.0",
    "libresolv.so.2",
    "libX11.so.6",
    "libXext.so.6",
    "libXrender.so.1",
    "libICE.so.6",
    "libSM.so.6",
    "libGL.so.1",
    "libgobject-2.0.so.0",
    "libgthread-2.0.so.0",
    "libglib-2.0.so.0",
    "ld-linux-x86-64.so.2",
    "ld-2.17.so",
}

# DLLs that relocate_dll_library never copies into the wheel; that function
# additionally skips every name starting with "api-ms-win".
WINDOWS_ALLOWLIST = {
    "MSVCP140.dll",
    "KERNEL32.dll",
    "VCRUNTIME140_1.dll",
    "VCRUNTIME140.dll",
    "api-ms-win-crt-heap-l1-1-0.dll",
    "api-ms-win-crt-runtime-l1-1-0.dll",
    "api-ms-win-crt-stdio-l1-1-0.dll",
    "api-ms-win-crt-filesystem-l1-1-0.dll",
    "api-ms-win-crt-string-l1-1-0.dll",
    "api-ms-win-crt-environment-l1-1-0.dll",
    "api-ms-win-crt-math-l1-1-0.dll",
    "api-ms-win-crt-convert-l1-1-0.dll",
}

# Absolute path of the directory that contains this script.
HERE = osp.dirname(osp.abspath(__file__))
# Two directory levels above HERE; patch_linux/patch_win look for wheels in
# its "dist" sub-directory.
PACKAGE_ROOT = osp.dirname(osp.dirname(HERE))
# Machine type string as reported by platform.machine().
PLATFORM_ARCH = platform.machine()
# Version info of the interpreter running this script.
PYTHON_VERSION = sys.version_info
  57. def rehash(path, blocksize=1 << 20):
  58. """Return (hash, length) for path using hashlib.sha256()"""
  59. h = hashlib.sha256()
  60. length = 0
  61. with open(path, "rb") as f:
  62. while block := f.read(blocksize):
  63. length += len(block)
  64. h.update(block)
  65. digest = "sha256=" + urlsafe_b64encode(h.digest()).decode("latin1").rstrip("=")
  66. # unicode/str python2 issues
  67. return (digest, str(length)) # type: ignore
  68. def unzip_file(file, dest):
  69. """Decompress zip `file` into directory `dest`."""
  70. with zipfile.ZipFile(file, "r") as zip_ref:
  71. zip_ref.extractall(dest)
  72. def is_program_installed(basename):
  73. """
  74. Return program absolute path if installed in PATH.
  75. Otherwise, return None
  76. On macOS systems, a .app is considered installed if
  77. it exists.
  78. """
  79. if sys.platform == "darwin" and basename.endswith(".app") and osp.exists(basename):
  80. return basename
  81. for path in os.environ["PATH"].split(os.pathsep):
  82. abspath = osp.join(path, basename)
  83. if osp.isfile(abspath):
  84. return abspath
  85. def find_program(basename):
  86. """
  87. Find program in PATH and return absolute path
  88. Try adding .exe or .bat to basename on Windows platforms
  89. (return None if not found)
  90. """
  91. names = [basename]
  92. if os.name == "nt":
  93. # Windows platforms
  94. extensions = (".exe", ".bat", ".cmd", ".dll")
  95. if not basename.endswith(extensions):
  96. names = [basename + ext for ext in extensions] + [basename]
  97. for name in names:
  98. path = is_program_installed(name)
  99. if path:
  100. return path
  101. def patch_new_path(library_path, new_dir):
  102. library = osp.basename(library_path)
  103. name, *rest = library.split(".")
  104. rest = ".".join(rest)
  105. hash_id = hashlib.sha256(library_path.encode("utf-8")).hexdigest()[:8]
  106. new_name = ".".join([name, hash_id, rest])
  107. return osp.join(new_dir, new_name)
  108. def find_dll_dependencies(dumpbin, binary):
  109. out = subprocess.run([dumpbin, "/dependents", binary], stdout=subprocess.PIPE)
  110. out = out.stdout.strip().decode("utf-8")
  111. start_index = out.find("dependencies:") + len("dependencies:")
  112. end_index = out.find("Summary")
  113. dlls = out[start_index:end_index].strip()
  114. dlls = dlls.split(os.linesep)
  115. dlls = [dll.strip() for dll in dlls]
  116. return dlls
  117. def relocate_elf_library(patchelf, output_dir, output_library, binary):
  118. """
  119. Relocate an ELF shared library to be packaged on a wheel.
  120. Given a shared library, find the transitive closure of its dependencies,
  121. rename and copy them into the wheel while updating their respective rpaths.
  122. """
  123. print(f"Relocating {binary}")
  124. binary_path = osp.join(output_library, binary)
  125. ld_tree = lddtree(binary_path)
  126. tree_libs = ld_tree["libs"]
  127. binary_queue = [(n, binary) for n in ld_tree["needed"]]
  128. binary_paths = {binary: binary_path}
  129. binary_dependencies = {}
  130. while binary_queue != []:
  131. library, parent = binary_queue.pop(0)
  132. library_info = tree_libs[library]
  133. print(library)
  134. if library_info["path"] is None:
  135. print(f"Omitting {library}")
  136. continue
  137. if library in ALLOWLIST:
  138. # Omit glibc/gcc/system libraries
  139. print(f"Omitting {library}")
  140. continue
  141. parent_dependencies = binary_dependencies.get(parent, [])
  142. parent_dependencies.append(library)
  143. binary_dependencies[parent] = parent_dependencies
  144. if library in binary_paths:
  145. continue
  146. binary_paths[library] = library_info["path"]
  147. binary_queue += [(n, library) for n in library_info["needed"]]
  148. print("Copying dependencies to wheel directory")
  149. new_libraries_path = osp.join(output_dir, "torchvision.libs")
  150. os.makedirs(new_libraries_path, exist_ok=True)
  151. new_names = {binary: binary_path}
  152. for library in binary_paths:
  153. if library != binary:
  154. library_path = binary_paths[library]
  155. new_library_path = patch_new_path(library_path, new_libraries_path)
  156. print(f"{library} -> {new_library_path}")
  157. shutil.copyfile(library_path, new_library_path)
  158. new_names[library] = new_library_path
  159. print("Updating dependency names by new files")
  160. for library in binary_paths:
  161. if library != binary:
  162. if library not in binary_dependencies:
  163. continue
  164. library_dependencies = binary_dependencies[library]
  165. new_library_name = new_names[library]
  166. for dep in library_dependencies:
  167. new_dep = osp.basename(new_names[dep])
  168. print(f"{library}: {dep} -> {new_dep}")
  169. subprocess.check_output(
  170. [patchelf, "--replace-needed", dep, new_dep, new_library_name], cwd=new_libraries_path
  171. )
  172. print("Updating library rpath")
  173. subprocess.check_output([patchelf, "--set-rpath", "$ORIGIN", new_library_name], cwd=new_libraries_path)
  174. subprocess.check_output([patchelf, "--print-rpath", new_library_name], cwd=new_libraries_path)
  175. print("Update library dependencies")
  176. library_dependencies = binary_dependencies[binary]
  177. for dep in library_dependencies:
  178. new_dep = osp.basename(new_names[dep])
  179. print(f"{binary}: {dep} -> {new_dep}")
  180. subprocess.check_output([patchelf, "--replace-needed", dep, new_dep, binary], cwd=output_library)
  181. print("Update library rpath")
  182. subprocess.check_output(
  183. [patchelf, "--set-rpath", "$ORIGIN:$ORIGIN/../torchvision.libs", binary_path], cwd=output_library
  184. )
  185. def relocate_dll_library(dumpbin, output_dir, output_library, binary):
  186. """
  187. Relocate a DLL/PE shared library to be packaged on a wheel.
  188. Given a shared library, find the transitive closure of its dependencies,
  189. rename and copy them into the wheel.
  190. """
  191. print(f"Relocating {binary}")
  192. binary_path = osp.join(output_library, binary)
  193. library_dlls = find_dll_dependencies(dumpbin, binary_path)
  194. binary_queue = [(dll, binary) for dll in library_dlls]
  195. binary_paths = {binary: binary_path}
  196. binary_dependencies = {}
  197. while binary_queue != []:
  198. library, parent = binary_queue.pop(0)
  199. if library in WINDOWS_ALLOWLIST or library.startswith("api-ms-win"):
  200. print(f"Omitting {library}")
  201. continue
  202. library_path = find_program(library)
  203. if library_path is None:
  204. print(f"{library} not found")
  205. continue
  206. if osp.basename(osp.dirname(library_path)) == "system32":
  207. continue
  208. print(f"{library}: {library_path}")
  209. parent_dependencies = binary_dependencies.get(parent, [])
  210. parent_dependencies.append(library)
  211. binary_dependencies[parent] = parent_dependencies
  212. if library in binary_paths:
  213. continue
  214. binary_paths[library] = library_path
  215. downstream_dlls = find_dll_dependencies(dumpbin, library_path)
  216. binary_queue += [(n, library) for n in downstream_dlls]
  217. print("Copying dependencies to wheel directory")
  218. package_dir = osp.join(output_dir, "torchvision")
  219. for library in binary_paths:
  220. if library != binary:
  221. library_path = binary_paths[library]
  222. new_library_path = osp.join(package_dir, library)
  223. print(f"{library} -> {new_library_path}")
  224. shutil.copyfile(library_path, new_library_path)
  225. def compress_wheel(output_dir, wheel, wheel_dir, wheel_name):
  226. """Create RECORD file and compress wheel distribution."""
  227. print("Update RECORD file in wheel")
  228. dist_info = glob.glob(osp.join(output_dir, "*.dist-info"))[0]
  229. record_file = osp.join(dist_info, "RECORD")
  230. with open(record_file, "w") as f:
  231. for root, _, files in os.walk(output_dir):
  232. for this_file in files:
  233. full_file = osp.join(root, this_file)
  234. rel_file = osp.relpath(full_file, output_dir)
  235. if full_file == record_file:
  236. f.write(f"{rel_file},,\n")
  237. else:
  238. digest, size = rehash(full_file)
  239. f.write(f"{rel_file},{digest},{size}\n")
  240. print("Compressing wheel")
  241. base_wheel_name = osp.join(wheel_dir, wheel_name)
  242. shutil.make_archive(base_wheel_name, "zip", output_dir)
  243. os.remove(wheel)
  244. shutil.move(f"{base_wheel_name}.zip", wheel)
  245. shutil.rmtree(output_dir)
  246. def patch_linux():
  247. # Get patchelf location
  248. patchelf = find_program("patchelf")
  249. if patchelf is None:
  250. raise FileNotFoundError("Patchelf was not found in the system, please make sure that is available on the PATH.")
  251. # Find wheel
  252. print("Finding wheels...")
  253. wheels = glob.glob(osp.join(PACKAGE_ROOT, "dist", "*.whl"))
  254. output_dir = osp.join(PACKAGE_ROOT, "dist", ".wheel-process")
  255. image_binary = "image.so"
  256. video_binary = "video_reader.so"
  257. torchvision_binaries = [image_binary, video_binary]
  258. for wheel in wheels:
  259. if osp.exists(output_dir):
  260. shutil.rmtree(output_dir)
  261. os.makedirs(output_dir)
  262. print("Unzipping wheel...")
  263. wheel_file = osp.basename(wheel)
  264. wheel_dir = osp.dirname(wheel)
  265. print(f"{wheel_file}")
  266. wheel_name, _ = osp.splitext(wheel_file)
  267. unzip_file(wheel, output_dir)
  268. print("Finding ELF dependencies...")
  269. output_library = osp.join(output_dir, "torchvision")
  270. for binary in torchvision_binaries:
  271. if osp.exists(osp.join(output_library, binary)):
  272. relocate_elf_library(patchelf, output_dir, output_library, binary)
  273. compress_wheel(output_dir, wheel, wheel_dir, wheel_name)
  274. def patch_win():
  275. # Get dumpbin location
  276. dumpbin = find_program("dumpbin")
  277. if dumpbin is None:
  278. raise FileNotFoundError("Dumpbin was not found in the system, please make sure that is available on the PATH.")
  279. # Find wheel
  280. print("Finding wheels...")
  281. wheels = glob.glob(osp.join(PACKAGE_ROOT, "dist", "*.whl"))
  282. output_dir = osp.join(PACKAGE_ROOT, "dist", ".wheel-process")
  283. image_binary = "image.pyd"
  284. video_binary = "video_reader.pyd"
  285. torchvision_binaries = [image_binary, video_binary]
  286. for wheel in wheels:
  287. if osp.exists(output_dir):
  288. shutil.rmtree(output_dir)
  289. os.makedirs(output_dir)
  290. print("Unzipping wheel...")
  291. wheel_file = osp.basename(wheel)
  292. wheel_dir = osp.dirname(wheel)
  293. print(f"{wheel_file}")
  294. wheel_name, _ = osp.splitext(wheel_file)
  295. unzip_file(wheel, output_dir)
  296. print("Finding DLL/PE dependencies...")
  297. output_library = osp.join(output_dir, "torchvision")
  298. for binary in torchvision_binaries:
  299. if osp.exists(osp.join(output_library, binary)):
  300. relocate_dll_library(dumpbin, output_dir, output_library, binary)
  301. compress_wheel(output_dir, wheel, wheel_dir, wheel_name)
  302. if __name__ == "__main__":
  303. if sys.platform == "linux":
  304. patch_linux()
  305. elif sys.platform == "win32":
  306. patch_win()