123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380 |
- """Helper script to package wheels and relocate binaries."""
- import glob
- import hashlib
- # Standard library imports
- import os
- import os.path as osp
- import platform
- import shutil
- import subprocess
- import sys
- import zipfile
- from base64 import urlsafe_b64encode
- # Third party imports
- if sys.platform == "linux":
- from auditwheel.lddtree import lddtree
# Sonames that relocate_elf_library() never copies into the wheel: any
# dependency whose name appears here is reported as "Omitting ..." and skipped
# (glibc/gcc/system libraries).
ALLOWLIST = {
    "libgcc_s.so.1",
    "libstdc++.so.6",
    "libm.so.6",
    "libdl.so.2",
    "librt.so.1",
    "libc.so.6",
    "libnsl.so.1",
    "libutil.so.1",
    "libpthread.so.0",
    "libresolv.so.2",
    "libX11.so.6",
    "libXext.so.6",
    "libXrender.so.1",
    "libICE.so.6",
    "libSM.so.6",
    "libGL.so.1",
    "libgobject-2.0.so.0",
    "libgthread-2.0.so.0",
    "libglib-2.0.so.0",
    "ld-linux-x86-64.so.2",
    "ld-2.17.so",
}
# DLL names that relocate_dll_library() never copies into the wheel. That
# function additionally skips every name starting with "api-ms-win", so the
# api-ms-win-* entries below are covered by both checks.
WINDOWS_ALLOWLIST = {
    "MSVCP140.dll",
    "KERNEL32.dll",
    "VCRUNTIME140_1.dll",
    "VCRUNTIME140.dll",
    "api-ms-win-crt-heap-l1-1-0.dll",
    "api-ms-win-crt-runtime-l1-1-0.dll",
    "api-ms-win-crt-stdio-l1-1-0.dll",
    "api-ms-win-crt-filesystem-l1-1-0.dll",
    "api-ms-win-crt-string-l1-1-0.dll",
    "api-ms-win-crt-environment-l1-1-0.dll",
    "api-ms-win-crt-math-l1-1-0.dll",
    "api-ms-win-crt-convert-l1-1-0.dll",
}
# Directory that contains this script.
HERE = osp.dirname(osp.abspath(__file__))
# Two directory levels above HERE; patch_linux()/patch_win() look for wheels
# in its "dist" subdirectory.
PACKAGE_ROOT = osp.dirname(osp.dirname(HERE))
# Machine type and interpreter version of the running Python. Neither constant
# is referenced by the code visible in this file.
PLATFORM_ARCH = platform.machine()
PYTHON_VERSION = sys.version_info
def rehash(path, blocksize=1 << 20):
    """Compute the SHA-256 digest and byte size of the file at ``path``.

    The file is read in binary mode, ``blocksize`` bytes at a time.

    Returns a tuple ``(digest, size)`` where ``digest`` is the string
    ``"sha256="`` followed by the URL-safe base64 encoding of the hash with
    its ``=`` padding stripped, and ``size`` is the file length in bytes
    rendered as a string.
    """
    hasher = hashlib.sha256()
    total = 0
    with open(path, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for chunk in iter(lambda: stream.read(blocksize), b""):
            hasher.update(chunk)
            total += len(chunk)
    encoded = urlsafe_b64encode(hasher.digest()).decode("latin1")
    return ("sha256=" + encoded.rstrip("="), str(total))
def unzip_file(file, dest):
    """Extract every member of the zip archive ``file`` into directory ``dest``."""
    with zipfile.ZipFile(file, mode="r") as archive:
        archive.extractall(path=dest)
def is_program_installed(basename):
    """
    Return the path of `basename` if it is found in a PATH directory.

    Each directory listed in the PATH environment variable is joined with
    `basename`; the first joined path that is an existing file is returned.
    Otherwise, return None.

    On macOS systems, a .app is considered installed if it exists, in which
    case `basename` itself is returned.
    """
    if sys.platform == "darwin" and basename.endswith(".app") and osp.exists(basename):
        return basename

    # Fall back to the platform default search path when PATH is unset, so the
    # lookup reports "not found" instead of raising KeyError.
    search_path = os.environ.get("PATH", os.defpath)
    for directory in search_path.split(os.pathsep):
        candidate = osp.join(directory, basename)
        if osp.isfile(candidate):
            return candidate
    return None
def find_program(basename):
    """
    Locate `basename` through is_program_installed() and return its path.

    On Windows (os.name == "nt"), when `basename` does not already end in
    .exe, .bat, .cmd or .dll, each of those suffixes is tried in that order
    before the bare name. Returns None if no candidate is found.
    """
    candidates = [basename]
    if os.name == "nt":
        # Windows platforms
        suffixes = (".exe", ".bat", ".cmd", ".dll")
        if not basename.endswith(suffixes):
            candidates = [basename + suffix for suffix in suffixes]
            candidates.append(basename)

    # map() is lazy, so lookups stop at the first candidate that resolves.
    lookups = map(is_program_installed, candidates)
    return next((found for found in lookups if found), None)
def patch_new_path(library_path, new_dir):
    """
    Build the destination path for a relocated copy of `library_path`.

    The file name is split at its first dot and the first 8 hex digits of the
    SHA-256 of the full `library_path` string are inserted after the stem,
    e.g. ``libfoo.so.1`` -> ``libfoo.<hash>.so.1``. A name with nothing after
    the stem yields ``<stem>.<hash>`` without a trailing dot.

    Returns the new file name joined onto `new_dir`.
    """
    library = osp.basename(library_path)
    name, _, rest = library.partition(".")
    hash_id = hashlib.sha256(library_path.encode("utf-8")).hexdigest()[:8]
    parts = [name, hash_id]
    if rest:
        # Only re-attach the suffix when there is one, to avoid "name.hash."
        parts.append(rest)
    return osp.join(new_dir, ".".join(parts))
def find_dll_dependencies(dumpbin, binary):
    """
    List the DLL names reported by ``<dumpbin> /dependents <binary>``.

    The names are taken from the text between the "dependencies:" marker and
    the following "Summary" marker in the tool's standard output, one name per
    non-blank line.

    Returns a list of stripped names. The list is empty when the output has
    no "dependencies:" section.
    """
    result = subprocess.run([dumpbin, "/dependents", binary], stdout=subprocess.PIPE)
    out = result.stdout.decode("utf-8", errors="replace")

    marker = "dependencies:"
    start = out.find(marker)
    if start == -1:
        # No dependency section: avoid slicing from a bogus offset.
        return []
    start += len(marker)

    end = out.find("Summary", start)
    if end == -1:
        end = len(out)

    # splitlines() copes with both "\n" and "\r\n"; blank lines are dropped.
    return [line.strip() for line in out[start:end].splitlines() if line.strip()]
def relocate_elf_library(patchelf, output_dir, output_library, binary):
    """
    Relocate an ELF shared library to be packaged on a wheel.

    Given a shared library, find the transitive closure of its dependencies,
    rename and copy them into the wheel while updating their respective rpaths.

    Parameters:
        patchelf: path of the patchelf executable used to rewrite the binaries.
        output_dir: root of the unpacked wheel; copies go to
            ``<output_dir>/torchvision.libs``.
        output_library: directory inside the unpacked wheel that holds `binary`.
        binary: file name of the shared library to relocate.

    Dependencies named in ALLOWLIST, or whose path lddtree could not resolve,
    are left untouched. Raises subprocess.CalledProcessError if a patchelf
    invocation fails.
    """
    print(f"Relocating {binary}")
    binary_path = osp.join(output_library, binary)

    ld_tree = lddtree(binary_path)
    tree_libs = ld_tree["libs"]

    # Breadth-first walk over (library, parent) pairs.
    binary_queue = [(n, binary) for n in ld_tree["needed"]]
    binary_paths = {binary: binary_path}
    binary_dependencies = {}

    while binary_queue:
        library, parent = binary_queue.pop(0)
        library_info = tree_libs[library]
        print(library)

        if library_info["path"] is None:
            print(f"Omitting {library}")
            continue

        if library in ALLOWLIST:
            # Omit glibc/gcc/system libraries
            print(f"Omitting {library}")
            continue

        binary_dependencies.setdefault(parent, []).append(library)

        if library in binary_paths:
            # Already scheduled for copying; only the parent edge was new.
            continue

        binary_paths[library] = library_info["path"]
        binary_queue += [(n, library) for n in library_info["needed"]]

    print("Copying dependencies to wheel directory")
    new_libraries_path = osp.join(output_dir, "torchvision.libs")
    os.makedirs(new_libraries_path, exist_ok=True)

    new_names = {binary: binary_path}

    for library, library_path in binary_paths.items():
        if library == binary:
            continue
        new_library_path = patch_new_path(library_path, new_libraries_path)
        print(f"{library} -> {new_library_path}")
        shutil.copyfile(library_path, new_library_path)
        new_names[library] = new_library_path

    print("Updating dependency names by new files")
    for library in binary_paths:
        # Libraries without bundled dependencies need no patching.
        if library == binary or library not in binary_dependencies:
            continue

        new_library_name = new_names[library]
        for dep in binary_dependencies[library]:
            new_dep = osp.basename(new_names[dep])
            print(f"{library}: {dep} -> {new_dep}")
            subprocess.check_output(
                [patchelf, "--replace-needed", dep, new_dep, new_library_name], cwd=new_libraries_path
            )

        print("Updating library rpath")
        subprocess.check_output([patchelf, "--set-rpath", "$ORIGIN", new_library_name], cwd=new_libraries_path)
        subprocess.check_output([patchelf, "--print-rpath", new_library_name], cwd=new_libraries_path)

    print("Update library dependencies")
    # The binary may have no bundled dependencies at all (everything was
    # allowlisted or unresolved); treat that as an empty list, not a KeyError.
    for dep in binary_dependencies.get(binary, []):
        new_dep = osp.basename(new_names[dep])
        print(f"{binary}: {dep} -> {new_dep}")
        subprocess.check_output([patchelf, "--replace-needed", dep, new_dep, binary], cwd=output_library)

    print("Update library rpath")
    subprocess.check_output(
        [patchelf, "--set-rpath", "$ORIGIN:$ORIGIN/../torchvision.libs", binary_path], cwd=output_library
    )
def relocate_dll_library(dumpbin, output_dir, output_library, binary):
    """
    Relocate a DLL/PE shared library to be packaged on a wheel.

    Given a shared library, find the transitive closure of its dependencies,
    rename and copy them into the wheel.

    Parameters:
        dumpbin: path of the dumpbin executable used to list dependencies.
        output_dir: root of the unpacked wheel; copies go to
            ``<output_dir>/torchvision``.
        output_library: directory inside the unpacked wheel that holds `binary`.
        binary: file name of the shared library to relocate.

    Dependencies are skipped when they are named in WINDOWS_ALLOWLIST, start
    with "api-ms-win", cannot be located with find_program(), or resolve to a
    directory named system32 (compared case-insensitively).
    """
    print(f"Relocating {binary}")
    binary_path = osp.join(output_library, binary)

    library_dlls = find_dll_dependencies(dumpbin, binary_path)
    # Breadth-first walk over (library, parent) pairs.
    binary_queue = [(dll, binary) for dll in library_dlls]
    binary_paths = {binary: binary_path}
    binary_dependencies = {}

    while binary_queue:
        library, parent = binary_queue.pop(0)
        if library in WINDOWS_ALLOWLIST or library.startswith("api-ms-win"):
            print(f"Omitting {library}")
            continue

        library_path = find_program(library)
        if library_path is None:
            print(f"{library} not found")
            continue

        # The directory is usually spelled "System32", so compare without
        # regard to case; otherwise system DLLs would be bundled.
        if osp.basename(osp.dirname(library_path)).lower() == "system32":
            continue

        print(f"{library}: {library_path}")
        binary_dependencies.setdefault(parent, []).append(library)

        if library in binary_paths:
            # Already scheduled for copying; only the parent edge was new.
            continue

        binary_paths[library] = library_path
        downstream_dlls = find_dll_dependencies(dumpbin, library_path)
        binary_queue += [(n, library) for n in downstream_dlls]

    print("Copying dependencies to wheel directory")
    package_dir = osp.join(output_dir, "torchvision")
    for library, library_path in binary_paths.items():
        if library == binary:
            continue
        new_library_path = osp.join(package_dir, library)
        print(f"{library} -> {new_library_path}")
        shutil.copyfile(library_path, new_library_path)
def compress_wheel(output_dir, wheel, wheel_dir, wheel_name):
    """
    Create RECORD file and compress wheel distribution.

    Parameters:
        output_dir: directory holding the unpacked (and patched) wheel contents.
        wheel: path of the original wheel file, which is replaced.
        wheel_dir: directory in which the temporary zip archive is created.
        wheel_name: base name (without extension) for the temporary archive.

    RECORD is rewritten with one ``path,digest,size`` line per file under
    `output_dir`; the RECORD file itself is listed with empty digest and size.
    `output_dir` is then zipped, the archive replaces `wheel`, and
    `output_dir` is deleted.

    Raises FileNotFoundError if `output_dir` has no ``*.dist-info`` directory.
    """
    print("Update RECORD file in wheel")
    dist_infos = glob.glob(osp.join(output_dir, "*.dist-info"))
    if not dist_infos:
        raise FileNotFoundError(f"No *.dist-info directory found in {output_dir}")
    record_file = osp.join(dist_infos[0], "RECORD")

    with open(record_file, "w") as f:
        for root, _, files in os.walk(output_dir):
            for this_file in files:
                full_file = osp.join(root, this_file)
                # RECORD paths use "/" as separator on every platform, so
                # normalise the backslashes os.path produces on Windows.
                rel_file = osp.relpath(full_file, output_dir).replace(os.sep, "/")
                if full_file == record_file:
                    f.write(f"{rel_file},,\n")
                else:
                    digest, size = rehash(full_file)
                    f.write(f"{rel_file},{digest},{size}\n")

    print("Compressing wheel")
    base_wheel_name = osp.join(wheel_dir, wheel_name)
    shutil.make_archive(base_wheel_name, "zip", output_dir)
    os.remove(wheel)
    shutil.move(f"{base_wheel_name}.zip", wheel)
    shutil.rmtree(output_dir)
def patch_linux():
    """
    Bundle ELF dependencies into every wheel under ``<PACKAGE_ROOT>/dist``.

    Each wheel is unpacked into ``dist/.wheel-process``; the torchvision
    extension binaries present in it are relocated with
    relocate_elf_library(), and the wheel is then rebuilt in place with
    compress_wheel().

    Raises FileNotFoundError if patchelf cannot be located.
    """
    # Get patchelf location
    patchelf = find_program("patchelf")
    if patchelf is None:
        raise FileNotFoundError("Patchelf was not found in the system, please make sure that is available on the PATH.")

    # Find wheel
    print("Finding wheels...")
    dist_dir = osp.join(PACKAGE_ROOT, "dist")
    wheels = glob.glob(osp.join(dist_dir, "*.whl"))
    output_dir = osp.join(dist_dir, ".wheel-process")
    output_library = osp.join(output_dir, "torchvision")
    torchvision_binaries = ("image.so", "video_reader.so")

    for wheel in wheels:
        # Start every wheel from an empty scratch directory.
        if osp.exists(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

        print("Unzipping wheel...")
        wheel_dir, wheel_file = osp.split(wheel)
        print(wheel_file)
        wheel_name = osp.splitext(wheel_file)[0]
        unzip_file(wheel, output_dir)

        print("Finding ELF dependencies...")
        present = (name for name in torchvision_binaries if osp.exists(osp.join(output_library, name)))
        for binary in present:
            relocate_elf_library(patchelf, output_dir, output_library, binary)

        compress_wheel(output_dir, wheel, wheel_dir, wheel_name)
def patch_win():
    """
    Bundle DLL dependencies into every wheel under ``<PACKAGE_ROOT>/dist``.

    Each wheel is unpacked into ``dist/.wheel-process``; the torchvision
    extension binaries present in it are relocated with
    relocate_dll_library(), and the wheel is then rebuilt in place with
    compress_wheel().

    Raises FileNotFoundError if dumpbin cannot be located.
    """
    # Get dumpbin location
    dumpbin = find_program("dumpbin")
    if dumpbin is None:
        raise FileNotFoundError("Dumpbin was not found in the system, please make sure that is available on the PATH.")

    # Find wheel
    print("Finding wheels...")
    dist_dir = osp.join(PACKAGE_ROOT, "dist")
    wheels = glob.glob(osp.join(dist_dir, "*.whl"))
    output_dir = osp.join(dist_dir, ".wheel-process")
    output_library = osp.join(output_dir, "torchvision")
    torchvision_binaries = ("image.pyd", "video_reader.pyd")

    for wheel in wheels:
        # Start every wheel from an empty scratch directory.
        if osp.exists(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

        print("Unzipping wheel...")
        wheel_dir, wheel_file = osp.split(wheel)
        print(wheel_file)
        wheel_name = osp.splitext(wheel_file)[0]
        unzip_file(wheel, output_dir)

        print("Finding DLL/PE dependencies...")
        present = (name for name in torchvision_binaries if osp.exists(osp.join(output_library, name)))
        for binary in present:
            relocate_dll_library(dumpbin, output_dir, output_library, binary)

        compress_wheel(output_dir, wheel, wheel_dir, wheel_name)
if __name__ == "__main__":
    # Dispatch on the host platform. Any platform other than Linux or Windows
    # matches neither branch, so the script exits without doing anything.
    if sys.platform == "linux":
        patch_linux()
    elif sys.platform == "win32":
        patch_win()
|