__init__.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428
  1. import io
  2. import posixpath
  3. import zipfile
  4. import itertools
  5. import contextlib
  6. import pathlib
  7. import re
  8. from .py310compat import text_encoding
  9. from .glob import translate
  10. __all__ = ['Path']
  11. def _parents(path):
  12. """
  13. Given a path with elements separated by
  14. posixpath.sep, generate all parents of that path.
  15. >>> list(_parents('b/d'))
  16. ['b']
  17. >>> list(_parents('/b/d/'))
  18. ['/b']
  19. >>> list(_parents('b/d/f/'))
  20. ['b/d', 'b']
  21. >>> list(_parents('b'))
  22. []
  23. >>> list(_parents(''))
  24. []
  25. """
  26. return itertools.islice(_ancestry(path), 1, None)
  27. def _ancestry(path):
  28. """
  29. Given a path with elements separated by
  30. posixpath.sep, generate all elements of that path
  31. >>> list(_ancestry('b/d'))
  32. ['b/d', 'b']
  33. >>> list(_ancestry('/b/d/'))
  34. ['/b/d', '/b']
  35. >>> list(_ancestry('b/d/f/'))
  36. ['b/d/f', 'b/d', 'b']
  37. >>> list(_ancestry('b'))
  38. ['b']
  39. >>> list(_ancestry(''))
  40. []
  41. """
  42. path = path.rstrip(posixpath.sep)
  43. while path and path != posixpath.sep:
  44. yield path
  45. path, tail = posixpath.split(path)
# dict.fromkeys builds a dict whose keys are the distinct items of the
# iterable in first-seen order; iterate the result to read them back.
_dedupe = dict.fromkeys
"""Deduplicate an iterable in original order"""
  48. def _difference(minuend, subtrahend):
  49. """
  50. Return items in minuend not in subtrahend, retaining order
  51. with O(1) lookup.
  52. """
  53. return itertools.filterfalse(set(subtrahend).__contains__, minuend)
  54. class InitializedState:
  55. """
  56. Mix-in to save the initialization state for pickling.
  57. """
  58. def __init__(self, *args, **kwargs):
  59. self.__args = args
  60. self.__kwargs = kwargs
  61. super().__init__(*args, **kwargs)
  62. def __getstate__(self):
  63. return self.__args, self.__kwargs
  64. def __setstate__(self, state):
  65. args, kwargs = state
  66. super().__init__(*args, **kwargs)
  67. class CompleteDirs(InitializedState, zipfile.ZipFile):
  68. """
  69. A ZipFile subclass that ensures that implied directories
  70. are always included in the namelist.
  71. >>> list(CompleteDirs._implied_dirs(['foo/bar.txt', 'foo/bar/baz.txt']))
  72. ['foo/', 'foo/bar/']
  73. >>> list(CompleteDirs._implied_dirs(['foo/bar.txt', 'foo/bar/baz.txt', 'foo/bar/']))
  74. ['foo/']
  75. """
  76. @staticmethod
  77. def _implied_dirs(names):
  78. parents = itertools.chain.from_iterable(map(_parents, names))
  79. as_dirs = (p + posixpath.sep for p in parents)
  80. return _dedupe(_difference(as_dirs, names))
  81. def namelist(self):
  82. names = super().namelist()
  83. return names + list(self._implied_dirs(names))
  84. def _name_set(self):
  85. return set(self.namelist())
  86. def resolve_dir(self, name):
  87. """
  88. If the name represents a directory, return that name
  89. as a directory (with the trailing slash).
  90. """
  91. names = self._name_set()
  92. dirname = name + '/'
  93. dir_match = name not in names and dirname in names
  94. return dirname if dir_match else name
  95. def getinfo(self, name):
  96. """
  97. Supplement getinfo for implied dirs.
  98. """
  99. try:
  100. return super().getinfo(name)
  101. except KeyError:
  102. if not name.endswith('/') or name not in self._name_set():
  103. raise
  104. return zipfile.ZipInfo(filename=name)
  105. @classmethod
  106. def make(cls, source):
  107. """
  108. Given a source (filename or zipfile), return an
  109. appropriate CompleteDirs subclass.
  110. """
  111. if isinstance(source, CompleteDirs):
  112. return source
  113. if not isinstance(source, zipfile.ZipFile):
  114. return cls(source)
  115. # Only allow for FastLookup when supplied zipfile is read-only
  116. if 'r' not in source.mode:
  117. cls = CompleteDirs
  118. source.__class__ = cls
  119. return source
  120. @classmethod
  121. def inject(cls, zf: zipfile.ZipFile) -> zipfile.ZipFile:
  122. """
  123. Given a writable zip file zf, inject directory entries for
  124. any directories implied by the presence of children.
  125. """
  126. for name in cls._implied_dirs(zf.namelist()):
  127. zf.writestr(name, b"")
  128. return zf
  129. class FastLookup(CompleteDirs):
  130. """
  131. ZipFile subclass to ensure implicit
  132. dirs exist and are resolved rapidly.
  133. """
  134. def namelist(self):
  135. with contextlib.suppress(AttributeError):
  136. return self.__names
  137. self.__names = super().namelist()
  138. return self.__names
  139. def _name_set(self):
  140. with contextlib.suppress(AttributeError):
  141. return self.__lookup
  142. self.__lookup = super()._name_set()
  143. return self.__lookup
  144. def _extract_text_encoding(encoding=None, *args, **kwargs):
  145. # stacklevel=3 so that the caller of the caller see any warning.
  146. return text_encoding(encoding, 3), args, kwargs
  147. class Path:
  148. """
  149. A pathlib-compatible interface for zip files.
  150. Consider a zip file with this structure::
  151. .
  152. ├── a.txt
  153. └── b
  154. ├── c.txt
  155. └── d
  156. └── e.txt
  157. >>> data = io.BytesIO()
  158. >>> zf = zipfile.ZipFile(data, 'w')
  159. >>> zf.writestr('a.txt', 'content of a')
  160. >>> zf.writestr('b/c.txt', 'content of c')
  161. >>> zf.writestr('b/d/e.txt', 'content of e')
  162. >>> zf.filename = 'mem/abcde.zip'
  163. Path accepts the zipfile object itself or a filename
  164. >>> path = Path(zf)
  165. From there, several path operations are available.
  166. Directory iteration (including the zip file itself):
  167. >>> a, b = path.iterdir()
  168. >>> a
  169. Path('mem/abcde.zip', 'a.txt')
  170. >>> b
  171. Path('mem/abcde.zip', 'b/')
  172. name property:
  173. >>> b.name
  174. 'b'
  175. join with divide operator:
  176. >>> c = b / 'c.txt'
  177. >>> c
  178. Path('mem/abcde.zip', 'b/c.txt')
  179. >>> c.name
  180. 'c.txt'
  181. Read text:
  182. >>> c.read_text(encoding='utf-8')
  183. 'content of c'
  184. existence:
  185. >>> c.exists()
  186. True
  187. >>> (b / 'missing.txt').exists()
  188. False
  189. Coercion to string:
  190. >>> import os
  191. >>> str(c).replace(os.sep, posixpath.sep)
  192. 'mem/abcde.zip/b/c.txt'
  193. At the root, ``name``, ``filename``, and ``parent``
  194. resolve to the zipfile.
  195. >>> str(path)
  196. 'mem/abcde.zip/'
  197. >>> path.name
  198. 'abcde.zip'
  199. >>> path.filename == pathlib.Path('mem/abcde.zip')
  200. True
  201. >>> str(path.parent)
  202. 'mem'
  203. If the zipfile has no filename, such attribtues are not
  204. valid and accessing them will raise an Exception.
  205. >>> zf.filename = None
  206. >>> path.name
  207. Traceback (most recent call last):
  208. ...
  209. TypeError: ...
  210. >>> path.filename
  211. Traceback (most recent call last):
  212. ...
  213. TypeError: ...
  214. >>> path.parent
  215. Traceback (most recent call last):
  216. ...
  217. TypeError: ...
  218. # workaround python/cpython#106763
  219. >>> pass
  220. """
  221. __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"
  222. def __init__(self, root, at=""):
  223. """
  224. Construct a Path from a ZipFile or filename.
  225. Note: When the source is an existing ZipFile object,
  226. its type (__class__) will be mutated to a
  227. specialized type. If the caller wishes to retain the
  228. original type, the caller should either create a
  229. separate ZipFile object or pass a filename.
  230. """
  231. self.root = FastLookup.make(root)
  232. self.at = at
  233. def __eq__(self, other):
  234. """
  235. >>> Path(zipfile.ZipFile(io.BytesIO(), 'w')) == 'foo'
  236. False
  237. """
  238. if self.__class__ is not other.__class__:
  239. return NotImplemented
  240. return (self.root, self.at) == (other.root, other.at)
  241. def __hash__(self):
  242. return hash((self.root, self.at))
  243. def open(self, mode='r', *args, pwd=None, **kwargs):
  244. """
  245. Open this entry as text or binary following the semantics
  246. of ``pathlib.Path.open()`` by passing arguments through
  247. to io.TextIOWrapper().
  248. """
  249. if self.is_dir():
  250. raise IsADirectoryError(self)
  251. zip_mode = mode[0]
  252. if not self.exists() and zip_mode == 'r':
  253. raise FileNotFoundError(self)
  254. stream = self.root.open(self.at, zip_mode, pwd=pwd)
  255. if 'b' in mode:
  256. if args or kwargs:
  257. raise ValueError("encoding args invalid for binary operation")
  258. return stream
  259. # Text mode:
  260. encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
  261. return io.TextIOWrapper(stream, encoding, *args, **kwargs)
  262. def _base(self):
  263. return pathlib.PurePosixPath(self.at or self.root.filename)
  264. @property
  265. def name(self):
  266. return self._base().name
  267. @property
  268. def suffix(self):
  269. return self._base().suffix
  270. @property
  271. def suffixes(self):
  272. return self._base().suffixes
  273. @property
  274. def stem(self):
  275. return self._base().stem
  276. @property
  277. def filename(self):
  278. return pathlib.Path(self.root.filename).joinpath(self.at)
  279. def read_text(self, *args, **kwargs):
  280. encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
  281. with self.open('r', encoding, *args, **kwargs) as strm:
  282. return strm.read()
  283. def read_bytes(self):
  284. with self.open('rb') as strm:
  285. return strm.read()
  286. def _is_child(self, path):
  287. return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")
  288. def _next(self, at):
  289. return self.__class__(self.root, at)
  290. def is_dir(self):
  291. return not self.at or self.at.endswith("/")
  292. def is_file(self):
  293. return self.exists() and not self.is_dir()
  294. def exists(self):
  295. return self.at in self.root._name_set()
  296. def iterdir(self):
  297. if not self.is_dir():
  298. raise ValueError("Can't listdir a file")
  299. subs = map(self._next, self.root.namelist())
  300. return filter(self._is_child, subs)
  301. def match(self, path_pattern):
  302. return pathlib.PurePosixPath(self.at).match(path_pattern)
  303. def is_symlink(self):
  304. """
  305. Return whether this path is a symlink. Always false (python/cpython#82102).
  306. """
  307. return False
  308. def glob(self, pattern):
  309. if not pattern:
  310. raise ValueError(f"Unacceptable pattern: {pattern!r}")
  311. prefix = re.escape(self.at)
  312. matches = re.compile(prefix + translate(pattern)).fullmatch
  313. return map(self._next, filter(matches, self.root.namelist()))
  314. def rglob(self, pattern):
  315. return self.glob(f'**/{pattern}')
  316. def relative_to(self, other, *extra):
  317. return posixpath.relpath(str(self), str(other.joinpath(*extra)))
  318. def __str__(self):
  319. return posixpath.join(self.root.filename, self.at)
  320. def __repr__(self):
  321. return self.__repr.format(self=self)
  322. def joinpath(self, *other):
  323. next = posixpath.join(self.at, *other)
  324. return self._next(self.root.resolve_dir(next))
  325. __truediv__ = joinpath
  326. @property
  327. def parent(self):
  328. if not self.at:
  329. return self.filename.parent
  330. parent_at = posixpath.dirname(self.at.rstrip('/'))
  331. if parent_at:
  332. parent_at += '/'
  333. return self._next(parent_at)