__init__.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681
  1. import collections.abc
  2. import re
  3. from typing import (
  4. Any,
  5. Callable,
  6. Dict,
  7. List,
  8. Mapping,
  9. MutableMapping,
  10. Optional,
  11. Sequence,
  12. Type,
  13. Union,
  14. IO,
  15. )
  16. import warnings
  17. from io import BytesIO
  18. from datetime import datetime
  19. from base64 import b64encode, b64decode
  20. from numbers import Integral
  21. from types import SimpleNamespace
  22. from functools import singledispatch
  23. from fontTools.misc import etree
  24. from fontTools.misc.textTools import tostr
  25. # By default, we
  26. # - deserialize <data> elements as bytes and
  27. # - serialize bytes as <data> elements.
  28. # Before, on Python 2, we
  29. # - deserialized <data> elements as plistlib.Data objects, in order to
  30. # distinguish them from the built-in str type (which is bytes on python2)
  31. # - serialized bytes as <string> elements (they must have only contained
  32. # ASCII characters in this case)
  33. # You can pass use_builtin_types=[True|False] to the load/dump etc. functions
  34. # to enforce a specific treatment.
  35. # NOTE that unicode type always maps to <string> element, and plistlib.Data
  36. # always maps to <data> element, regardless of use_builtin_types.
  37. USE_BUILTIN_TYPES = True
  38. XML_DECLARATION = b"""<?xml version='1.0' encoding='UTF-8'?>"""
  39. PLIST_DOCTYPE = (
  40. b'<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" '
  41. b'"http://www.apple.com/DTDs/PropertyList-1.0.dtd">'
  42. )
  43. # Date should conform to a subset of ISO 8601:
  44. # YYYY '-' MM '-' DD 'T' HH ':' MM ':' SS 'Z'
  45. _date_parser = re.compile(
  46. r"(?P<year>\d\d\d\d)"
  47. r"(?:-(?P<month>\d\d)"
  48. r"(?:-(?P<day>\d\d)"
  49. r"(?:T(?P<hour>\d\d)"
  50. r"(?::(?P<minute>\d\d)"
  51. r"(?::(?P<second>\d\d))"
  52. r"?)?)?)?)?Z",
  53. re.ASCII,
  54. )
  55. def _date_from_string(s: str) -> datetime:
  56. order = ("year", "month", "day", "hour", "minute", "second")
  57. m = _date_parser.match(s)
  58. if m is None:
  59. raise ValueError(f"Expected ISO 8601 date string, but got '{s:r}'.")
  60. gd = m.groupdict()
  61. lst = []
  62. for key in order:
  63. val = gd[key]
  64. if val is None:
  65. break
  66. lst.append(int(val))
  67. # NOTE: mypy doesn't know that lst is 6 elements long.
  68. return datetime(*lst) # type:ignore
  69. def _date_to_string(d: datetime) -> str:
  70. return "%04d-%02d-%02dT%02d:%02d:%02dZ" % (
  71. d.year,
  72. d.month,
  73. d.day,
  74. d.hour,
  75. d.minute,
  76. d.second,
  77. )
  78. class Data:
  79. """Represents binary data when ``use_builtin_types=False.``
  80. This class wraps binary data loaded from a plist file when the
  81. ``use_builtin_types`` argument to the loading function (:py:func:`fromtree`,
  82. :py:func:`load`, :py:func:`loads`) is false.
  83. The actual binary data is retrieved using the ``data`` attribute.
  84. """
  85. def __init__(self, data: bytes) -> None:
  86. if not isinstance(data, bytes):
  87. raise TypeError("Expected bytes, found %s" % type(data).__name__)
  88. self.data = data
  89. @classmethod
  90. def fromBase64(cls, data: Union[bytes, str]) -> "Data":
  91. return cls(b64decode(data))
  92. def asBase64(self, maxlinelength: int = 76, indent_level: int = 1) -> bytes:
  93. return _encode_base64(
  94. self.data, maxlinelength=maxlinelength, indent_level=indent_level
  95. )
  96. def __eq__(self, other: Any) -> bool:
  97. if isinstance(other, self.__class__):
  98. return self.data == other.data
  99. elif isinstance(other, bytes):
  100. return self.data == other
  101. else:
  102. return NotImplemented
  103. def __repr__(self) -> str:
  104. return "%s(%s)" % (self.__class__.__name__, repr(self.data))
  105. def _encode_base64(
  106. data: bytes, maxlinelength: Optional[int] = 76, indent_level: int = 1
  107. ) -> bytes:
  108. data = b64encode(data)
  109. if data and maxlinelength:
  110. # split into multiple lines right-justified to 'maxlinelength' chars
  111. indent = b"\n" + b" " * indent_level
  112. max_length = max(16, maxlinelength - len(indent))
  113. chunks = []
  114. for i in range(0, len(data), max_length):
  115. chunks.append(indent)
  116. chunks.append(data[i : i + max_length])
  117. chunks.append(indent)
  118. data = b"".join(chunks)
  119. return data
  120. # Mypy does not support recursive type aliases as of 0.782, Pylance does.
  121. # https://github.com/python/mypy/issues/731
  122. # https://devblogs.microsoft.com/python/pylance-introduces-five-new-features-that-enable-type-magic-for-python-developers/#1-support-for-recursive-type-aliases
  123. PlistEncodable = Union[
  124. bool,
  125. bytes,
  126. Data,
  127. datetime,
  128. float,
  129. Integral,
  130. Mapping[str, Any],
  131. Sequence[Any],
  132. str,
  133. ]
  134. class PlistTarget:
  135. """Event handler using the ElementTree Target API that can be
  136. passed to a XMLParser to produce property list objects from XML.
  137. It is based on the CPython plistlib module's _PlistParser class,
  138. but does not use the expat parser.
  139. >>> from fontTools.misc import etree
  140. >>> parser = etree.XMLParser(target=PlistTarget())
  141. >>> result = etree.XML(
  142. ... "<dict>"
  143. ... " <key>something</key>"
  144. ... " <string>blah</string>"
  145. ... "</dict>",
  146. ... parser=parser)
  147. >>> result == {"something": "blah"}
  148. True
  149. Links:
  150. https://github.com/python/cpython/blob/main/Lib/plistlib.py
  151. http://lxml.de/parsing.html#the-target-parser-interface
  152. """
  153. def __init__(
  154. self,
  155. use_builtin_types: Optional[bool] = None,
  156. dict_type: Type[MutableMapping[str, Any]] = dict,
  157. ) -> None:
  158. self.stack: List[PlistEncodable] = []
  159. self.current_key: Optional[str] = None
  160. self.root: Optional[PlistEncodable] = None
  161. if use_builtin_types is None:
  162. self._use_builtin_types = USE_BUILTIN_TYPES
  163. else:
  164. if use_builtin_types is False:
  165. warnings.warn(
  166. "Setting use_builtin_types to False is deprecated and will be "
  167. "removed soon.",
  168. DeprecationWarning,
  169. )
  170. self._use_builtin_types = use_builtin_types
  171. self._dict_type = dict_type
  172. def start(self, tag: str, attrib: Mapping[str, str]) -> None:
  173. self._data: List[str] = []
  174. handler = _TARGET_START_HANDLERS.get(tag)
  175. if handler is not None:
  176. handler(self)
  177. def end(self, tag: str) -> None:
  178. handler = _TARGET_END_HANDLERS.get(tag)
  179. if handler is not None:
  180. handler(self)
  181. def data(self, data: str) -> None:
  182. self._data.append(data)
  183. def close(self) -> PlistEncodable:
  184. if self.root is None:
  185. raise ValueError("No root set.")
  186. return self.root
  187. # helpers
  188. def add_object(self, value: PlistEncodable) -> None:
  189. if self.current_key is not None:
  190. stack_top = self.stack[-1]
  191. if not isinstance(stack_top, collections.abc.MutableMapping):
  192. raise ValueError("unexpected element: %r" % stack_top)
  193. stack_top[self.current_key] = value
  194. self.current_key = None
  195. elif not self.stack:
  196. # this is the root object
  197. self.root = value
  198. else:
  199. stack_top = self.stack[-1]
  200. if not isinstance(stack_top, list):
  201. raise ValueError("unexpected element: %r" % stack_top)
  202. stack_top.append(value)
  203. def get_data(self) -> str:
  204. data = "".join(self._data)
  205. self._data = []
  206. return data
  207. # event handlers
  208. def start_dict(self: PlistTarget) -> None:
  209. d = self._dict_type()
  210. self.add_object(d)
  211. self.stack.append(d)
  212. def end_dict(self: PlistTarget) -> None:
  213. if self.current_key:
  214. raise ValueError("missing value for key '%s'" % self.current_key)
  215. self.stack.pop()
  216. def end_key(self: PlistTarget) -> None:
  217. if self.current_key or not isinstance(self.stack[-1], collections.abc.Mapping):
  218. raise ValueError("unexpected key")
  219. self.current_key = self.get_data()
  220. def start_array(self: PlistTarget) -> None:
  221. a: List[PlistEncodable] = []
  222. self.add_object(a)
  223. self.stack.append(a)
  224. def end_array(self: PlistTarget) -> None:
  225. self.stack.pop()
  226. def end_true(self: PlistTarget) -> None:
  227. self.add_object(True)
  228. def end_false(self: PlistTarget) -> None:
  229. self.add_object(False)
  230. def end_integer(self: PlistTarget) -> None:
  231. self.add_object(int(self.get_data()))
  232. def end_real(self: PlistTarget) -> None:
  233. self.add_object(float(self.get_data()))
  234. def end_string(self: PlistTarget) -> None:
  235. self.add_object(self.get_data())
  236. def end_data(self: PlistTarget) -> None:
  237. if self._use_builtin_types:
  238. self.add_object(b64decode(self.get_data()))
  239. else:
  240. self.add_object(Data.fromBase64(self.get_data()))
  241. def end_date(self: PlistTarget) -> None:
  242. self.add_object(_date_from_string(self.get_data()))
  243. _TARGET_START_HANDLERS: Dict[str, Callable[[PlistTarget], None]] = {
  244. "dict": start_dict,
  245. "array": start_array,
  246. }
  247. _TARGET_END_HANDLERS: Dict[str, Callable[[PlistTarget], None]] = {
  248. "dict": end_dict,
  249. "array": end_array,
  250. "key": end_key,
  251. "true": end_true,
  252. "false": end_false,
  253. "integer": end_integer,
  254. "real": end_real,
  255. "string": end_string,
  256. "data": end_data,
  257. "date": end_date,
  258. }
  259. # functions to build element tree from plist data
  260. def _string_element(value: str, ctx: SimpleNamespace) -> etree.Element:
  261. el = etree.Element("string")
  262. el.text = value
  263. return el
  264. def _bool_element(value: bool, ctx: SimpleNamespace) -> etree.Element:
  265. if value:
  266. return etree.Element("true")
  267. return etree.Element("false")
  268. def _integer_element(value: int, ctx: SimpleNamespace) -> etree.Element:
  269. if -1 << 63 <= value < 1 << 64:
  270. el = etree.Element("integer")
  271. el.text = "%d" % value
  272. return el
  273. raise OverflowError(value)
  274. def _real_element(value: float, ctx: SimpleNamespace) -> etree.Element:
  275. el = etree.Element("real")
  276. el.text = repr(value)
  277. return el
  278. def _dict_element(
  279. d: Mapping[str, PlistEncodable], ctx: SimpleNamespace
  280. ) -> etree.Element:
  281. el = etree.Element("dict")
  282. items = d.items()
  283. if ctx.sort_keys:
  284. items = sorted(items) # type: ignore
  285. ctx.indent_level += 1
  286. for key, value in items:
  287. if not isinstance(key, str):
  288. if ctx.skipkeys:
  289. continue
  290. raise TypeError("keys must be strings")
  291. k = etree.SubElement(el, "key")
  292. k.text = tostr(key, "utf-8")
  293. el.append(_make_element(value, ctx))
  294. ctx.indent_level -= 1
  295. return el
  296. def _array_element(
  297. array: Sequence[PlistEncodable], ctx: SimpleNamespace
  298. ) -> etree.Element:
  299. el = etree.Element("array")
  300. if len(array) == 0:
  301. return el
  302. ctx.indent_level += 1
  303. for value in array:
  304. el.append(_make_element(value, ctx))
  305. ctx.indent_level -= 1
  306. return el
  307. def _date_element(date: datetime, ctx: SimpleNamespace) -> etree.Element:
  308. el = etree.Element("date")
  309. el.text = _date_to_string(date)
  310. return el
  311. def _data_element(data: bytes, ctx: SimpleNamespace) -> etree.Element:
  312. el = etree.Element("data")
  313. # NOTE: mypy is confused about whether el.text should be str or bytes.
  314. el.text = _encode_base64( # type: ignore
  315. data,
  316. maxlinelength=(76 if ctx.pretty_print else None),
  317. indent_level=ctx.indent_level,
  318. )
  319. return el
  320. def _string_or_data_element(raw_bytes: bytes, ctx: SimpleNamespace) -> etree.Element:
  321. if ctx.use_builtin_types:
  322. return _data_element(raw_bytes, ctx)
  323. else:
  324. try:
  325. string = raw_bytes.decode(encoding="ascii", errors="strict")
  326. except UnicodeDecodeError:
  327. raise ValueError(
  328. "invalid non-ASCII bytes; use unicode string instead: %r" % raw_bytes
  329. )
  330. return _string_element(string, ctx)
  331. # The following is probably not entirely correct. The signature should take `Any`
  332. # and return `NoReturn`. At the time of this writing, neither mypy nor Pyright
  333. # can deal with singledispatch properly and will apply the signature of the base
  334. # function to all others. Being slightly dishonest makes it type-check and return
  335. # usable typing information for the optimistic case.
  336. @singledispatch
  337. def _make_element(value: PlistEncodable, ctx: SimpleNamespace) -> etree.Element:
  338. raise TypeError("unsupported type: %s" % type(value))
  339. _make_element.register(str)(_string_element)
  340. _make_element.register(bool)(_bool_element)
  341. _make_element.register(Integral)(_integer_element)
  342. _make_element.register(float)(_real_element)
  343. _make_element.register(collections.abc.Mapping)(_dict_element)
  344. _make_element.register(list)(_array_element)
  345. _make_element.register(tuple)(_array_element)
  346. _make_element.register(datetime)(_date_element)
  347. _make_element.register(bytes)(_string_or_data_element)
  348. _make_element.register(bytearray)(_data_element)
  349. _make_element.register(Data)(lambda v, ctx: _data_element(v.data, ctx))
# Public functions to create an element tree from plist-compatible Python
# data structures and vice versa, for use when (de)serializing GLIF xml.
  352. def totree(
  353. value: PlistEncodable,
  354. sort_keys: bool = True,
  355. skipkeys: bool = False,
  356. use_builtin_types: Optional[bool] = None,
  357. pretty_print: bool = True,
  358. indent_level: int = 1,
  359. ) -> etree.Element:
  360. """Convert a value derived from a plist into an XML tree.
  361. Args:
  362. value: Any kind of value to be serialized to XML.
  363. sort_keys: Whether keys of dictionaries should be sorted.
  364. skipkeys (bool): Whether to silently skip non-string dictionary
  365. keys.
  366. use_builtin_types (bool): If true, byte strings will be
  367. encoded in Base-64 and wrapped in a ``data`` tag; if
  368. false, they will be either stored as ASCII strings or an
  369. exception raised if they cannot be decoded as such. Defaults
  370. to ``True`` if not present. Deprecated.
  371. pretty_print (bool): Whether to indent the output.
  372. indent_level (int): Level of indentation when serializing.
  373. Returns: an ``etree`` ``Element`` object.
  374. Raises:
  375. ``TypeError``
  376. if non-string dictionary keys are serialized
  377. and ``skipkeys`` is false.
  378. ``ValueError``
  379. if non-ASCII binary data is present
  380. and `use_builtin_types` is false.
  381. """
  382. if use_builtin_types is None:
  383. use_builtin_types = USE_BUILTIN_TYPES
  384. else:
  385. use_builtin_types = use_builtin_types
  386. context = SimpleNamespace(
  387. sort_keys=sort_keys,
  388. skipkeys=skipkeys,
  389. use_builtin_types=use_builtin_types,
  390. pretty_print=pretty_print,
  391. indent_level=indent_level,
  392. )
  393. return _make_element(value, context)
  394. def fromtree(
  395. tree: etree.Element,
  396. use_builtin_types: Optional[bool] = None,
  397. dict_type: Type[MutableMapping[str, Any]] = dict,
  398. ) -> Any:
  399. """Convert an XML tree to a plist structure.
  400. Args:
  401. tree: An ``etree`` ``Element``.
  402. use_builtin_types: If True, binary data is deserialized to
  403. bytes strings. If False, it is wrapped in :py:class:`Data`
  404. objects. Defaults to True if not provided. Deprecated.
  405. dict_type: What type to use for dictionaries.
  406. Returns: An object (usually a dictionary).
  407. """
  408. target = PlistTarget(use_builtin_types=use_builtin_types, dict_type=dict_type)
  409. for action, element in etree.iterwalk(tree, events=("start", "end")):
  410. if action == "start":
  411. target.start(element.tag, element.attrib)
  412. elif action == "end":
  413. # if there are no children, parse the leaf's data
  414. if not len(element):
  415. # always pass str, not None
  416. target.data(element.text or "")
  417. target.end(element.tag)
  418. return target.close()
  419. # python3 plistlib API
  420. def load(
  421. fp: IO[bytes],
  422. use_builtin_types: Optional[bool] = None,
  423. dict_type: Type[MutableMapping[str, Any]] = dict,
  424. ) -> Any:
  425. """Load a plist file into an object.
  426. Args:
  427. fp: An opened file.
  428. use_builtin_types: If True, binary data is deserialized to
  429. bytes strings. If False, it is wrapped in :py:class:`Data`
  430. objects. Defaults to True if not provided. Deprecated.
  431. dict_type: What type to use for dictionaries.
  432. Returns:
  433. An object (usually a dictionary) representing the top level of
  434. the plist file.
  435. """
  436. if not hasattr(fp, "read"):
  437. raise AttributeError("'%s' object has no attribute 'read'" % type(fp).__name__)
  438. target = PlistTarget(use_builtin_types=use_builtin_types, dict_type=dict_type)
  439. parser = etree.XMLParser(target=target)
  440. result = etree.parse(fp, parser=parser)
  441. # lxml returns the target object directly, while ElementTree wraps
  442. # it as the root of an ElementTree object
  443. try:
  444. return result.getroot()
  445. except AttributeError:
  446. return result
  447. def loads(
  448. value: bytes,
  449. use_builtin_types: Optional[bool] = None,
  450. dict_type: Type[MutableMapping[str, Any]] = dict,
  451. ) -> Any:
  452. """Load a plist file from a string into an object.
  453. Args:
  454. value: A bytes string containing a plist.
  455. use_builtin_types: If True, binary data is deserialized to
  456. bytes strings. If False, it is wrapped in :py:class:`Data`
  457. objects. Defaults to True if not provided. Deprecated.
  458. dict_type: What type to use for dictionaries.
  459. Returns:
  460. An object (usually a dictionary) representing the top level of
  461. the plist file.
  462. """
  463. fp = BytesIO(value)
  464. return load(fp, use_builtin_types=use_builtin_types, dict_type=dict_type)
  465. def dump(
  466. value: PlistEncodable,
  467. fp: IO[bytes],
  468. sort_keys: bool = True,
  469. skipkeys: bool = False,
  470. use_builtin_types: Optional[bool] = None,
  471. pretty_print: bool = True,
  472. ) -> None:
  473. """Write a Python object to a plist file.
  474. Args:
  475. value: An object to write.
  476. fp: A file opened for writing.
  477. sort_keys (bool): Whether keys of dictionaries should be sorted.
  478. skipkeys (bool): Whether to silently skip non-string dictionary
  479. keys.
  480. use_builtin_types (bool): If true, byte strings will be
  481. encoded in Base-64 and wrapped in a ``data`` tag; if
  482. false, they will be either stored as ASCII strings or an
  483. exception raised if they cannot be represented. Defaults
  484. pretty_print (bool): Whether to indent the output.
  485. indent_level (int): Level of indentation when serializing.
  486. Raises:
  487. ``TypeError``
  488. if non-string dictionary keys are serialized
  489. and ``skipkeys`` is false.
  490. ``ValueError``
  491. if non-representable binary data is present
  492. and `use_builtin_types` is false.
  493. """
  494. if not hasattr(fp, "write"):
  495. raise AttributeError("'%s' object has no attribute 'write'" % type(fp).__name__)
  496. root = etree.Element("plist", version="1.0")
  497. el = totree(
  498. value,
  499. sort_keys=sort_keys,
  500. skipkeys=skipkeys,
  501. use_builtin_types=use_builtin_types,
  502. pretty_print=pretty_print,
  503. )
  504. root.append(el)
  505. tree = etree.ElementTree(root)
  506. # we write the doctype ourselves instead of using the 'doctype' argument
  507. # of 'write' method, becuse lxml will force adding a '\n' even when
  508. # pretty_print is False.
  509. if pretty_print:
  510. header = b"\n".join((XML_DECLARATION, PLIST_DOCTYPE, b""))
  511. else:
  512. header = XML_DECLARATION + PLIST_DOCTYPE
  513. fp.write(header)
  514. tree.write( # type: ignore
  515. fp,
  516. encoding="utf-8",
  517. pretty_print=pretty_print,
  518. xml_declaration=False,
  519. )
  520. def dumps(
  521. value: PlistEncodable,
  522. sort_keys: bool = True,
  523. skipkeys: bool = False,
  524. use_builtin_types: Optional[bool] = None,
  525. pretty_print: bool = True,
  526. ) -> bytes:
  527. """Write a Python object to a string in plist format.
  528. Args:
  529. value: An object to write.
  530. sort_keys (bool): Whether keys of dictionaries should be sorted.
  531. skipkeys (bool): Whether to silently skip non-string dictionary
  532. keys.
  533. use_builtin_types (bool): If true, byte strings will be
  534. encoded in Base-64 and wrapped in a ``data`` tag; if
  535. false, they will be either stored as strings or an
  536. exception raised if they cannot be represented. Defaults
  537. pretty_print (bool): Whether to indent the output.
  538. indent_level (int): Level of indentation when serializing.
  539. Returns:
  540. string: A plist representation of the Python object.
  541. Raises:
  542. ``TypeError``
  543. if non-string dictionary keys are serialized
  544. and ``skipkeys`` is false.
  545. ``ValueError``
  546. if non-representable binary data is present
  547. and `use_builtin_types` is false.
  548. """
  549. fp = BytesIO()
  550. dump(
  551. value,
  552. fp,
  553. sort_keys=sort_keys,
  554. skipkeys=skipkeys,
  555. use_builtin_types=use_builtin_types,
  556. pretty_print=pretty_print,
  557. )
  558. return fp.getvalue()