formats.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. import logging
  2. import os
  3. import re
  4. import string
  5. import typing
  6. from itertools import chain as _chain
  7. if typing.TYPE_CHECKING:
  8. from typing_extensions import Literal
  9. _logger = logging.getLogger(__name__)
  10. # -------------------------------------------------------------------------------------
  11. # PEP 440
  12. VERSION_PATTERN = r"""
  13. v?
  14. (?:
  15. (?:(?P<epoch>[0-9]+)!)? # epoch
  16. (?P<release>[0-9]+(?:\.[0-9]+)*) # release segment
  17. (?P<pre> # pre-release
  18. [-_\.]?
  19. (?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
  20. [-_\.]?
  21. (?P<pre_n>[0-9]+)?
  22. )?
  23. (?P<post> # post release
  24. (?:-(?P<post_n1>[0-9]+))
  25. |
  26. (?:
  27. [-_\.]?
  28. (?P<post_l>post|rev|r)
  29. [-_\.]?
  30. (?P<post_n2>[0-9]+)?
  31. )
  32. )?
  33. (?P<dev> # dev release
  34. [-_\.]?
  35. (?P<dev_l>dev)
  36. [-_\.]?
  37. (?P<dev_n>[0-9]+)?
  38. )?
  39. )
  40. (?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))? # local version
  41. """
  42. VERSION_REGEX = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.X | re.I)
  43. def pep440(version: str) -> bool:
  44. return VERSION_REGEX.match(version) is not None
  45. # -------------------------------------------------------------------------------------
  46. # PEP 508
  47. PEP508_IDENTIFIER_PATTERN = r"([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])"
  48. PEP508_IDENTIFIER_REGEX = re.compile(f"^{PEP508_IDENTIFIER_PATTERN}$", re.I)
  49. def pep508_identifier(name: str) -> bool:
  50. return PEP508_IDENTIFIER_REGEX.match(name) is not None
  51. try:
  52. try:
  53. from packaging import requirements as _req
  54. except ImportError: # pragma: no cover
  55. # let's try setuptools vendored version
  56. from setuptools._vendor.packaging import requirements as _req # type: ignore
  57. def pep508(value: str) -> bool:
  58. try:
  59. _req.Requirement(value)
  60. return True
  61. except _req.InvalidRequirement:
  62. return False
  63. except ImportError: # pragma: no cover
  64. _logger.warning(
  65. "Could not find an installation of `packaging`. Requirements, dependencies and "
  66. "versions might not be validated. "
  67. "To enforce validation, please install `packaging`."
  68. )
  69. def pep508(value: str) -> bool:
  70. return True
  71. def pep508_versionspec(value: str) -> bool:
  72. """Expression that can be used to specify/lock versions (including ranges)"""
  73. if any(c in value for c in (";", "]", "@")):
  74. # In PEP 508:
  75. # conditional markers, extras and URL specs are not included in the
  76. # versionspec
  77. return False
  78. # Let's pretend we have a dependency called `requirement` with the given
  79. # version spec, then we can reuse the pep508 function for validation:
  80. return pep508(f"requirement{value}")
  81. # -------------------------------------------------------------------------------------
  82. # PEP 517
  83. def pep517_backend_reference(value: str) -> bool:
  84. module, _, obj = value.partition(":")
  85. identifiers = (i.strip() for i in _chain(module.split("."), obj.split(".")))
  86. return all(python_identifier(i) for i in identifiers if i)
  87. # -------------------------------------------------------------------------------------
  88. # Classifiers - PEP 301
  89. def _download_classifiers() -> str:
  90. import ssl
  91. from email.message import Message
  92. from urllib.request import urlopen
  93. url = "https://pypi.org/pypi?:action=list_classifiers"
  94. context = ssl.create_default_context()
  95. with urlopen(url, context=context) as response:
  96. headers = Message()
  97. headers["content_type"] = response.getheader("content-type", "text/plain")
  98. return response.read().decode(headers.get_param("charset", "utf-8"))
  99. class _TroveClassifier:
  100. """The ``trove_classifiers`` package is the official way of validating classifiers,
  101. however this package might not be always available.
  102. As a workaround we can still download a list from PyPI.
  103. We also don't want to be over strict about it, so simply skipping silently is an
  104. option (classifiers will be validated anyway during the upload to PyPI).
  105. """
  106. downloaded: typing.Union[None, "Literal[False]", typing.Set[str]]
  107. def __init__(self):
  108. self.downloaded = None
  109. self._skip_download = False
  110. # None => not cached yet
  111. # False => cache not available
  112. self.__name__ = "trove_classifier" # Emulate a public function
  113. def _disable_download(self):
  114. # This is a private API. Only setuptools has the consent of using it.
  115. self._skip_download = True
  116. def __call__(self, value: str) -> bool:
  117. if self.downloaded is False or self._skip_download is True:
  118. return True
  119. if os.getenv("NO_NETWORK") or os.getenv("VALIDATE_PYPROJECT_NO_NETWORK"):
  120. self.downloaded = False
  121. msg = (
  122. "Install ``trove-classifiers`` to ensure proper validation. "
  123. "Skipping download of classifiers list from PyPI (NO_NETWORK)."
  124. )
  125. _logger.debug(msg)
  126. return True
  127. if self.downloaded is None:
  128. msg = (
  129. "Install ``trove-classifiers`` to ensure proper validation. "
  130. "Meanwhile a list of classifiers will be downloaded from PyPI."
  131. )
  132. _logger.debug(msg)
  133. try:
  134. self.downloaded = set(_download_classifiers().splitlines())
  135. except Exception:
  136. self.downloaded = False
  137. _logger.debug("Problem with download, skipping validation")
  138. return True
  139. return value in self.downloaded or value.lower().startswith("private ::")
  140. try:
  141. from trove_classifiers import classifiers as _trove_classifiers
  142. def trove_classifier(value: str) -> bool:
  143. return value in _trove_classifiers or value.lower().startswith("private ::")
  144. except ImportError: # pragma: no cover
  145. trove_classifier = _TroveClassifier()
  146. # -------------------------------------------------------------------------------------
  147. # Stub packages - PEP 561
  148. def pep561_stub_name(value: str) -> bool:
  149. top, *children = value.split(".")
  150. if not top.endswith("-stubs"):
  151. return False
  152. return python_module_name(".".join([top[: -len("-stubs")], *children]))
  153. # -------------------------------------------------------------------------------------
  154. # Non-PEP related
  155. def url(value: str) -> bool:
  156. from urllib.parse import urlparse
  157. try:
  158. parts = urlparse(value)
  159. if not parts.scheme:
  160. _logger.warning(
  161. "For maximum compatibility please make sure to include a "
  162. "`scheme` prefix in your URL (e.g. 'http://'). "
  163. f"Given value: {value}"
  164. )
  165. if not (value.startswith("/") or value.startswith("\\") or "@" in value):
  166. parts = urlparse(f"http://{value}")
  167. return bool(parts.scheme and parts.netloc)
  168. except Exception:
  169. return False
  170. # https://packaging.python.org/specifications/entry-points/
  171. ENTRYPOINT_PATTERN = r"[^\[\s=]([^=]*[^\s=])?"
  172. ENTRYPOINT_REGEX = re.compile(f"^{ENTRYPOINT_PATTERN}$", re.I)
  173. RECOMMEDED_ENTRYPOINT_PATTERN = r"[\w.-]+"
  174. RECOMMEDED_ENTRYPOINT_REGEX = re.compile(f"^{RECOMMEDED_ENTRYPOINT_PATTERN}$", re.I)
  175. ENTRYPOINT_GROUP_PATTERN = r"\w+(\.\w+)*"
  176. ENTRYPOINT_GROUP_REGEX = re.compile(f"^{ENTRYPOINT_GROUP_PATTERN}$", re.I)
  177. def python_identifier(value: str) -> bool:
  178. return value.isidentifier()
  179. def python_qualified_identifier(value: str) -> bool:
  180. if value.startswith(".") or value.endswith("."):
  181. return False
  182. return all(python_identifier(m) for m in value.split("."))
  183. def python_module_name(value: str) -> bool:
  184. return python_qualified_identifier(value)
  185. def python_entrypoint_group(value: str) -> bool:
  186. return ENTRYPOINT_GROUP_REGEX.match(value) is not None
  187. def python_entrypoint_name(value: str) -> bool:
  188. if not ENTRYPOINT_REGEX.match(value):
  189. return False
  190. if not RECOMMEDED_ENTRYPOINT_REGEX.match(value):
  191. msg = f"Entry point `{value}` does not follow recommended pattern: "
  192. msg += RECOMMEDED_ENTRYPOINT_PATTERN
  193. _logger.warning(msg)
  194. return True
  195. def python_entrypoint_reference(value: str) -> bool:
  196. module, _, rest = value.partition(":")
  197. if "[" in rest:
  198. obj, _, extras_ = rest.partition("[")
  199. if extras_.strip()[-1] != "]":
  200. return False
  201. extras = (x.strip() for x in extras_.strip(string.whitespace + "[]").split(","))
  202. if not all(pep508_identifier(e) for e in extras):
  203. return False
  204. _logger.warning(f"`{value}` - using extras for entry points is not recommended")
  205. else:
  206. obj = rest
  207. module_parts = module.split(".")
  208. identifiers = _chain(module_parts, obj.split(".")) if rest else module_parts
  209. return all(python_identifier(i.strip()) for i in identifiers)