_normalization.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. """
  2. Helpers for normalization as expected in wheel/sdist/module file names
  3. and core metadata
  4. """
  5. import re
  6. from pathlib import Path
  7. from typing import Union
  8. from .extern import packaging
  9. _Path = Union[str, Path]
  10. # https://packaging.python.org/en/latest/specifications/core-metadata/#name
  11. _VALID_NAME = re.compile(r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.I)
  12. _UNSAFE_NAME_CHARS = re.compile(r"[^A-Z0-9.]+", re.I)
  13. _NON_ALPHANUMERIC = re.compile(r"[^A-Z0-9]+", re.I)
  14. _PEP440_FALLBACK = re.compile(r"^v?(?P<safe>(?:[0-9]+!)?[0-9]+(?:\.[0-9]+)*)", re.I)
  15. def safe_identifier(name: str) -> str:
  16. """Make a string safe to be used as Python identifier.
  17. >>> safe_identifier("12abc")
  18. '_12abc'
  19. >>> safe_identifier("__editable__.myns.pkg-78.9.3_local")
  20. '__editable___myns_pkg_78_9_3_local'
  21. """
  22. safe = re.sub(r'\W|^(?=\d)', '_', name)
  23. assert safe.isidentifier()
  24. return safe
  25. def safe_name(component: str) -> str:
  26. """Escape a component used as a project name according to Core Metadata.
  27. >>> safe_name("hello world")
  28. 'hello-world'
  29. >>> safe_name("hello?world")
  30. 'hello-world'
  31. """
  32. # See pkg_resources.safe_name
  33. return _UNSAFE_NAME_CHARS.sub("-", component)
  34. def safe_version(version: str) -> str:
  35. """Convert an arbitrary string into a valid version string.
  36. Can still raise an ``InvalidVersion`` exception.
  37. To avoid exceptions use ``best_effort_version``.
  38. >>> safe_version("1988 12 25")
  39. '1988.12.25'
  40. >>> safe_version("v0.2.1")
  41. '0.2.1'
  42. >>> safe_version("v0.2?beta")
  43. '0.2b0'
  44. >>> safe_version("v0.2 beta")
  45. '0.2b0'
  46. >>> safe_version("ubuntu lts")
  47. Traceback (most recent call last):
  48. ...
  49. setuptools.extern.packaging.version.InvalidVersion: Invalid version: 'ubuntu.lts'
  50. """
  51. v = version.replace(' ', '.')
  52. try:
  53. return str(packaging.version.Version(v))
  54. except packaging.version.InvalidVersion:
  55. attempt = _UNSAFE_NAME_CHARS.sub("-", v)
  56. return str(packaging.version.Version(attempt))
  57. def best_effort_version(version: str) -> str:
  58. """Convert an arbitrary string into a version-like string.
  59. Fallback when ``safe_version`` is not safe enough.
  60. >>> best_effort_version("v0.2 beta")
  61. '0.2b0'
  62. >>> best_effort_version("ubuntu lts")
  63. '0.dev0+sanitized.ubuntu.lts'
  64. >>> best_effort_version("0.23ubuntu1")
  65. '0.23.dev0+sanitized.ubuntu1'
  66. >>> best_effort_version("0.23-")
  67. '0.23.dev0+sanitized'
  68. >>> best_effort_version("0.-_")
  69. '0.dev0+sanitized'
  70. >>> best_effort_version("42.+?1")
  71. '42.dev0+sanitized.1'
  72. """
  73. # See pkg_resources._forgiving_version
  74. try:
  75. return safe_version(version)
  76. except packaging.version.InvalidVersion:
  77. v = version.replace(' ', '.')
  78. match = _PEP440_FALLBACK.search(v)
  79. if match:
  80. safe = match["safe"]
  81. rest = v[len(safe) :]
  82. else:
  83. safe = "0"
  84. rest = version
  85. safe_rest = _NON_ALPHANUMERIC.sub(".", rest).strip(".")
  86. local = f"sanitized.{safe_rest}".strip(".")
  87. return safe_version(f"{safe}.dev0+{local}")
  88. def safe_extra(extra: str) -> str:
  89. """Normalize extra name according to PEP 685
  90. >>> safe_extra("_FrIeNdLy-._.-bArD")
  91. 'friendly-bard'
  92. >>> safe_extra("FrIeNdLy-._.-bArD__._-")
  93. 'friendly-bard'
  94. """
  95. return _NON_ALPHANUMERIC.sub("-", extra).strip("-").lower()
  96. def filename_component(value: str) -> str:
  97. """Normalize each component of a filename (e.g. distribution/version part of wheel)
  98. Note: ``value`` needs to be already normalized.
  99. >>> filename_component("my-pkg")
  100. 'my_pkg'
  101. """
  102. return value.replace("-", "_").strip("_")
  103. def safer_name(value: str) -> str:
  104. """Like ``safe_name`` but can be used as filename component for wheel"""
  105. # See bdist_wheel.safer_name
  106. return filename_component(safe_name(value))
  107. def safer_best_effort_version(value: str) -> str:
  108. """Like ``best_effort_version`` but can be used as filename component for wheel"""
  109. # See bdist_wheel.safer_verion
  110. # TODO: Replace with only safe_version in the future (no need for best effort)
  111. return filename_component(best_effort_version(value))