# socks.py (7.4 KB)
  1. """
This module contains provisional support for SOCKS proxies from within
urllib3. This module supports SOCKS4, SOCKS4A (an extension of SOCKS4), and
SOCKS5. To enable its functionality, either install PySocks or install this
module with the ``socks`` extra.

The SOCKS implementation supports the full range of urllib3 features. It also
supports the following SOCKS features:

- SOCKS4A (``proxy_url='socks4a://...'``)
- SOCKS4 (``proxy_url='socks4://...'``)
- SOCKS5 with remote DNS (``proxy_url='socks5h://...'``)
- SOCKS5 with local DNS (``proxy_url='socks5://...'``)
- Usernames and passwords for the SOCKS proxy

.. note::
   It is recommended to use ``socks5h://`` or ``socks4a://`` schemes in
   your ``proxy_url`` to ensure that DNS resolution is done from the remote
   server instead of client-side when connecting to a domain name.

SOCKS4 supports IPv4 and domain names with the SOCKS4A extension. SOCKS5
supports IPv4, IPv6, and domain names.

When connecting to a SOCKS4 proxy the ``username`` portion of the ``proxy_url``
will be sent as the ``userid`` section of the SOCKS request:

.. code-block:: python

    proxy_url="socks4a://<userid>@proxy-host"

When connecting to a SOCKS5 proxy the ``username`` and ``password`` portion
of the ``proxy_url`` will be sent as the username/password to authenticate
with the proxy:

.. code-block:: python

    proxy_url="socks5h://<username>:<password>@proxy-host"
  28. """
  29. from __future__ import annotations
  30. try:
  31. import socks # type: ignore[import]
  32. except ImportError:
  33. import warnings
  34. from ..exceptions import DependencyWarning
  35. warnings.warn(
  36. (
  37. "SOCKS support in urllib3 requires the installation of optional "
  38. "dependencies: specifically, PySocks. For more information, see "
  39. "https://urllib3.readthedocs.io/en/latest/contrib.html#socks-proxies"
  40. ),
  41. DependencyWarning,
  42. )
  43. raise
  44. import typing
  45. from socket import timeout as SocketTimeout
  46. from ..connection import HTTPConnection, HTTPSConnection
  47. from ..connectionpool import HTTPConnectionPool, HTTPSConnectionPool
  48. from ..exceptions import ConnectTimeoutError, NewConnectionError
  49. from ..poolmanager import PoolManager
  50. from ..util.url import parse_url
  51. try:
  52. import ssl
  53. except ImportError:
  54. ssl = None # type: ignore[assignment]
  55. from typing import TypedDict
  56. class _TYPE_SOCKS_OPTIONS(TypedDict):
  57. socks_version: int
  58. proxy_host: str | None
  59. proxy_port: str | None
  60. username: str | None
  61. password: str | None
  62. rdns: bool
  63. class SOCKSConnection(HTTPConnection):
  64. """
  65. A plain-text HTTP connection that connects via a SOCKS proxy.
  66. """
  67. def __init__(
  68. self,
  69. _socks_options: _TYPE_SOCKS_OPTIONS,
  70. *args: typing.Any,
  71. **kwargs: typing.Any,
  72. ) -> None:
  73. self._socks_options = _socks_options
  74. super().__init__(*args, **kwargs)
  75. def _new_conn(self) -> socks.socksocket:
  76. """
  77. Establish a new connection via the SOCKS proxy.
  78. """
  79. extra_kw: dict[str, typing.Any] = {}
  80. if self.source_address:
  81. extra_kw["source_address"] = self.source_address
  82. if self.socket_options:
  83. extra_kw["socket_options"] = self.socket_options
  84. try:
  85. conn = socks.create_connection(
  86. (self.host, self.port),
  87. proxy_type=self._socks_options["socks_version"],
  88. proxy_addr=self._socks_options["proxy_host"],
  89. proxy_port=self._socks_options["proxy_port"],
  90. proxy_username=self._socks_options["username"],
  91. proxy_password=self._socks_options["password"],
  92. proxy_rdns=self._socks_options["rdns"],
  93. timeout=self.timeout,
  94. **extra_kw,
  95. )
  96. except SocketTimeout as e:
  97. raise ConnectTimeoutError(
  98. self,
  99. f"Connection to {self.host} timed out. (connect timeout={self.timeout})",
  100. ) from e
  101. except socks.ProxyError as e:
  102. # This is fragile as hell, but it seems to be the only way to raise
  103. # useful errors here.
  104. if e.socket_err:
  105. error = e.socket_err
  106. if isinstance(error, SocketTimeout):
  107. raise ConnectTimeoutError(
  108. self,
  109. f"Connection to {self.host} timed out. (connect timeout={self.timeout})",
  110. ) from e
  111. else:
  112. # Adding `from e` messes with coverage somehow, so it's omitted.
  113. # See #2386.
  114. raise NewConnectionError(
  115. self, f"Failed to establish a new connection: {error}"
  116. )
  117. else:
  118. raise NewConnectionError(
  119. self, f"Failed to establish a new connection: {e}"
  120. ) from e
  121. except OSError as e: # Defensive: PySocks should catch all these.
  122. raise NewConnectionError(
  123. self, f"Failed to establish a new connection: {e}"
  124. ) from e
  125. return conn
  126. # We don't need to duplicate the Verified/Unverified distinction from
  127. # urllib3/connection.py here because the HTTPSConnection will already have been
  128. # correctly set to either the Verified or Unverified form by that module. This
  129. # means the SOCKSHTTPSConnection will automatically be the correct type.
  130. class SOCKSHTTPSConnection(SOCKSConnection, HTTPSConnection):
  131. pass
  132. class SOCKSHTTPConnectionPool(HTTPConnectionPool):
  133. ConnectionCls = SOCKSConnection
  134. class SOCKSHTTPSConnectionPool(HTTPSConnectionPool):
  135. ConnectionCls = SOCKSHTTPSConnection
  136. class SOCKSProxyManager(PoolManager):
  137. """
  138. A version of the urllib3 ProxyManager that routes connections via the
  139. defined SOCKS proxy.
  140. """
  141. pool_classes_by_scheme = {
  142. "http": SOCKSHTTPConnectionPool,
  143. "https": SOCKSHTTPSConnectionPool,
  144. }
  145. def __init__(
  146. self,
  147. proxy_url: str,
  148. username: str | None = None,
  149. password: str | None = None,
  150. num_pools: int = 10,
  151. headers: typing.Mapping[str, str] | None = None,
  152. **connection_pool_kw: typing.Any,
  153. ):
  154. parsed = parse_url(proxy_url)
  155. if username is None and password is None and parsed.auth is not None:
  156. split = parsed.auth.split(":")
  157. if len(split) == 2:
  158. username, password = split
  159. if parsed.scheme == "socks5":
  160. socks_version = socks.PROXY_TYPE_SOCKS5
  161. rdns = False
  162. elif parsed.scheme == "socks5h":
  163. socks_version = socks.PROXY_TYPE_SOCKS5
  164. rdns = True
  165. elif parsed.scheme == "socks4":
  166. socks_version = socks.PROXY_TYPE_SOCKS4
  167. rdns = False
  168. elif parsed.scheme == "socks4a":
  169. socks_version = socks.PROXY_TYPE_SOCKS4
  170. rdns = True
  171. else:
  172. raise ValueError(f"Unable to determine SOCKS version from {proxy_url}")
  173. self.proxy_url = proxy_url
  174. socks_options = {
  175. "socks_version": socks_version,
  176. "proxy_host": parsed.host,
  177. "proxy_port": parsed.port,
  178. "username": username,
  179. "password": password,
  180. "rdns": rdns,
  181. }
  182. connection_pool_kw["_socks_options"] = socks_options
  183. super().__init__(num_pools, headers, **connection_pool_kw)
  184. self.pool_classes_by_scheme = SOCKSProxyManager.pool_classes_by_scheme