
  1. # -*- coding: utf-8 -*-
  2. """Small, fast HTTP client library for Python."""
  3. __author__ = "Joe Gregorio (joe@bitworking.org)"
  4. __copyright__ = "Copyright 2006, Joe Gregorio"
  5. __contributors__ = [
  6. "Thomas Broyer (t.broyer@ltgt.net)",
  7. "James Antill",
  8. "Xavier Verges Farrero",
  9. "Jonathan Feinberg",
  10. "Blair Zajac",
  11. "Sam Ruby",
  12. "Louis Nyffenegger",
  13. "Mark Pilgrim",
  14. "Alex Yu",
  15. "Lai Han",
  16. ]
  17. __license__ = "MIT"
  18. __version__ = "0.22.0"
  19. import base64
  20. import calendar
  21. import copy
  22. import email
  23. import email.feedparser
  24. from email import header
  25. import email.message
  26. import email.utils
  27. import errno
  28. from gettext import gettext as _
  29. import gzip
  30. from hashlib import md5 as _md5
  31. from hashlib import sha1 as _sha
  32. import hmac
  33. import http.client
  34. import io
  35. import os
  36. import random
  37. import re
  38. import socket
  39. import ssl
  40. import sys
  41. import time
  42. import urllib.parse
  43. import zlib
  44. try:
  45. import socks
  46. except ImportError:
  47. # TODO: remove this fallback and copypasted socksipy module upon py2/3 merge,
  48. # idea is to have soft-dependency on any compatible module called socks
  49. from . import socks
  50. from . import auth
  51. from .error import *
  52. from .iri2uri import iri2uri
  53. def has_timeout(timeout):
  54. if hasattr(socket, "_GLOBAL_DEFAULT_TIMEOUT"):
  55. return timeout is not None and timeout is not socket._GLOBAL_DEFAULT_TIMEOUT
  56. return timeout is not None
  57. __all__ = [
  58. "debuglevel",
  59. "FailedToDecompressContent",
  60. "Http",
  61. "HttpLib2Error",
  62. "ProxyInfo",
  63. "RedirectLimit",
  64. "RedirectMissingLocation",
  65. "Response",
  66. "RETRIES",
  67. "UnimplementedDigestAuthOptionError",
  68. "UnimplementedHmacDigestAuthOptionError",
  69. ]
  70. # The httplib debug level, set to a non-zero value to get debug output
  71. debuglevel = 0
  72. # A request will be tried 'RETRIES' times if it fails at the socket/connection level.
  73. RETRIES = 2
  74. # Open Items:
  75. # -----------
  76. # Are we removing the cached content too soon on PUT (should we only delete on 200, maybe?)
  77. # Pluggable cache storage (supports storing the cache in
  78. # flat files by default. We need a plug-in architecture
  79. # that can support Berkeley DB and Squid)
  80. # == Known Issues ==
  81. # Does not handle a resource that uses conneg and Last-Modified but no ETag as a cache validator.
  82. # Does not handle Cache-Control: max-stale
  83. # Does not use Age: headers when calculating cache freshness.
  84. # The number of redirections to follow before giving up.
  85. # Note that only GET redirects are automatically followed.
  86. # Will also honor 301 responses by saving that info and never
  87. # requesting that URI again.
  88. DEFAULT_MAX_REDIRECTS = 5
  89. # Which headers are hop-by-hop headers by default
  90. HOP_BY_HOP = [
  91. "connection",
  92. "keep-alive",
  93. "proxy-authenticate",
  94. "proxy-authorization",
  95. "te",
  96. "trailers",
  97. "transfer-encoding",
  98. "upgrade",
  99. ]
  100. # https://tools.ietf.org/html/rfc7231#section-8.1.3
  101. SAFE_METHODS = ("GET", "HEAD", "OPTIONS", "TRACE")
  102. # To change, assign to `Http().redirect_codes`
  103. REDIRECT_CODES = frozenset((300, 301, 302, 303, 307, 308))
  104. from httplib2 import certs
  105. CA_CERTS = certs.where()
  106. # PROTOCOL_TLS is python 3.5.3+. PROTOCOL_SSLv23 is deprecated.
  107. # Both PROTOCOL_TLS and PROTOCOL_SSLv23 are equivalent and means:
  108. # > Selects the highest protocol version that both the client and server support.
  109. # > Despite the name, this option can select “TLS” protocols as well as “SSL”.
  110. # source: https://docs.python.org/3.5/library/ssl.html#ssl.PROTOCOL_SSLv23
  111. # PROTOCOL_TLS_CLIENT is python 3.10.0+. PROTOCOL_TLS is deprecated.
  112. # > Auto-negotiate the highest protocol version that both the client and server support, and configure the context for client-side connections.
  113. # > The protocol enables CERT_REQUIRED and check_hostname by default.
  114. # source: https://docs.python.org/3.10/library/ssl.html#ssl.PROTOCOL_TLS
  115. DEFAULT_TLS_VERSION = getattr(ssl, "PROTOCOL_TLS_CLIENT", None) or getattr(ssl, "PROTOCOL_TLS", None) or getattr(ssl, "PROTOCOL_SSLv23")
  116. def _build_ssl_context(
  117. disable_ssl_certificate_validation,
  118. ca_certs,
  119. cert_file=None,
  120. key_file=None,
  121. maximum_version=None,
  122. minimum_version=None,
  123. key_password=None,
  124. ):
  125. if not hasattr(ssl, "SSLContext"):
  126. raise RuntimeError("httplib2 requires Python 3.2+ for ssl.SSLContext")
  127. context = ssl.SSLContext(DEFAULT_TLS_VERSION)
  128. # check_hostname and verify_mode should be set in opposite order during disable
  129. # https://bugs.python.org/issue31431
  130. if disable_ssl_certificate_validation and hasattr(context, "check_hostname"):
  131. context.check_hostname = not disable_ssl_certificate_validation
  132. context.verify_mode = ssl.CERT_NONE if disable_ssl_certificate_validation else ssl.CERT_REQUIRED
  133. # SSLContext.maximum_version and SSLContext.minimum_version are python 3.7+.
  134. # source: https://docs.python.org/3/library/ssl.html#ssl.SSLContext.maximum_version
  135. if maximum_version is not None:
  136. if hasattr(context, "maximum_version"):
  137. if isinstance(maximum_version, str):
  138. maximum_version = getattr(ssl.TLSVersion, maximum_version)
  139. context.maximum_version = maximum_version
  140. else:
  141. raise RuntimeError("setting tls_maximum_version requires Python 3.7 and OpenSSL 1.1 or newer")
  142. if minimum_version is not None:
  143. if hasattr(context, "minimum_version"):
  144. if isinstance(minimum_version, str):
  145. minimum_version = getattr(ssl.TLSVersion, minimum_version)
  146. context.minimum_version = minimum_version
  147. else:
  148. raise RuntimeError("setting tls_minimum_version requires Python 3.7 and OpenSSL 1.1 or newer")
  149. # check_hostname requires python 3.4+
  150. # we will perform the equivalent in HTTPSConnectionWithTimeout.connect() by calling ssl.match_hostname
  151. # if check_hostname is not supported.
  152. if hasattr(context, "check_hostname"):
  153. context.check_hostname = not disable_ssl_certificate_validation
  154. context.load_verify_locations(ca_certs)
  155. if cert_file:
  156. context.load_cert_chain(cert_file, key_file, key_password)
  157. return context
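# Illustrative sketch (editorial addition, not part of httplib2): a typical call to the
# helper above. "TLSv1_2" is resolved via ssl.TLSVersion on Python 3.7+, and CA_CERTS is
# the bundled certificate file defined earlier in this module.
#   ctx = _build_ssl_context(
#       disable_ssl_certificate_validation=False,
#       ca_certs=CA_CERTS,
#       minimum_version="TLSv1_2",
#   )
#   assert ctx.verify_mode == ssl.CERT_REQUIRED and ctx.check_hostname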
  158. def _get_end2end_headers(response):
  159. hopbyhop = list(HOP_BY_HOP)
  160. hopbyhop.extend([x.strip() for x in response.get("connection", "").split(",")])
  161. return [header for header in list(response.keys()) if header not in hopbyhop]
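# Illustrative sketch (editorial addition): headers named in "connection" are treated as
# hop-by-hop in addition to the HOP_BY_HOP defaults; only end-to-end headers survive.
#   resp = {"connection": "x-trace", "x-trace": "abc", "content-type": "text/plain", "te": "trailers"}
#   _get_end2end_headers(resp)   # -> ['content-type']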
  162. _missing = object()
  163. def _errno_from_exception(e):
  164. # TODO python 3.11+ cheap try: return e.errno except AttributeError: pass
  165. errno = getattr(e, "errno", _missing)
  166. if errno is not _missing:
  167. return errno
  168. # socket.error and common wrap in .args
  169. args = getattr(e, "args", None)
  170. if args:
  171. return _errno_from_exception(args[0])
  172. # pysocks.ProxyError wraps in .socket_err
  173. # https://github.com/httplib2/httplib2/pull/202
  174. socket_err = getattr(e, "socket_err", None)
  175. if socket_err:
  176. return _errno_from_exception(socket_err)
  177. return None
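# Illustrative sketch (editorial addition): the helper digs the errno out of a plain
# OSError as well as exceptions that wrap one in .args or .socket_err.
#   _errno_from_exception(OSError(errno.ECONNREFUSED, "refused"))  # -> errno.ECONNREFUSED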
  178. URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
  179. def parse_uri(uri):
  180. """Parses a URI using the regex given in Appendix B of RFC 3986.
  181. (scheme, authority, path, query, fragment) = parse_uri(uri)
  182. """
  183. groups = URI.match(uri).groups()
  184. return (groups[1], groups[3], groups[4], groups[6], groups[8])
  185. def urlnorm(uri):
  186. (scheme, authority, path, query, fragment) = parse_uri(uri)
  187. if not scheme or not authority:
  188. raise RelativeURIError("Only absolute URIs are allowed. uri = %s" % uri)
  189. authority = authority.lower()
  190. scheme = scheme.lower()
  191. if not path:
  192. path = "/"
  193. # Could do syntax based normalization of the URI before
  194. # computing the digest. See Section 6.2.2 of Std 66.
  195. request_uri = query and "?".join([path, query]) or path
  196. scheme = scheme.lower()
  197. defrag_uri = scheme + "://" + authority + request_uri
  198. return scheme, authority, request_uri, defrag_uri
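# Illustrative sketch (editorial addition): the tuples produced by the two helpers
# above, for a made-up URI.
#   parse_uri("http://example.com/path?q=1#frag")
#   # -> ('http', 'example.com', '/path', 'q=1', 'frag')
#   urlnorm("HTTP://Example.COM/path?q=1#frag")
#   # -> ('http', 'example.com', '/path?q=1', 'http://example.com/path?q=1')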
  199. # Cache filename construction (original borrowed from Venus http://intertwingly.net/code/venus/)
  200. re_url_scheme = re.compile(r"^\w+://")
  201. re_unsafe = re.compile(r"[^\w\-_.()=!]+", re.ASCII)
  202. def safename(filename):
  203. """Return a filename suitable for the cache.
  204. Strips dangerous and common characters to create a filename we
  205. can use to store the cache in.
  206. """
  207. if isinstance(filename, bytes):
  208. filename_bytes = filename
  209. filename = filename.decode("utf-8")
  210. else:
  211. filename_bytes = filename.encode("utf-8")
  212. filemd5 = _md5(filename_bytes).hexdigest()
  213. filename = re_url_scheme.sub("", filename)
  214. filename = re_unsafe.sub("", filename)
  215. # limit length of filename (vital for Windows)
  216. # https://github.com/httplib2/httplib2/pull/74
  217. # C:\Users\ <username> \AppData\Local\Temp\ <safe_filename> , <md5>
  218. # 9 chars + max 104 chars + 20 chars + x + 1 + 32 = max 259 chars
  219. # Thus max safe filename x = 93 chars. Let it be 90 to make a round sum:
  220. filename = filename[:90]
  221. return ",".join((filename, filemd5))
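# Illustrative sketch (editorial addition; hash elided): the scheme and unsafe characters
# are stripped, then the md5 of the full original name is appended after a comma.
#   safename("http://example.com/cat?foo=bar")
#   # -> 'example.comcatfoo=bar,<md5 hex of the full URI>'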
  222. NORMALIZE_SPACE = re.compile(r"(?:\r\n)?[ \t]+")
  223. def _normalize_headers(headers):
  224. return dict(
  225. [
  226. (_convert_byte_str(key).lower(), NORMALIZE_SPACE.sub(" ", _convert_byte_str(value)).strip(),)
  227. for (key, value) in headers.items()
  228. ]
  229. )
  230. def _convert_byte_str(s):
  231. if not isinstance(s, str):
  232. return str(s, "utf-8")
  233. return s
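# Illustrative sketch (editorial addition): keys are lowercased, byte strings are decoded,
# and folded whitespace in values is collapsed.
#   _normalize_headers({b"Content-Type": b"text/plain", "X-Folded": "a\r\n b"})
#   # -> {'content-type': 'text/plain', 'x-folded': 'a b'}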
  234. def _parse_cache_control(headers):
  235. retval = {}
  236. if "cache-control" in headers:
  237. parts = headers["cache-control"].split(",")
  238. parts_with_args = [
  239. tuple([x.strip().lower() for x in part.split("=", 1)]) for part in parts if -1 != part.find("=")
  240. ]
  241. parts_wo_args = [(name.strip().lower(), 1) for name in parts if -1 == name.find("=")]
  242. retval = dict(parts_with_args + parts_wo_args)
  243. return retval
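# Illustrative sketch (editorial addition): directives with a value keep it as a string;
# bare directives map to 1.
#   _parse_cache_control({"cache-control": "max-age=3600, no-cache"})
#   # -> {'max-age': '3600', 'no-cache': 1}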
  244. # Whether to use a strict mode to parse WWW-Authenticate headers
  245. # Might lead to bad results in case of ill-formed header value,
  246. # so disabled by default, falling back to relaxed parsing.
  247. # Set to true to turn on, useful for testing servers.
  248. USE_WWW_AUTH_STRICT_PARSING = 0
  249. def _entry_disposition(response_headers, request_headers):
  250. """Determine freshness from the Date, Expires and Cache-Control headers.
  251. We don't handle the following:
  252. 1. Cache-Control: max-stale
  253. 2. Age: headers are not used in the calculations.
  254. Note that this algorithm is simpler than you might think
  255. because we are operating as a private (non-shared) cache.
  256. This lets us ignore 's-maxage'. We can also ignore
  257. 'proxy-invalidate' since we aren't a proxy.
  258. We will never return a stale document as
  259. fresh as a design decision, and thus the non-implementation
  260. of 'max-stale'. This also lets us safely ignore 'must-revalidate'
  261. since we operate as if every server has sent 'must-revalidate'.
  262. Since we are private we get to ignore both 'public' and
  263. 'private' parameters. We also ignore 'no-transform' since
  264. we don't do any transformations.
  265. The 'no-store' parameter is handled at a higher level.
  266. So the only Cache-Control parameters we look at are:
  267. no-cache
  268. only-if-cached
  269. max-age
  270. min-fresh
  271. """
  272. retval = "STALE"
  273. cc = _parse_cache_control(request_headers)
  274. cc_response = _parse_cache_control(response_headers)
  275. if "pragma" in request_headers and request_headers["pragma"].lower().find("no-cache") != -1:
  276. retval = "TRANSPARENT"
  277. if "cache-control" not in request_headers:
  278. request_headers["cache-control"] = "no-cache"
  279. elif "no-cache" in cc:
  280. retval = "TRANSPARENT"
  281. elif "no-cache" in cc_response:
  282. retval = "STALE"
  283. elif "only-if-cached" in cc:
  284. retval = "FRESH"
  285. elif "date" in response_headers:
  286. date = calendar.timegm(email.utils.parsedate_tz(response_headers["date"]))
  287. now = time.time()
  288. current_age = max(0, now - date)
  289. if "max-age" in cc_response:
  290. try:
  291. freshness_lifetime = int(cc_response["max-age"])
  292. except ValueError:
  293. freshness_lifetime = 0
  294. elif "expires" in response_headers:
  295. expires = email.utils.parsedate_tz(response_headers["expires"])
  296. if None == expires:
  297. freshness_lifetime = 0
  298. else:
  299. freshness_lifetime = max(0, calendar.timegm(expires) - date)
  300. else:
  301. freshness_lifetime = 0
  302. if "max-age" in cc:
  303. try:
  304. freshness_lifetime = int(cc["max-age"])
  305. except ValueError:
  306. freshness_lifetime = 0
  307. if "min-fresh" in cc:
  308. try:
  309. min_fresh = int(cc["min-fresh"])
  310. except ValueError:
  311. min_fresh = 0
  312. current_age += min_fresh
  313. if freshness_lifetime > current_age:
  314. retval = "FRESH"
  315. return retval
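# Illustrative sketch (editorial addition): a response dated "now" with max-age=300 is
# FRESH for a plain request, while a request sending no-cache forces revalidation.
#   resp = {"date": email.utils.formatdate(usegmt=True), "cache-control": "max-age=300"}
#   _entry_disposition(resp, {})                             # -> "FRESH"
#   _entry_disposition(resp, {"cache-control": "no-cache"})  # -> "TRANSPARENT"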
  316. def _decompressContent(response, new_content):
  317. content = new_content
  318. try:
  319. encoding = response.get("content-encoding", None)
  320. if encoding in ["gzip", "deflate"]:
  321. if encoding == "gzip":
  322. content = gzip.GzipFile(fileobj=io.BytesIO(new_content)).read()
  323. if encoding == "deflate":
  324. try:
  325. content = zlib.decompress(content, zlib.MAX_WBITS)
  326. except (IOError, zlib.error):
  327. content = zlib.decompress(content, -zlib.MAX_WBITS)
  328. response["content-length"] = str(len(content))
  329. # Record the historical presence of the encoding in a way that won't interfere.
  330. response["-content-encoding"] = response["content-encoding"]
  331. del response["content-encoding"]
  332. except (IOError, zlib.error):
  333. content = ""
  334. raise FailedToDecompressContent(
  335. _("Content purported to be compressed with %s but failed to decompress.") % response.get("content-encoding"),
  336. response,
  337. content,
  338. )
  339. return content
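# Illustrative sketch (editorial addition): a gzip body is inflated and the bookkeeping
# headers are rewritten on the dict-like response.
#   resp = {"content-encoding": "gzip"}
#   _decompressContent(resp, gzip.compress(b"hello"))   # -> b"hello"
#   resp   # -> {'content-length': '5', '-content-encoding': 'gzip'}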
  340. def _bind_write_headers(msg):
  341. def _write_headers(self):
  342. # Self refers to the Generator object.
  343. for h, v in msg.items():
  344. print("%s:" % h, end=" ", file=self._fp)
  345. if isinstance(v, header.Header):
  346. print(v.encode(maxlinelen=self._maxheaderlen), file=self._fp)
  347. else:
  348. # email.Header has lots of smarts, so use it.
  349. headers = header.Header(v, maxlinelen=self._maxheaderlen, charset="utf-8", header_name=h)
  350. print(headers.encode(), file=self._fp)
  351. # A blank line always separates headers from body.
  352. print(file=self._fp)
  353. return _write_headers
  354. def _updateCache(request_headers, response_headers, content, cache, cachekey):
  355. if cachekey:
  356. cc = _parse_cache_control(request_headers)
  357. cc_response = _parse_cache_control(response_headers)
  358. if "no-store" in cc or "no-store" in cc_response:
  359. cache.delete(cachekey)
  360. else:
  361. info = email.message.Message()
  362. for key, value in response_headers.items():
  363. if key not in ["status", "content-encoding", "transfer-encoding"]:
  364. info[key] = value
  365. # Add annotations to the cache to indicate what headers
  366. # are variant for this request.
  367. vary = response_headers.get("vary", None)
  368. if vary:
  369. vary_headers = vary.lower().replace(" ", "").split(",")
  370. for header in vary_headers:
  371. key = "-varied-%s" % header
  372. try:
  373. info[key] = request_headers[header]
  374. except KeyError:
  375. pass
  376. status = response_headers.status
  377. if status == 304:
  378. status = 200
  379. status_header = "status: %d\r\n" % status
  380. try:
  381. header_str = info.as_string()
  382. except UnicodeEncodeError:
  383. setattr(info, "_write_headers", _bind_write_headers(info))
  384. header_str = info.as_string()
  385. header_str = re.sub("\r(?!\n)|(?<!\r)\n", "\r\n", header_str)
  386. text = b"".join([status_header.encode("utf-8"), header_str.encode("utf-8"), content])
  387. cache.set(cachekey, text)
  388. def _cnonce():
  389. dig = _md5(
  390. ("%s:%s" % (time.ctime(), ["0123456789"[random.randrange(0, 9)] for i in range(20)])).encode("utf-8")
  391. ).hexdigest()
  392. return dig[:16]
  393. def _wsse_username_token(cnonce, iso_now, password):
  394. return (
  395. base64.b64encode(_sha(("%s%s%s" % (cnonce, iso_now, password)).encode("utf-8")).digest()).strip().decode("utf-8")
  396. )
  397. # For credentials we need two things, first
  398. # a pool of credentials to try (not necessarily tied to Basic, Digest, etc.)
  399. # Then we also need a list of URIs that have already demanded authentication
  400. # That list is tricky since sub-URIs can take the same auth, or the
  401. # auth scheme may change as you descend the tree.
  402. # So we also need each Auth instance to be able to tell us
  403. # how close to the 'top' it is.
  404. class Authentication(object):
  405. def __init__(self, credentials, host, request_uri, headers, response, content, http):
  406. (scheme, authority, path, query, fragment) = parse_uri(request_uri)
  407. self.path = path
  408. self.host = host
  409. self.credentials = credentials
  410. self.http = http
  411. def depth(self, request_uri):
  412. (scheme, authority, path, query, fragment) = parse_uri(request_uri)
  413. return request_uri[len(self.path) :].count("/")
  414. def inscope(self, host, request_uri):
  415. # XXX Should we normalize the request_uri?
  416. (scheme, authority, path, query, fragment) = parse_uri(request_uri)
  417. return (host == self.host) and path.startswith(self.path)
  418. def request(self, method, request_uri, headers, content):
  419. """Modify the request headers to add the appropriate
  420. Authorization header. Override this in sub-classes."""
  421. pass
  422. def response(self, response, content):
  423. """Gives us a chance to update with new nonces
  424. or such returned from the last authorized response.
  425. Override this in sub-classes if necessary.
  426. Return TRUE if the request is to be retried, for
  427. example Digest may return stale=true.
  428. """
  429. return False
  430. def __eq__(self, auth):
  431. return False
  432. def __ne__(self, auth):
  433. return True
  434. def __lt__(self, auth):
  435. return True
  436. def __gt__(self, auth):
  437. return False
  438. def __le__(self, auth):
  439. return True
  440. def __ge__(self, auth):
  441. return False
  442. def __bool__(self):
  443. return True
  444. class BasicAuthentication(Authentication):
  445. def __init__(self, credentials, host, request_uri, headers, response, content, http):
  446. Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
  447. def request(self, method, request_uri, headers, content):
  448. """Modify the request headers to add the appropriate
  449. Authorization header."""
  450. headers["authorization"] = "Basic " + base64.b64encode(
  451. ("%s:%s" % self.credentials).encode("utf-8")
  452. ).strip().decode("utf-8")
  453. class DigestAuthentication(Authentication):
  454. """Only do qop='auth' and MD5, since that
  455. is all Apache currently implements"""
  456. def __init__(self, credentials, host, request_uri, headers, response, content, http):
  457. Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
  458. self.challenge = auth._parse_www_authenticate(response, "www-authenticate")["digest"]
  459. qop = self.challenge.get("qop", "auth")
  460. self.challenge["qop"] = ("auth" in [x.strip() for x in qop.split()]) and "auth" or None
  461. if self.challenge["qop"] is None:
  462. raise UnimplementedDigestAuthOptionError(_("Unsupported value for qop: %s." % qop))
  463. self.challenge["algorithm"] = self.challenge.get("algorithm", "MD5").upper()
  464. if self.challenge["algorithm"] != "MD5":
  465. raise UnimplementedDigestAuthOptionError(
  466. _("Unsupported value for algorithm: %s." % self.challenge["algorithm"])
  467. )
  468. self.A1 = "".join([self.credentials[0], ":", self.challenge["realm"], ":", self.credentials[1],])
  469. self.challenge["nc"] = 1
  470. def request(self, method, request_uri, headers, content, cnonce=None):
  471. """Modify the request headers"""
  472. H = lambda x: _md5(x.encode("utf-8")).hexdigest()
  473. KD = lambda s, d: H("%s:%s" % (s, d))
  474. A2 = "".join([method, ":", request_uri])
  475. self.challenge["cnonce"] = cnonce or _cnonce()
  476. request_digest = '"%s"' % KD(
  477. H(self.A1),
  478. "%s:%s:%s:%s:%s"
  479. % (
  480. self.challenge["nonce"],
  481. "%08x" % self.challenge["nc"],
  482. self.challenge["cnonce"],
  483. self.challenge["qop"],
  484. H(A2),
  485. ),
  486. )
  487. headers["authorization"] = (
  488. 'Digest username="%s", realm="%s", nonce="%s", '
  489. 'uri="%s", algorithm=%s, response=%s, qop=%s, '
  490. 'nc=%08x, cnonce="%s"'
  491. ) % (
  492. self.credentials[0],
  493. self.challenge["realm"],
  494. self.challenge["nonce"],
  495. request_uri,
  496. self.challenge["algorithm"],
  497. request_digest,
  498. self.challenge["qop"],
  499. self.challenge["nc"],
  500. self.challenge["cnonce"],
  501. )
  502. if self.challenge.get("opaque"):
  503. headers["authorization"] += ', opaque="%s"' % self.challenge["opaque"]
  504. self.challenge["nc"] += 1
  505. def response(self, response, content):
  506. if "authentication-info" not in response:
  507. challenge = auth._parse_www_authenticate(response, "www-authenticate").get("digest", {})
  508. if "true" == challenge.get("stale"):
  509. self.challenge["nonce"] = challenge["nonce"]
  510. self.challenge["nc"] = 1
  511. return True
  512. else:
  513. updated_challenge = auth._parse_authentication_info(response, "authentication-info")
  514. if "nextnonce" in updated_challenge:
  515. self.challenge["nonce"] = updated_challenge["nextnonce"]
  516. self.challenge["nc"] = 1
  517. return False
  518. class HmacDigestAuthentication(Authentication):
  519. """Adapted from Robert Sayre's code and DigestAuthentication above."""
  520. __author__ = "Thomas Broyer (t.broyer@ltgt.net)"
  521. def __init__(self, credentials, host, request_uri, headers, response, content, http):
  522. Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
  523. challenge = auth._parse_www_authenticate(response, "www-authenticate")
  524. self.challenge = challenge["hmacdigest"]
  525. # TODO: self.challenge['domain']
  526. self.challenge["reason"] = self.challenge.get("reason", "unauthorized")
  527. if self.challenge["reason"] not in ["unauthorized", "integrity"]:
  528. self.challenge["reason"] = "unauthorized"
  529. self.challenge["salt"] = self.challenge.get("salt", "")
  530. if not self.challenge.get("snonce"):
  531. raise UnimplementedHmacDigestAuthOptionError(
  532. _("The challenge doesn't contain a server nonce, or this one is empty.")
  533. )
  534. self.challenge["algorithm"] = self.challenge.get("algorithm", "HMAC-SHA-1")
  535. if self.challenge["algorithm"] not in ["HMAC-SHA-1", "HMAC-MD5"]:
  536. raise UnimplementedHmacDigestAuthOptionError(
  537. _("Unsupported value for algorithm: %s." % self.challenge["algorithm"])
  538. )
  539. self.challenge["pw-algorithm"] = self.challenge.get("pw-algorithm", "SHA-1")
  540. if self.challenge["pw-algorithm"] not in ["SHA-1", "MD5"]:
  541. raise UnimplementedHmacDigestAuthOptionError(
  542. _("Unsupported value for pw-algorithm: %s." % self.challenge["pw-algorithm"])
  543. )
  544. if self.challenge["algorithm"] == "HMAC-MD5":
  545. self.hashmod = _md5
  546. else:
  547. self.hashmod = _sha
  548. if self.challenge["pw-algorithm"] == "MD5":
  549. self.pwhashmod = _md5
  550. else:
  551. self.pwhashmod = _sha
  552. self.key = "".join(
  553. [
  554. self.credentials[0],
  555. ":",
  556. self.pwhashmod.new("".join([self.credentials[1], self.challenge["salt"]])).hexdigest().lower(),
  557. ":",
  558. self.challenge["realm"],
  559. ]
  560. )
  561. self.key = self.pwhashmod.new(self.key).hexdigest().lower()
  562. def request(self, method, request_uri, headers, content):
  563. """Modify the request headers"""
  564. keys = _get_end2end_headers(headers)
  565. keylist = "".join(["%s " % k for k in keys])
  566. headers_val = "".join([headers[k] for k in keys])
  567. created = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
  568. cnonce = _cnonce()
  569. request_digest = "%s:%s:%s:%s:%s" % (method, request_uri, cnonce, self.challenge["snonce"], headers_val,)
  570. request_digest = hmac.new(self.key, request_digest, self.hashmod).hexdigest().lower()
  571. headers["authorization"] = (
  572. 'HMACDigest username="%s", realm="%s", snonce="%s",'
  573. ' cnonce="%s", uri="%s", created="%s", '
  574. 'response="%s", headers="%s"'
  575. ) % (
  576. self.credentials[0],
  577. self.challenge["realm"],
  578. self.challenge["snonce"],
  579. cnonce,
  580. request_uri,
  581. created,
  582. request_digest,
  583. keylist,
  584. )
  585. def response(self, response, content):
  586. challenge = auth._parse_www_authenticate(response, "www-authenticate").get("hmacdigest", {})
  587. if challenge.get("reason") in ["integrity", "stale"]:
  588. return True
  589. return False
  590. class WsseAuthentication(Authentication):
  591. """This is thinly tested and should not be relied upon.
  592. At this time there isn't any third party server to test against.
  593. Blogger and TypePad implemented this algorithm at one point
  594. but Blogger has since switched to Basic over HTTPS and
  595. TypePad has implemented it wrong, by never issuing a 401
  596. challenge but instead requiring your client to telepathically know that
  597. their endpoint is expecting WSSE profile="UsernameToken"."""
  598. def __init__(self, credentials, host, request_uri, headers, response, content, http):
  599. Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
  600. def request(self, method, request_uri, headers, content):
  601. """Modify the request headers to add the appropriate
  602. Authorization header."""
  603. headers["authorization"] = 'WSSE profile="UsernameToken"'
  604. iso_now = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
  605. cnonce = _cnonce()
  606. password_digest = _wsse_username_token(cnonce, iso_now, self.credentials[1])
  607. headers["X-WSSE"] = ('UsernameToken Username="%s", PasswordDigest="%s", ' 'Nonce="%s", Created="%s"') % (
  608. self.credentials[0],
  609. password_digest,
  610. cnonce,
  611. iso_now,
  612. )
  613. class GoogleLoginAuthentication(Authentication):
  614. def __init__(self, credentials, host, request_uri, headers, response, content, http):
  615. from urllib.parse import urlencode
  616. Authentication.__init__(self, credentials, host, request_uri, headers, response, content, http)
  617. challenge = auth._parse_www_authenticate(response, "www-authenticate")
  618. service = challenge["googlelogin"].get("service", "xapi")
  619. # Blogger actually returns the service in the challenge
  620. # For the rest we guess based on the URI
  621. if service == "xapi" and request_uri.find("calendar") > 0:
  622. service = "cl"
  623. # No point in guessing Base or Spreadsheet
  624. # elif request_uri.find("spreadsheets") > 0:
  625. # service = "wise"
  626. auth = dict(Email=credentials[0], Passwd=credentials[1], service=service, source=headers["user-agent"],)
  627. resp, content = self.http.request(
  628. "https://www.google.com/accounts/ClientLogin",
  629. method="POST",
  630. body=urlencode(auth),
  631. headers={"Content-Type": "application/x-www-form-urlencoded"},
  632. )
  633. lines = content.split("\n")
  634. d = dict([tuple(line.split("=", 1)) for line in lines if line])
  635. if resp.status == 403:
  636. self.Auth = ""
  637. else:
  638. self.Auth = d["Auth"]
  639. def request(self, method, request_uri, headers, content):
  640. """Modify the request headers to add the appropriate
  641. Authorization header."""
  642. headers["authorization"] = "GoogleLogin Auth=" + self.Auth
  643. AUTH_SCHEME_CLASSES = {
  644. "basic": BasicAuthentication,
  645. "wsse": WsseAuthentication,
  646. "digest": DigestAuthentication,
  647. "hmacdigest": HmacDigestAuthentication,
  648. "googlelogin": GoogleLoginAuthentication,
  649. }
  650. AUTH_SCHEME_ORDER = ["hmacdigest", "googlelogin", "digest", "wsse", "basic"]
  651. class FileCache(object):
  652. """Uses a local directory as a store for cached files.
  653. Not really safe to use if multiple threads or processes are going to
  654. be running on the same cache.
  655. """
  656. def __init__(self, cache, safe=safename): # use safe=lambda x: md5.new(x).hexdigest() for the old behavior
  657. self.cache = cache
  658. self.safe = safe
  659. if not os.path.exists(cache):
  660. os.makedirs(self.cache)
  661. def get(self, key):
  662. retval = None
  663. cacheFullPath = os.path.join(self.cache, self.safe(key))
  664. try:
  665. f = open(cacheFullPath, "rb")
  666. retval = f.read()
  667. f.close()
  668. except IOError:
  669. pass
  670. return retval
  671. def set(self, key, value):
  672. cacheFullPath = os.path.join(self.cache, self.safe(key))
  673. f = open(cacheFullPath, "wb")
  674. f.write(value)
  675. f.close()
  676. def delete(self, key):
  677. cacheFullPath = os.path.join(self.cache, self.safe(key))
  678. if os.path.exists(cacheFullPath):
  679. os.remove(cacheFullPath)
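# Illustrative sketch (editorial addition): keys are arbitrary strings (typically URIs)
# mapped to filenames with safename(); ".cache" is a made-up directory name.
#   store = FileCache(".cache")
#   store.set("http://example.com/", b"cached bytes")
#   store.get("http://example.com/")    # -> b"cached bytes"
#   store.delete("http://example.com/")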
  680. class Credentials(object):
  681. def __init__(self):
  682. self.credentials = []
  683. def add(self, name, password, domain=""):
  684. self.credentials.append((domain.lower(), name, password))
  685. def clear(self):
  686. self.credentials = []
  687. def iter(self, domain):
  688. for (cdomain, name, password) in self.credentials:
  689. if cdomain == "" or domain == cdomain:
  690. yield (name, password)
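# Illustrative sketch (editorial addition): credentials added with an empty domain apply
# everywhere; otherwise only to the matching domain.
#   creds = Credentials()
#   creds.add("joe", "s3cret", domain="example.com")
#   list(creds.iter("example.com"))   # -> [('joe', 's3cret')]
#   list(creds.iter("other.org"))     # -> []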
  691. class KeyCerts(Credentials):
  692. """Identical to Credentials except that
  693. name/password are mapped to key/cert."""
  694. def add(self, key, cert, domain, password):
  695. self.credentials.append((domain.lower(), key, cert, password))
  696. def iter(self, domain):
  697. for (cdomain, key, cert, password) in self.credentials:
  698. if cdomain == "" or domain == cdomain:
  699. yield (key, cert, password)
  700. class AllHosts(object):
  701. pass
  702. class ProxyInfo(object):
  703. """Collect information required to use a proxy."""
  704. bypass_hosts = ()
  705. def __init__(
  706. self, proxy_type, proxy_host, proxy_port, proxy_rdns=True, proxy_user=None, proxy_pass=None, proxy_headers=None,
  707. ):
  708. """Args:
  709. proxy_type: The type of proxy server. This must be set to one of
  710. socks.PROXY_TYPE_XXX constants. For example: p =
  711. ProxyInfo(proxy_type=socks.PROXY_TYPE_HTTP, proxy_host='localhost',
  712. proxy_port=8000)
  713. proxy_host: The hostname or IP address of the proxy server.
  714. proxy_port: The port that the proxy server is running on.
  715. proxy_rdns: If True (default), DNS queries will not be performed
  716. locally, and instead, handed to the proxy to resolve. This is useful
  717. if the network does not allow resolution of non-local names. In
  718. httplib2 0.9 and earlier, this defaulted to False.
  719. proxy_user: The username used to authenticate with the proxy server.
  720. proxy_pass: The password used to authenticate with the proxy server.
  721. proxy_headers: Additional or modified headers for the proxy connect
  722. request.
  723. """
  724. if isinstance(proxy_user, bytes):
  725. proxy_user = proxy_user.decode()
  726. if isinstance(proxy_pass, bytes):
  727. proxy_pass = proxy_pass.decode()
  728. (
  729. self.proxy_type,
  730. self.proxy_host,
  731. self.proxy_port,
  732. self.proxy_rdns,
  733. self.proxy_user,
  734. self.proxy_pass,
  735. self.proxy_headers,
  736. ) = (
  737. proxy_type,
  738. proxy_host,
  739. proxy_port,
  740. proxy_rdns,
  741. proxy_user,
  742. proxy_pass,
  743. proxy_headers,
  744. )
  745. def astuple(self):
  746. return (
  747. self.proxy_type,
  748. self.proxy_host,
  749. self.proxy_port,
  750. self.proxy_rdns,
  751. self.proxy_user,
  752. self.proxy_pass,
  753. self.proxy_headers,
  754. )
  755. def isgood(self):
  756. return socks and (self.proxy_host != None) and (self.proxy_port != None)
  757. def applies_to(self, hostname):
  758. return not self.bypass_host(hostname)
  759. def bypass_host(self, hostname):
  760. """Has this host been excluded from the proxy config"""
  761. if self.bypass_hosts is AllHosts:
  762. return True
  763. hostname = "." + hostname.lstrip(".")
  764. for skip_name in self.bypass_hosts:
  765. # *.suffix
  766. if skip_name.startswith(".") and hostname.endswith(skip_name):
  767. return True
  768. # exact match
  769. if hostname == "." + skip_name:
  770. return True
  771. return False
  772. def __repr__(self):
  773. return (
  774. "<ProxyInfo type={p.proxy_type} "
  775. "host:port={p.proxy_host}:{p.proxy_port} rdns={p.proxy_rdns}"
  776. + " user={p.proxy_user} headers={p.proxy_headers}>"
  777. ).format(p=self)
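# Illustrative sketch (editorial addition): bypass_host() does suffix and exact matching
# against bypass_hosts; the proxy address and domains below are placeholders.
#   pi = ProxyInfo(socks.PROXY_TYPE_HTTP, "proxy.local", 3128)
#   pi.bypass_hosts = (".internal.example",)
#   pi.bypass_host("www.internal.example")   # -> True  (bypassed)
#   pi.applies_to("example.com")             # -> True  (goes through the proxy)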
  778. def proxy_info_from_environment(method="http"):
  779. """Read proxy info from the environment variables.
  780. """
  781. if method not in ("http", "https"):
  782. return
  783. env_var = method + "_proxy"
  784. url = os.environ.get(env_var, os.environ.get(env_var.upper()))
  785. if not url:
  786. return
  787. return proxy_info_from_url(url, method, noproxy=None)
  788. def proxy_info_from_url(url, method="http", noproxy=None):
  789. """Construct a ProxyInfo from a URL (such as http_proxy env var)
  790. """
  791. url = urllib.parse.urlparse(url)
  792. proxy_type = 3 # socks.PROXY_TYPE_HTTP
  793. pi = ProxyInfo(
  794. proxy_type=proxy_type,
  795. proxy_host=url.hostname,
  796. proxy_port=url.port or dict(https=443, http=80)[method],
  797. proxy_user=url.username or None,
  798. proxy_pass=url.password or None,
  799. proxy_headers=None,
  800. )
  801. bypass_hosts = []
  802. # If not given an explicit noproxy value, respect values in env vars.
  803. if noproxy is None:
  804. noproxy = os.environ.get("no_proxy", os.environ.get("NO_PROXY", ""))
  805. # Special case: A single '*' character means all hosts should be bypassed.
  806. if noproxy == "*":
  807. bypass_hosts = AllHosts
  808. elif noproxy.strip():
  809. bypass_hosts = noproxy.split(",")
  810. bypass_hosts = tuple(filter(bool, bypass_hosts)) # To exclude empty string.
  811. pi.bypass_hosts = bypass_hosts
  812. return pi
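# Illustrative sketch (editorial addition): parsing a typical http_proxy value; the host
# and credentials are placeholders.
#   pi = proxy_info_from_url("http://user:secret@proxy.local:3128", noproxy="localhost,127.0.0.1")
#   pi.proxy_host, pi.proxy_port   # -> ('proxy.local', 3128)
#   pi.bypass_hosts                # -> ('localhost', '127.0.0.1')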
  813. class HTTPConnectionWithTimeout(http.client.HTTPConnection):
  814. """HTTPConnection subclass that supports timeouts
  816. All timeouts are in seconds. If None is passed for timeout then
  817. Python's default timeout for sockets will be used. See for example
  818. the docs of socket.setdefaulttimeout():
  819. http://docs.python.org/library/socket.html#socket.setdefaulttimeout
  820. """
  821. def __init__(self, host, port=None, timeout=None, proxy_info=None):
  822. http.client.HTTPConnection.__init__(self, host, port=port, timeout=timeout)
  823. self.proxy_info = proxy_info
  824. if proxy_info and not isinstance(proxy_info, ProxyInfo):
  825. self.proxy_info = proxy_info("http")
  826. def connect(self):
  827. """Connect to the host and port specified in __init__."""
  828. if self.proxy_info and socks is None:
  829. raise ProxiesUnavailableError("Proxy support missing but proxy use was requested!")
  830. if self.proxy_info and self.proxy_info.isgood() and self.proxy_info.applies_to(self.host):
  831. use_proxy = True
  832. (
  833. proxy_type,
  834. proxy_host,
  835. proxy_port,
  836. proxy_rdns,
  837. proxy_user,
  838. proxy_pass,
  839. proxy_headers,
  840. ) = self.proxy_info.astuple()
  841. host = proxy_host
  842. port = proxy_port
  843. else:
  844. use_proxy = False
  845. host = self.host
  846. port = self.port
  847. proxy_type = None
  848. socket_err = None
  849. for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
  850. af, socktype, proto, canonname, sa = res
  851. try:
  852. if use_proxy:
  853. self.sock = socks.socksocket(af, socktype, proto)
  854. self.sock.setproxy(
  855. proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass,
  856. )
  857. else:
  858. self.sock = socket.socket(af, socktype, proto)
  859. self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
  860. if has_timeout(self.timeout):
  861. self.sock.settimeout(self.timeout)
  862. if self.debuglevel > 0:
  863. print("connect: ({0}, {1}) ************".format(self.host, self.port))
  864. if use_proxy:
  865. print(
  866. "proxy: {0} ************".format(
  867. str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
  868. )
  869. )
  870. self.sock.connect((self.host, self.port) + sa[2:])
  871. except socket.error as e:
  872. socket_err = e
  873. if self.debuglevel > 0:
  874. print("connect fail: ({0}, {1})".format(self.host, self.port))
  875. if use_proxy:
  876. print(
  877. "proxy: {0}".format(
  878. str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
  879. )
  880. )
  881. if self.sock:
  882. self.sock.close()
  883. self.sock = None
  884. continue
  885. break
  886. if not self.sock:
  887. raise socket_err
  888. class HTTPSConnectionWithTimeout(http.client.HTTPSConnection):
  889. """This class allows communication via SSL.
  890. All timeouts are in seconds. If None is passed for timeout then
  891. Python's default timeout for sockets will be used. See for example
  892. the docs of socket.setdefaulttimeout():
  893. http://docs.python.org/library/socket.html#socket.setdefaulttimeout
  894. """
  895. def __init__(
  896. self,
  897. host,
  898. port=None,
  899. key_file=None,
  900. cert_file=None,
  901. timeout=None,
  902. proxy_info=None,
  903. ca_certs=None,
  904. disable_ssl_certificate_validation=False,
  905. tls_maximum_version=None,
  906. tls_minimum_version=None,
  907. key_password=None,
  908. ):
  909. self.disable_ssl_certificate_validation = disable_ssl_certificate_validation
  910. self.ca_certs = ca_certs if ca_certs else CA_CERTS
  911. self.proxy_info = proxy_info
  912. if proxy_info and not isinstance(proxy_info, ProxyInfo):
  913. self.proxy_info = proxy_info("https")
  914. context = _build_ssl_context(
  915. self.disable_ssl_certificate_validation,
  916. self.ca_certs,
  917. cert_file,
  918. key_file,
  919. maximum_version=tls_maximum_version,
  920. minimum_version=tls_minimum_version,
  921. key_password=key_password,
  922. )
  923. super(HTTPSConnectionWithTimeout, self).__init__(
  924. host, port=port, timeout=timeout, context=context,
  925. )
  926. self.key_file = key_file
  927. self.cert_file = cert_file
  928. self.key_password = key_password
  929. def connect(self):
  930. """Connect to a host on a given (SSL) port."""
  931. if self.proxy_info and self.proxy_info.isgood() and self.proxy_info.applies_to(self.host):
  932. use_proxy = True
  933. (
  934. proxy_type,
  935. proxy_host,
  936. proxy_port,
  937. proxy_rdns,
  938. proxy_user,
  939. proxy_pass,
  940. proxy_headers,
  941. ) = self.proxy_info.astuple()
  942. host = proxy_host
  943. port = proxy_port
  944. else:
  945. use_proxy = False
  946. host = self.host
  947. port = self.port
  948. proxy_type = None
  949. proxy_headers = None
  950. socket_err = None
  951. address_info = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
  952. for family, socktype, proto, canonname, sockaddr in address_info:
  953. try:
  954. if use_proxy:
  955. sock = socks.socksocket(family, socktype, proto)
  956. sock.setproxy(
  957. proxy_type, proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass,
  958. )
  959. else:
  960. sock = socket.socket(family, socktype, proto)
  961. sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
  962. if has_timeout(self.timeout):
  963. sock.settimeout(self.timeout)
  964. sock.connect((self.host, self.port))
  965. self.sock = self._context.wrap_socket(sock, server_hostname=self.host)
  966. # Python 3.3 compatibility: emulate the check_hostname behavior
  967. if not hasattr(self._context, "check_hostname") and not self.disable_ssl_certificate_validation:
  968. try:
  969. ssl.match_hostname(self.sock.getpeercert(), self.host)
  970. except Exception:
  971. self.sock.shutdown(socket.SHUT_RDWR)
  972. self.sock.close()
  973. raise
  974. if self.debuglevel > 0:
  975. print("connect: ({0}, {1})".format(self.host, self.port))
  976. if use_proxy:
  977. print(
  978. "proxy: {0}".format(
  979. str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
  980. )
  981. )
  982. except (ssl.SSLError, ssl.CertificateError) as e:
  983. if sock:
  984. sock.close()
  985. if self.sock:
  986. self.sock.close()
  987. self.sock = None
  988. raise
  989. except (socket.timeout, socket.gaierror):
  990. raise
  991. except socket.error as e:
  992. socket_err = e
  993. if self.debuglevel > 0:
  994. print("connect fail: ({0}, {1})".format(self.host, self.port))
  995. if use_proxy:
  996. print(
  997. "proxy: {0}".format(
  998. str((proxy_host, proxy_port, proxy_rdns, proxy_user, proxy_pass, proxy_headers,))
  999. )
  1000. )
  1001. if self.sock:
  1002. self.sock.close()
  1003. self.sock = None
  1004. continue
  1005. break
  1006. if not self.sock:
  1007. raise socket_err
  1008. SCHEME_TO_CONNECTION = {
  1009. "http": HTTPConnectionWithTimeout,
  1010. "https": HTTPSConnectionWithTimeout,
  1011. }
  1012. class Http(object):
  1013. """An HTTP client that handles:
  1014. - all methods
  1015. - caching
  1016. - ETags
  1017. - compression
  1018. - HTTPS
  1019. - Basic
  1020. - Digest
  1021. - WSSE
  1022. and more.
  1023. """
  1024. def __init__(
  1025. self,
  1026. cache=None,
  1027. timeout=None,
  1028. proxy_info=proxy_info_from_environment,
  1029. ca_certs=None,
  1030. disable_ssl_certificate_validation=False,
  1031. tls_maximum_version=None,
  1032. tls_minimum_version=None,
  1033. ):
  1034. """If 'cache' is a string then it is used as a directory name for
  1035. a disk cache. Otherwise it must be an object that supports the
  1036. same interface as FileCache.
  1037. All timeouts are in seconds. If None is passed for timeout
  1038. then Python's default timeout for sockets will be used. See
  1039. for example the docs of socket.setdefaulttimeout():
  1040. http://docs.python.org/library/socket.html#socket.setdefaulttimeout
  1041. `proxy_info` may be:
  1042. - a callable that takes the http scheme ('http' or 'https') and
  1043. returns a ProxyInfo instance per request. By default, uses
  1044. proxy_info_from_environment.
  1045. - a ProxyInfo instance (static proxy config).
  1046. - None (proxy disabled).
  1047. ca_certs is the path of a file containing root CA certificates for SSL
  1048. server certificate validation. By default, a CA cert file bundled with
  1049. httplib2 is used.
  1050. If disable_ssl_certificate_validation is true, SSL cert validation will
  1051. not be performed.
  1052. tls_maximum_version / tls_minimum_version require Python 3.7+ /
  1053. OpenSSL 1.1.0g+. A value of "TLSv1_3" requires OpenSSL 1.1.1+.
  1054. """
  1055. self.proxy_info = proxy_info
  1056. self.ca_certs = ca_certs
  1057. self.disable_ssl_certificate_validation = disable_ssl_certificate_validation
  1058. self.tls_maximum_version = tls_maximum_version
  1059. self.tls_minimum_version = tls_minimum_version
  1060. # Map domain name to an httplib connection
  1061. self.connections = {}
  1062. # The location of the cache, for now a directory
  1063. # where cached responses are held.
  1064. if cache and isinstance(cache, str):
  1065. self.cache = FileCache(cache)
  1066. else:
  1067. self.cache = cache
  1068. # Name/password
  1069. self.credentials = Credentials()
  1070. # Key/cert
  1071. self.certificates = KeyCerts()
  1072. # authorization objects
  1073. self.authorizations = []
  1074. # If set to False then no redirects are followed, even safe ones.
  1075. self.follow_redirects = True
  1076. self.redirect_codes = REDIRECT_CODES
  1077. # Which HTTP methods do we apply optimistic concurrency to, i.e.
  1078. # which methods get an "if-match:" etag header added to them.
  1079. self.optimistic_concurrency_methods = ["PUT", "PATCH"]
  1080. self.safe_methods = list(SAFE_METHODS)
  1081. # If 'follow_redirects' is True, and this is set to True then
  1082. # all redirects are followed, including unsafe ones.
  1083. self.follow_all_redirects = False
  1084. self.ignore_etag = False
  1085. self.force_exception_to_status_code = False
  1086. self.timeout = timeout
  1087. # Keep Authorization: headers on a redirect.
  1088. self.forward_authorization_headers = False
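# Illustrative sketch (editorial addition): typical constructions of Http. The cache
# directory and proxy address are placeholders; proxy_info defaults to reading the
# environment via proxy_info_from_environment.
#   h = Http(cache=".cache", timeout=10)
#   h = Http(proxy_info=ProxyInfo(socks.PROXY_TYPE_HTTP, "proxy.local", 3128))
#   h = Http(tls_minimum_version="TLSv1_2")   # needs Python 3.7+ / OpenSSL 1.1+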
  1089. def close(self):
  1090. """Close persistent connections, clear sensitive data.
  1091. Not thread-safe, requires external synchronization against concurrent requests.
  1092. """
  1093. existing, self.connections = self.connections, {}
  1094. for _, c in existing.items():
  1095. c.close()
  1096. self.certificates.clear()
  1097. self.clear_credentials()
  1098. def __getstate__(self):
  1099. state_dict = copy.copy(self.__dict__)
  1100. # In case request is augmented by some foreign object such as
  1101. # credentials which handle auth
  1102. if "request" in state_dict:
  1103. del state_dict["request"]
  1104. if "connections" in state_dict:
  1105. del state_dict["connections"]
  1106. return state_dict
  1107. def __setstate__(self, state):
  1108. self.__dict__.update(state)
  1109. self.connections = {}
  1110. def _auth_from_challenge(self, host, request_uri, headers, response, content):
  1111. """A generator that creates Authorization objects
  1112. that can be applied to requests.
  1113. """
  1114. challenges = auth._parse_www_authenticate(response, "www-authenticate")
  1115. for cred in self.credentials.iter(host):
  1116. for scheme in AUTH_SCHEME_ORDER:
  1117. if scheme in challenges:
  1118. yield AUTH_SCHEME_CLASSES[scheme](cred, host, request_uri, headers, response, content, self)
  1119. def add_credentials(self, name, password, domain=""):
  1120. """Add a name and password that will be used
  1121. any time a request requires authentication."""
  1122. self.credentials.add(name, password, domain)
  1123. def add_certificate(self, key, cert, domain, password=None):
  1124. """Add a key and cert that will be used
  1125. any time a request requires authentication."""
  1126. self.certificates.add(key, cert, domain, password)
  1127. def clear_credentials(self):
  1128. """Remove all the names and passwords
  1129. that are used for authentication"""
  1130. self.credentials.clear()
  1131. self.authorizations = []
  1132. def _conn_request(self, conn, request_uri, method, body, headers):
  1133. i = 0
  1134. seen_bad_status_line = False
  1135. while i < RETRIES:
  1136. i += 1
  1137. try:
  1138. if conn.sock is None:
  1139. conn.connect()
  1140. conn.request(method, request_uri, body, headers)
  1141. except socket.timeout:
  1142. conn.close()
  1143. raise
  1144. except socket.gaierror:
  1145. conn.close()
  1146. raise ServerNotFoundError("Unable to find the server at %s" % conn.host)
  1147. except socket.error as e:
  1148. errno_ = _errno_from_exception(e)
  1149. if errno_ in (errno.ENETUNREACH, errno.EADDRNOTAVAIL) and i < RETRIES:
  1150. continue # retry on potentially transient errors
  1151. raise
  1152. except http.client.HTTPException:
  1153. if conn.sock is None:
  1154. if i < RETRIES - 1:
  1155. conn.close()
  1156. conn.connect()
  1157. continue
  1158. else:
  1159. conn.close()
  1160. raise
  1161. if i < RETRIES - 1:
  1162. conn.close()
  1163. conn.connect()
  1164. continue
  1165. # Just because the server closed the connection doesn't apparently mean
  1166. # that the server didn't send a response.
  1167. pass
  1168. try:
  1169. response = conn.getresponse()
  1170. except (http.client.BadStatusLine, http.client.ResponseNotReady):
  1171. # If we get a BadStatusLine on the first try then that means
  1172. # the connection just went stale, so retry regardless of the
  1173. # number of RETRIES set.
  1174. if not seen_bad_status_line and i == 1:
  1175. i = 0
  1176. seen_bad_status_line = True
  1177. conn.close()
  1178. conn.connect()
  1179. continue
  1180. else:
  1181. conn.close()
  1182. raise
  1183. except socket.timeout:
  1184. raise
  1185. except (socket.error, http.client.HTTPException):
  1186. conn.close()
  1187. if i == 0:
  1188. conn.close()
  1189. conn.connect()
  1190. continue
  1191. else:
  1192. raise
  1193. else:
  1194. content = b""
  1195. if method == "HEAD":
  1196. conn.close()
  1197. else:
  1198. content = response.read()
  1199. response = Response(response)
  1200. if method != "HEAD":
  1201. content = _decompressContent(response, content)
  1202. break
  1203. return (response, content)

    def _request(
        self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey,
    ):
        """Do the actual request using the connection object
        and also follow one level of redirects if necessary"""

        auths = [(auth.depth(request_uri), auth) for auth in self.authorizations if auth.inscope(host, request_uri)]
        auth = auths and sorted(auths)[0][1] or None
        if auth:
            auth.request(method, request_uri, headers, body)

        (response, content) = self._conn_request(conn, request_uri, method, body, headers)

        if auth:
            if auth.response(response, body):
                auth.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers)
                response._stale_digest = 1

        if response.status == 401:
            for authorization in self._auth_from_challenge(host, request_uri, headers, response, content):
                authorization.request(method, request_uri, headers, body)
                (response, content) = self._conn_request(conn, request_uri, method, body, headers)
                if response.status != 401:
                    self.authorizations.append(authorization)
                    authorization.response(response, body)
                    break

        if self.follow_all_redirects or method in self.safe_methods or response.status in (303, 308):
            if self.follow_redirects and response.status in self.redirect_codes:
                # Pick out the location header and basically start from the beginning
                # remembering first to strip the ETag header and decrement our 'depth'
                if redirections:
                    if "location" not in response and response.status != 300:
                        raise RedirectMissingLocation(
                            _("Redirected but the response is missing a Location: header."), response, content,
                        )
                    # Fix-up relative redirects (which violate an RFC 2616 MUST)
                    if "location" in response:
                        location = response["location"]
                        (scheme, authority, path, query, fragment) = parse_uri(location)
                        if authority is None:
                            response["location"] = urllib.parse.urljoin(absolute_uri, location)
                    if response.status == 308 or (response.status == 301 and (method in self.safe_methods)):
                        response["-x-permanent-redirect-url"] = response["location"]
                        if "content-location" not in response:
                            response["content-location"] = absolute_uri
                        _updateCache(headers, response, content, self.cache, cachekey)
                    if "if-none-match" in headers:
                        del headers["if-none-match"]
                    if "if-modified-since" in headers:
                        del headers["if-modified-since"]
                    if "authorization" in headers and not self.forward_authorization_headers:
                        del headers["authorization"]
                    if "location" in response:
                        location = response["location"]
                        old_response = copy.deepcopy(response)
                        if "content-location" not in old_response:
                            old_response["content-location"] = absolute_uri
                        redirect_method = method
                        if response.status in [302, 303]:
                            redirect_method = "GET"
                            body = None
                        (response, content) = self.request(
                            location, method=redirect_method, body=body, headers=headers, redirections=redirections - 1,
                        )
                        response.previous = old_response
                else:
                    raise RedirectLimit(
                        "Redirected more times than redirection_limit allows.", response, content,
                    )
            elif response.status in [200, 203] and method in self.safe_methods:
                # Don't cache 206's since we aren't going to handle byte range requests
                if "content-location" not in response:
                    response["content-location"] = absolute_uri
                _updateCache(headers, response, content, self.cache, cachekey)

        return (response, content)

    def _normalize_headers(self, headers):
        return _normalize_headers(headers)

    # Need to catch and rebrand some exceptions
    # Then need to optionally turn all exceptions into status codes
    # including all socket.* and httplib.* exceptions.
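    # Illustrative note on the status-code behaviour mentioned above (the host
    # name here is an assumption): with force_exception_to_status_code set,
    # connection failures come back as synthetic responses instead of raising,
    # e.g.
    #
    #   h = Http()
    #   h.force_exception_to_status_code = True
    #   resp, content = h.request("http://nonexistent.invalid/")
    #   # resp.status is then 400 (or 408 for timeouts) rather than an exception.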

    def request(
        self, uri, method="GET", body=None, headers=None, redirections=DEFAULT_MAX_REDIRECTS, connection_type=None,
    ):
        """Performs a single HTTP request.

        The 'uri' is the URI of the HTTP resource and can begin with either
        'http' or 'https'. The value of 'uri' must be an absolute URI.

        The 'method' is the HTTP method to perform, such as GET, POST,
        DELETE, etc. There is no restriction on the methods allowed.

        The 'body' is the entity body to be sent with the request. It is a
        string object.

        Any extra headers that are to be sent with the request should be
        provided in the 'headers' dictionary.

        The maximum number of redirects to follow before raising an
        exception is 'redirections'. The default is 5.

        The return value is a tuple of (response, content), the first
        being an instance of the 'Response' class, the second being
        a bytes object that contains the response entity body.
        """
  1299. conn_key = ""
  1300. try:
  1301. if headers is None:
  1302. headers = {}
  1303. else:
  1304. headers = self._normalize_headers(headers)
  1305. if "user-agent" not in headers:
  1306. headers["user-agent"] = "Python-httplib2/%s (gzip)" % __version__
  1307. uri = iri2uri(uri)
  1308. # Prevent CWE-75 space injection to manipulate request via part of uri.
  1309. # Prevent CWE-93 CRLF injection to modify headers via part of uri.
  1310. uri = uri.replace(" ", "%20").replace("\r", "%0D").replace("\n", "%0A")
  1311. (scheme, authority, request_uri, defrag_uri) = urlnorm(uri)
  1312. conn_key = scheme + ":" + authority
  1313. conn = self.connections.get(conn_key)
  1314. if conn is None:
  1315. if not connection_type:
  1316. connection_type = SCHEME_TO_CONNECTION[scheme]
  1317. certs = list(self.certificates.iter(authority))
  1318. if issubclass(connection_type, HTTPSConnectionWithTimeout):
  1319. if certs:
  1320. conn = self.connections[conn_key] = connection_type(
  1321. authority,
  1322. key_file=certs[0][0],
  1323. cert_file=certs[0][1],
  1324. timeout=self.timeout,
  1325. proxy_info=self.proxy_info,
  1326. ca_certs=self.ca_certs,
  1327. disable_ssl_certificate_validation=self.disable_ssl_certificate_validation,
  1328. tls_maximum_version=self.tls_maximum_version,
  1329. tls_minimum_version=self.tls_minimum_version,
  1330. key_password=certs[0][2],
  1331. )
  1332. else:
  1333. conn = self.connections[conn_key] = connection_type(
  1334. authority,
  1335. timeout=self.timeout,
  1336. proxy_info=self.proxy_info,
  1337. ca_certs=self.ca_certs,
  1338. disable_ssl_certificate_validation=self.disable_ssl_certificate_validation,
  1339. tls_maximum_version=self.tls_maximum_version,
  1340. tls_minimum_version=self.tls_minimum_version,
  1341. )
  1342. else:
  1343. conn = self.connections[conn_key] = connection_type(
  1344. authority, timeout=self.timeout, proxy_info=self.proxy_info
  1345. )
  1346. conn.set_debuglevel(debuglevel)
  1347. if "range" not in headers and "accept-encoding" not in headers:
  1348. headers["accept-encoding"] = "gzip, deflate"

            info = email.message.Message()
            cachekey = None
            cached_value = None
            if self.cache:
                cachekey = defrag_uri
                cached_value = self.cache.get(cachekey)
                if cached_value:
                    try:
                        info, content = cached_value.split(b"\r\n\r\n", 1)
                        info = email.message_from_bytes(info)
                        for k, v in info.items():
                            if v.startswith("=?") and v.endswith("?="):
                                info.replace_header(k, str(*email.header.decode_header(v)[0]))
                    except (IndexError, ValueError):
                        self.cache.delete(cachekey)
                        cachekey = None
                        cached_value = None

            if (
                method in self.optimistic_concurrency_methods
                and self.cache
                and "etag" in info
                and not self.ignore_etag
                and "if-match" not in headers
            ):
                # http://www.w3.org/1999/04/Editing/
                headers["if-match"] = info["etag"]

            # https://tools.ietf.org/html/rfc7234
            # A cache MUST invalidate the effective Request URI as well as [...] Location and Content-Location
            # when a non-error status code is received in response to an unsafe request method.
            if self.cache and cachekey and method not in self.safe_methods:
                self.cache.delete(cachekey)

            # Check the vary header in the cache to see if this request
            # matches what varies in the cache.
            if method in self.safe_methods and "vary" in info:
                vary = info["vary"]
                vary_headers = vary.lower().replace(" ", "").split(",")
                for header in vary_headers:
                    key = "-varied-%s" % header
                    value = info[key]
                    if headers.get(header, None) != value:
                        cached_value = None
                        break

            if (
                self.cache
                and cached_value
                and (method in self.safe_methods or info["status"] == "308")
                and "range" not in headers
            ):
                redirect_method = method
                if info["status"] not in ("307", "308"):
                    redirect_method = "GET"
                if "-x-permanent-redirect-url" in info:
                    # Should cached permanent redirects be counted in our redirection count? For now, yes.
                    if redirections <= 0:
                        raise RedirectLimit(
                            "Redirected more times than redirection_limit allows.", {}, "",
                        )
                    (response, new_content) = self.request(
                        info["-x-permanent-redirect-url"],
                        method=redirect_method,
                        headers=headers,
                        redirections=redirections - 1,
                    )
                    response.previous = Response(info)
                    response.previous.fromcache = True
                else:
                    # Determine our course of action:
                    #   Is the cached entry fresh or stale?
                    #   Has the client requested a non-cached response?
                    #
                    # There seem to be three possible answers:
                    # 1. [FRESH] Return the cache entry w/o doing a GET
                    # 2. [STALE] Do the GET (but add in cache validators if available)
                    # 3. [TRANSPARENT] Do a GET w/o any cache validators (Cache-Control: no-cache) on the request
                    entry_disposition = _entry_disposition(info, headers)

                    if entry_disposition == "FRESH":
                        response = Response(info)
                        response.fromcache = True
                        return (response, content)

                    if entry_disposition == "STALE":
                        if "etag" in info and not self.ignore_etag and "if-none-match" not in headers:
                            headers["if-none-match"] = info["etag"]
                        if "last-modified" in info and "last-modified" not in headers:
                            headers["if-modified-since"] = info["last-modified"]
                    elif entry_disposition == "TRANSPARENT":
                        pass

                    (response, new_content) = self._request(
                        conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
                    )

                    if response.status == 304 and method == "GET":
                        # Rewrite the cache entry with the new end-to-end headers
                        # Take all headers that are in response
                        # and overwrite their values in info,
                        # unless they are hop-by-hop, or are listed in the connection header.
                        for key in _get_end2end_headers(response):
                            info[key] = response[key]
                        merged_response = Response(info)
                        if hasattr(response, "_stale_digest"):
                            merged_response._stale_digest = response._stale_digest
                        _updateCache(headers, merged_response, content, self.cache, cachekey)
                        response = merged_response
                        response.status = 200
                        response.fromcache = True
                    elif response.status == 200:
                        content = new_content
                    else:
                        self.cache.delete(cachekey)
                        content = new_content
            else:
                cc = _parse_cache_control(headers)
                if "only-if-cached" in cc:
                    info["status"] = "504"
                    response = Response(info)
                    content = b""
                else:
                    (response, content) = self._request(
                        conn, authority, uri, request_uri, method, body, headers, redirections, cachekey,
                    )
        except Exception as e:
            is_timeout = isinstance(e, socket.timeout)
            if is_timeout:
                conn = self.connections.pop(conn_key, None)
                if conn:
                    conn.close()

            if self.force_exception_to_status_code:
                if isinstance(e, HttpLib2ErrorWithResponse):
                    response = e.response
                    content = e.content
                    response.status = 500
                    response.reason = str(e)
                elif isinstance(e, socket.timeout):
                    content = b"Request Timeout"
                    response = Response({"content-type": "text/plain", "status": "408", "content-length": len(content)})
                    response.reason = "Request Timeout"
                else:
                    content = str(e).encode("utf-8")
                    response = Response({"content-type": "text/plain", "status": "400", "content-length": len(content)})
                    response.reason = "Bad Request"
            else:
                raise

        return (response, content)


class Response(dict):
    """An object more like email.message than httplib.HTTPResponse."""

    """Is this response from our local cache"""
    fromcache = False

    """HTTP protocol version used by server.

    10 for HTTP/1.0, 11 for HTTP/1.1.
    """
    version = 11

    "Status code returned by server."
    status = 200

    """Reason phrase returned by server."""
    reason = "Ok"

    previous = None

    def __init__(self, info):
        # info is either an email.message or
        # an httplib.HTTPResponse object.
        if isinstance(info, http.client.HTTPResponse):
            for key, value in info.getheaders():
                key = key.lower()
                prev = self.get(key)
                if prev is not None:
                    value = ", ".join((prev, value))
                self[key] = value
            self.status = info.status
            self["status"] = str(self.status)
            self.reason = info.reason
            self.version = info.version
        elif isinstance(info, email.message.Message):
            for key, value in list(info.items()):
                self[key.lower()] = value
            self.status = int(self["status"])
        else:
            for key, value in info.items():
                self[key.lower()] = value
            self.status = int(self.get("status", self.status))

    def __getattr__(self, name):
        if name == "dict":
            return self
        else:
            raise AttributeError(name)
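

# Minimal sketch of how a Response is typically consumed (illustrative only;
# the URL is an assumption). Header names are stored as lower-cased dict keys.
#
#   h = Http()
#   resp, content = h.request("http://example.org/")
#   if resp.status == 200 and not resp.fromcache:
#       print(resp.get("content-type"), len(content))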