_browser.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546
  1. # Copyright 2008,2012 Canonical Ltd.
  2. # This file is part of lazr.restfulclient.
  3. #
  4. # lazr.restfulclient is free software: you can redistribute it and/or modify
  5. # it under the terms of the GNU Lesser General Public License as
  6. # published by the Free Software Foundation, either version 3 of the
  7. # License, or (at your option) any later version.
  8. #
  9. # lazr.restfulclient is distributed in the hope that it will be useful, but
  10. # WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. # Lesser General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU Lesser General Public
  15. # License along with lazr.restfulclient. If not, see
  16. # <http://www.gnu.org/licenses/>.
  17. """Browser object to make requests of lazr.restful web services.
  18. The `Browser` class does some massage of HTTP requests and responses,
  19. and handles custom caches. It is not part of the public
  20. lazr.restfulclient API. (But maybe it should be?)
  21. """
  22. __metaclass__ = type
  23. __all__ = [
  24. "Browser",
  25. "RestfulHttp",
  26. "ssl_certificate_validation_disabled",
  27. ]
  28. import atexit
  29. import errno
  30. import os
  31. import re
  32. import shutil
  33. import sys
  34. import tempfile
  35. from hashlib import md5
  36. from io import BytesIO
  37. from json import dumps
  38. # Import sleep directly into the module so we can monkey-patch it
  39. # during a test.
  40. from time import sleep
  41. from httplib2 import Http, urlnorm
  42. try:
  43. from httplib2 import proxy_info_from_environment
  44. except ImportError:
  45. from httplib2 import ProxyInfo
  46. proxy_info_from_environment = ProxyInfo.from_environment
  47. try:
  48. # Python 3.
  49. from urllib.parse import urlencode
  50. except ImportError:
  51. from urllib import urlencode
  52. from wadllib.application import Application
  53. from lazr.restfulclient._json import DatetimeJSONEncoder
  54. from lazr.restfulclient.errors import HTTPError, error_for
  55. from lazr.uri import URI
  56. if bytes is str:
  57. # Python 2
  58. unicode_type = unicode # noqa: F821
  59. str_types = basestring # noqa: F821
  60. else:
  61. unicode_type = str
  62. str_types = str
  63. # A drop-in replacement for httplib2's safename. Substantially borrowed
  64. # from httplib2, but its cache name format changed in 0.12.0 and we want to
  65. # stick with the previous version.
  66. re_url_scheme = re.compile(br"^\w+://")
  67. re_url_scheme_s = re.compile(r"^\w+://")
  68. re_slash = re.compile(br"[?/:|]+")
  69. def safename(filename):
  70. """Return a filename suitable for the cache.
  71. Strips dangerous and common characters to create a filename we
  72. can use to store the cache in.
  73. """
  74. try:
  75. if isinstance(filename, bytes):
  76. filename_match = filename.decode("utf-8")
  77. else:
  78. filename_match = filename
  79. if re_url_scheme_s.match(filename_match):
  80. if isinstance(filename, bytes):
  81. filename = filename.decode("utf-8")
  82. filename = filename.encode("idna")
  83. else:
  84. filename = filename.encode("idna")
  85. except UnicodeError:
  86. pass
  87. if isinstance(filename, unicode_type):
  88. filename = filename.encode("utf-8")
  89. filemd5 = md5(filename).hexdigest()
  90. filename = re_url_scheme.sub(b"", filename)
  91. filename = re_slash.sub(b",", filename)
  92. # This is the part that we changed. In stock httplib2, the
  93. # filename is trimmed if it's longer than 200 characters, and then
  94. # a comma and a 32-character md5 sum are appended. This causes
  95. # problems on eCryptfs filesystems, where the maximum safe
  96. # filename length is closer to 143 characters.
  97. #
  98. # We take a (user-hackable) maximum filename length from
  99. # RestfulHttp and subtract 33 characters to make room for the comma
  100. # and the md5 sum.
  101. #
  102. # See:
  103. # http://code.google.com/p/httplib2/issues/detail?id=92
  104. # https://bugs.launchpad.net/bugs/344878
  105. # https://bugs.launchpad.net/bugs/545197
  106. maximum_filename_length = RestfulHttp.maximum_cache_filename_length
  107. maximum_length_before_md5_sum = maximum_filename_length - 32 - 1
  108. if len(filename) > maximum_length_before_md5_sum:
  109. filename = filename[:maximum_length_before_md5_sum]
  110. return ",".join((filename.decode("utf-8"), filemd5))
  111. def ssl_certificate_validation_disabled():
  112. """Whether the user has disabled SSL certificate connection.
  113. Some testing servers have broken certificates. Rather than raising an
  114. error, we allow an environment variable,
  115. ``LP_DISABLE_SSL_CERTIFICATE_VALIDATION`` to disable the check.
  116. """
  117. return bool(os.environ.get("LP_DISABLE_SSL_CERTIFICATE_VALIDATION", False))
  118. if os.path.exists("/etc/ssl/certs/ca-certificates.crt"):
  119. SYSTEM_CA_CERTS = "/etc/ssl/certs/ca-certificates.crt"
  120. else:
  121. from httplib2 import CA_CERTS as SYSTEM_CA_CERTS
  122. class RestfulHttp(Http):
  123. """An Http subclass with some custom behavior.
  124. This Http client uses the TE header instead of the Accept-Encoding
  125. header to ask for compressed representations. It also knows how to
  126. react when its cache is a MultipleRepresentationCache.
  127. """
  128. maximum_cache_filename_length = 143
  129. def __init__(
  130. self,
  131. authorizer=None,
  132. cache=None,
  133. timeout=None,
  134. proxy_info=proxy_info_from_environment,
  135. ):
  136. cert_disabled = ssl_certificate_validation_disabled()
  137. super(RestfulHttp, self).__init__(
  138. cache,
  139. timeout,
  140. proxy_info,
  141. disable_ssl_certificate_validation=cert_disabled,
  142. ca_certs=SYSTEM_CA_CERTS,
  143. )
  144. self.authorizer = authorizer
  145. if self.authorizer is not None:
  146. self.authorizer.authorizeSession(self)
  147. def _request(
  148. self,
  149. conn,
  150. host,
  151. absolute_uri,
  152. request_uri,
  153. method,
  154. body,
  155. headers,
  156. redirections,
  157. cachekey,
  158. ):
  159. """Use the authorizer to authorize an outgoing request."""
  160. if "authorization" in headers:
  161. # There's an authorization header left over from a
  162. # previous request that resulted in a redirect. Resources
  163. # protected by OAuth or HTTP Digest must send a distinct
  164. # Authorization header with each request, to prevent
  165. # playback attacks. Remove the Authorization header and
  166. # start again.
  167. del headers["authorization"]
  168. if self.authorizer is not None:
  169. self.authorizer.authorizeRequest(
  170. absolute_uri, method, body, headers
  171. )
  172. return super(RestfulHttp, self)._request(
  173. conn,
  174. host,
  175. absolute_uri,
  176. request_uri,
  177. method,
  178. body,
  179. headers,
  180. redirections,
  181. cachekey,
  182. )
  183. def _getCachedHeader(self, uri, header):
  184. """Retrieve a cached value for an HTTP header."""
  185. if isinstance(self.cache, MultipleRepresentationCache):
  186. return self.cache._getCachedHeader(uri, header)
  187. return None
  188. class AtomicFileCache(object):
  189. """A FileCache that can be shared by multiple processes.
  190. Based on a patch found at
  191. <http://code.google.com/p/httplib2/issues/detail?id=125>.
  192. """
  193. TEMPFILE_PREFIX = ".temp"
  194. def __init__(self, cache, safe=safename):
  195. """Construct an ``AtomicFileCache``.
  196. :param cache: The directory to use as a cache.
  197. :param safe: A function that takes a key and returns a name that's
  198. safe to use as a filename. The key must never return a string
  199. that begins with ``TEMPFILE_PREFIX``. By default uses
  200. ``safename``.
  201. """
  202. self._cache_dir = os.path.normpath(cache)
  203. self._get_safe_name = safe
  204. try:
  205. os.makedirs(self._cache_dir)
  206. except OSError as e:
  207. if e.errno != errno.EEXIST:
  208. raise
  209. def _get_key_path(self, key):
  210. """Return the path on disk where ``key`` is stored."""
  211. safe_key = self._get_safe_name(key)
  212. if safe_key.startswith(self.TEMPFILE_PREFIX):
  213. # If the cache key starts with the tempfile prefix, then it's
  214. # possible that it will clash with a temporary file that we
  215. # create.
  216. raise ValueError(
  217. "Cache key cannot start with '%s'" % self.TEMPFILE_PREFIX
  218. )
  219. return os.path.join(self._cache_dir, safe_key)
  220. def get(self, key):
  221. """Get the value of ``key`` if set.
  222. This behaves slightly differently to ``FileCache`` in that if
  223. ``set()`` fails to store a key, this ``get()`` will behave as if that
  224. key were never set whereas ``FileCache`` returns the empty string.
  225. :param key: The key to retrieve. Must be either bytes or unicode
  226. text.
  227. :return: The value of ``key`` if set, None otherwise.
  228. """
  229. cache_full_path = self._get_key_path(key)
  230. try:
  231. f = open(cache_full_path, "rb")
  232. try:
  233. return f.read()
  234. finally:
  235. f.close()
  236. except (IOError, OSError) as e:
  237. if e.errno != errno.ENOENT:
  238. raise
  239. def set(self, key, value):
  240. """Set ``key`` to ``value``.
  241. :param key: The key to set. Must be either bytes or unicode text.
  242. :param value: The value to set ``key`` to. Must be bytes.
  243. """
  244. # Open a temporary file
  245. handle, path_name = tempfile.mkstemp(
  246. prefix=self.TEMPFILE_PREFIX, dir=self._cache_dir
  247. )
  248. f = os.fdopen(handle, "wb")
  249. f.write(value)
  250. f.close()
  251. cache_full_path = self._get_key_path(key)
  252. # And rename atomically (on POSIX at least)
  253. if sys.platform == "win32" and os.path.exists(cache_full_path):
  254. os.unlink(cache_full_path)
  255. os.rename(path_name, cache_full_path)
  256. def delete(self, key):
  257. """Delete ``key`` from the cache.
  258. If ``key`` has not already been set then has no effect.
  259. :param key: The key to delete. Must be either bytes or unicode text.
  260. """
  261. cache_full_path = self._get_key_path(key)
  262. try:
  263. os.remove(cache_full_path)
  264. except OSError as e:
  265. if e.errno != errno.ENOENT:
  266. raise
  267. class MultipleRepresentationCache(AtomicFileCache):
  268. """A cache that can hold different representations of the same resource.
  269. If a resource has two representations with two media types,
  270. FileCache will only store the most recently fetched
  271. representation. This cache can keep track of multiple
  272. representations of the same resource.
  273. This class works on the assumption that outside calling code sets
  274. an instance's request_media_type attribute to the value of the
  275. 'Accept' header before initiating the request.
  276. This class is very much not thread-safe, but FileCache isn't
  277. thread-safe anyway.
  278. """
  279. def __init__(self, cache):
  280. """Tell FileCache to call append_media_type when generating keys."""
  281. super(MultipleRepresentationCache, self).__init__(
  282. cache, self.append_media_type
  283. )
  284. self.request_media_type = None
  285. def append_media_type(self, key):
  286. """Append the request media type to the cache key.
  287. This ensures that representations of the same resource will be
  288. cached separately, so long as they're served as different
  289. media types.
  290. """
  291. if self.request_media_type is not None:
  292. key = key + "-" + self.request_media_type
  293. return safename(key)
  294. def _getCachedHeader(self, uri, header):
  295. """Retrieve a cached value for an HTTP header."""
  296. (scheme, authority, request_uri, cachekey) = urlnorm(uri)
  297. cached_value = self.get(cachekey)
  298. header_start = header + ":"
  299. if not isinstance(header_start, bytes):
  300. header_start = header_start.encode("utf-8")
  301. if cached_value is not None:
  302. for line in BytesIO(cached_value):
  303. if line.startswith(header_start):
  304. return line[len(header_start) :].strip()
  305. return None
  306. class Browser:
  307. """A class for making calls to lazr.restful web services."""
  308. NOT_MODIFIED = object()
  309. MAX_RETRIES = 6
  310. def __init__(
  311. self,
  312. service_root,
  313. credentials,
  314. cache=None,
  315. timeout=None,
  316. proxy_info=None,
  317. user_agent=None,
  318. max_retries=MAX_RETRIES,
  319. ):
  320. """Initialize, possibly creating a cache.
  321. If no cache is provided, a temporary directory will be used as
  322. a cache. The temporary directory will be automatically removed
  323. when the Python process exits.
  324. """
  325. if cache is None:
  326. cache = tempfile.mkdtemp()
  327. atexit.register(shutil.rmtree, cache)
  328. if isinstance(cache, str_types):
  329. cache = MultipleRepresentationCache(cache)
  330. self._connection = service_root.httpFactory(
  331. credentials, cache, timeout, proxy_info
  332. )
  333. self.user_agent = user_agent
  334. self.max_retries = max_retries
  335. def _request_and_retry(self, url, method, body, headers):
  336. for retry_count in range(0, self.max_retries + 1):
  337. response, content = self._connection.request(
  338. url, method=method, body=body, headers=headers
  339. )
  340. if (
  341. response.status in [502, 503]
  342. and retry_count < self.max_retries
  343. ):
  344. # The server returned a 502 or 503. Sleep for 0, 1, 2,
  345. # 4, 8, 16, ... seconds and try again.
  346. sleep_for = int(2 ** (retry_count - 1))
  347. sleep(sleep_for)
  348. else:
  349. break
  350. # Either the request succeeded or we gave up.
  351. return response, content
  352. def _request(
  353. self,
  354. url,
  355. data=None,
  356. method="GET",
  357. media_type="application/json",
  358. extra_headers=None,
  359. ):
  360. """Create an authenticated request object."""
  361. # If the user is trying to get data that has been redacted,
  362. # give a helpful message.
  363. if url == "tag:launchpad.net:2008:redacted":
  364. raise ValueError(
  365. "You tried to access a resource that you "
  366. "don't have the server-side permission to see."
  367. )
  368. # Add extra headers for the request.
  369. headers = {"Accept": media_type}
  370. if self.user_agent is not None:
  371. headers["User-Agent"] = self.user_agent
  372. if isinstance(self._connection.cache, MultipleRepresentationCache):
  373. self._connection.cache.request_media_type = media_type
  374. if extra_headers is not None:
  375. headers.update(extra_headers)
  376. response, content = self._request_and_retry(
  377. str(url), method=method, body=data, headers=headers
  378. )
  379. if response.status == 304:
  380. # The resource didn't change.
  381. if content == b"":
  382. if (
  383. "If-None-Match" in headers
  384. or "If-Modified-Since" in headers
  385. ):
  386. # The caller made a conditional request, and the
  387. # condition failed. Rather than send an empty
  388. # representation, which might be misinterpreted,
  389. # send a special object that will let the calling code know
  390. # that the resource was not modified.
  391. return response, self.NOT_MODIFIED
  392. else:
  393. # The caller didn't make a conditional request,
  394. # but the response code is 304 and there's no
  395. # content. The only way to handle this is to raise
  396. # an error.
  397. #
  398. # We don't use error_for() here because 304 is not
  399. # normally considered an error condition.
  400. raise HTTPError(response, content)
  401. else:
  402. # XXX leonardr 2010/04/12 bug=httplib2#97
  403. #
  404. # Why is this check here? Why would there ever be any
  405. # content when the response code is 304? It's because of
  406. # an httplib2 bug that sometimes sets a 304 response
  407. # code when caching retrieved documents. When the
  408. # cached document is retrieved, we get a 304 response
  409. # code and a full representation.
  410. #
  411. # Since the cache lookup succeeded, the 'real'
  412. # response code is 200. This code undoes the bad
  413. # behavior in httplib2.
  414. response.status = 200
  415. return response, content
  416. # Turn non-2xx responses into appropriate HTTPError subclasses.
  417. error = error_for(response, content)
  418. if error is not None:
  419. raise error
  420. return response, content
  421. def get(self, resource_or_uri, headers=None, return_response=False):
  422. """GET a representation of the given resource or URI."""
  423. if isinstance(resource_or_uri, (str_types, URI)):
  424. url = resource_or_uri
  425. else:
  426. method = resource_or_uri.get_method("get")
  427. url = method.build_request_url()
  428. response, content = self._request(url, extra_headers=headers)
  429. if return_response:
  430. return (response, content)
  431. return content
  432. def get_wadl_application(self, url):
  433. """GET a WADL representation of the resource at the requested url."""
  434. wadl_type = "application/vnd.sun.wadl+xml"
  435. response, content = self._request(url, media_type=wadl_type)
  436. url = str(url)
  437. if not isinstance(content, bytes):
  438. content = content.encode("utf-8")
  439. return Application(url, content)
  440. def post(self, url, method_name, **kws):
  441. """POST a request to the web service."""
  442. kws["ws.op"] = method_name
  443. data = urlencode(kws)
  444. return self._request(url, data, "POST")
  445. def put(self, url, representation, media_type, headers=None):
  446. """PUT the given representation to the URL."""
  447. extra_headers = {"Content-Type": media_type}
  448. if headers is not None:
  449. extra_headers.update(headers)
  450. return self._request(
  451. url, representation, "PUT", extra_headers=extra_headers
  452. )
  453. def delete(self, url):
  454. """DELETE the resource at the given URL."""
  455. self._request(url, method="DELETE")
  456. return None
  457. def patch(self, url, representation, headers=None):
  458. """PATCH the object at url with the updated representation."""
  459. extra_headers = {"Content-Type": "application/json"}
  460. if headers is not None:
  461. extra_headers.update(headers)
  462. # httplib2 doesn't know about the PATCH method, so we need to
  463. # do some work ourselves. Pull any cached value of "ETag" out
  464. # and use it as the value for "If-Match".
  465. cached_etag = self._connection._getCachedHeader(str(url), "etag")
  466. if cached_etag is not None and not self._connection.ignore_etag:
  467. # http://www.w3.org/1999/04/Editing/
  468. headers["If-Match"] = cached_etag
  469. return self._request(
  470. url,
  471. dumps(representation, cls=DatetimeJSONEncoder),
  472. "PATCH",
  473. extra_headers=extra_headers,
  474. )