123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546 |
- # Copyright 2008,2012 Canonical Ltd.
- # This file is part of lazr.restfulclient.
- #
- # lazr.restfulclient is free software: you can redistribute it and/or modify
- # it under the terms of the GNU Lesser General Public License as
- # published by the Free Software Foundation, either version 3 of the
- # License, or (at your option) any later version.
- #
- # lazr.restfulclient is distributed in the hope that it will be useful, but
- # WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- # Lesser General Public License for more details.
- #
- # You should have received a copy of the GNU Lesser General Public
- # License along with lazr.restfulclient. If not, see
- # <http://www.gnu.org/licenses/>.
- """Browser object to make requests of lazr.restful web services.
- The `Browser` class does some massage of HTTP requests and responses,
- and handles custom caches. It is not part of the public
- lazr.restfulclient API. (But maybe it should be?)
- """
- __metaclass__ = type
- __all__ = [
- "Browser",
- "RestfulHttp",
- "ssl_certificate_validation_disabled",
- ]
- import atexit
- import errno
- import os
- import re
- import shutil
- import sys
- import tempfile
- from hashlib import md5
- from io import BytesIO
- from json import dumps
- # Import sleep directly into the module so we can monkey-patch it
- # during a test.
- from time import sleep
- from httplib2 import Http, urlnorm
- try:
- from httplib2 import proxy_info_from_environment
- except ImportError:
- from httplib2 import ProxyInfo
- proxy_info_from_environment = ProxyInfo.from_environment
- try:
- # Python 3.
- from urllib.parse import urlencode
- except ImportError:
- from urllib import urlencode
- from wadllib.application import Application
- from lazr.restfulclient._json import DatetimeJSONEncoder
- from lazr.restfulclient.errors import HTTPError, error_for
- from lazr.uri import URI
- if bytes is str:
- # Python 2
- unicode_type = unicode # noqa: F821
- str_types = basestring # noqa: F821
- else:
- unicode_type = str
- str_types = str
- # A drop-in replacement for httplib2's safename. Substantially borrowed
- # from httplib2, but its cache name format changed in 0.12.0 and we want to
- # stick with the previous version.
- re_url_scheme = re.compile(br"^\w+://")
- re_url_scheme_s = re.compile(r"^\w+://")
- re_slash = re.compile(br"[?/:|]+")
- def safename(filename):
- """Return a filename suitable for the cache.
- Strips dangerous and common characters to create a filename we
- can use to store the cache in.
- """
- try:
- if isinstance(filename, bytes):
- filename_match = filename.decode("utf-8")
- else:
- filename_match = filename
- if re_url_scheme_s.match(filename_match):
- if isinstance(filename, bytes):
- filename = filename.decode("utf-8")
- filename = filename.encode("idna")
- else:
- filename = filename.encode("idna")
- except UnicodeError:
- pass
- if isinstance(filename, unicode_type):
- filename = filename.encode("utf-8")
- filemd5 = md5(filename).hexdigest()
- filename = re_url_scheme.sub(b"", filename)
- filename = re_slash.sub(b",", filename)
- # This is the part that we changed. In stock httplib2, the
- # filename is trimmed if it's longer than 200 characters, and then
- # a comma and a 32-character md5 sum are appended. This causes
- # problems on eCryptfs filesystems, where the maximum safe
- # filename length is closer to 143 characters.
- #
- # We take a (user-hackable) maximum filename length from
- # RestfulHttp and subtract 33 characters to make room for the comma
- # and the md5 sum.
- #
- # See:
- # http://code.google.com/p/httplib2/issues/detail?id=92
- # https://bugs.launchpad.net/bugs/344878
- # https://bugs.launchpad.net/bugs/545197
- maximum_filename_length = RestfulHttp.maximum_cache_filename_length
- maximum_length_before_md5_sum = maximum_filename_length - 32 - 1
- if len(filename) > maximum_length_before_md5_sum:
- filename = filename[:maximum_length_before_md5_sum]
- return ",".join((filename.decode("utf-8"), filemd5))
- def ssl_certificate_validation_disabled():
- """Whether the user has disabled SSL certificate connection.
- Some testing servers have broken certificates. Rather than raising an
- error, we allow an environment variable,
- ``LP_DISABLE_SSL_CERTIFICATE_VALIDATION`` to disable the check.
- """
- return bool(os.environ.get("LP_DISABLE_SSL_CERTIFICATE_VALIDATION", False))
- if os.path.exists("/etc/ssl/certs/ca-certificates.crt"):
- SYSTEM_CA_CERTS = "/etc/ssl/certs/ca-certificates.crt"
- else:
- from httplib2 import CA_CERTS as SYSTEM_CA_CERTS
- class RestfulHttp(Http):
- """An Http subclass with some custom behavior.
- This Http client uses the TE header instead of the Accept-Encoding
- header to ask for compressed representations. It also knows how to
- react when its cache is a MultipleRepresentationCache.
- """
- maximum_cache_filename_length = 143
- def __init__(
- self,
- authorizer=None,
- cache=None,
- timeout=None,
- proxy_info=proxy_info_from_environment,
- ):
- cert_disabled = ssl_certificate_validation_disabled()
- super(RestfulHttp, self).__init__(
- cache,
- timeout,
- proxy_info,
- disable_ssl_certificate_validation=cert_disabled,
- ca_certs=SYSTEM_CA_CERTS,
- )
- self.authorizer = authorizer
- if self.authorizer is not None:
- self.authorizer.authorizeSession(self)
- def _request(
- self,
- conn,
- host,
- absolute_uri,
- request_uri,
- method,
- body,
- headers,
- redirections,
- cachekey,
- ):
- """Use the authorizer to authorize an outgoing request."""
- if "authorization" in headers:
- # There's an authorization header left over from a
- # previous request that resulted in a redirect. Resources
- # protected by OAuth or HTTP Digest must send a distinct
- # Authorization header with each request, to prevent
- # playback attacks. Remove the Authorization header and
- # start again.
- del headers["authorization"]
- if self.authorizer is not None:
- self.authorizer.authorizeRequest(
- absolute_uri, method, body, headers
- )
- return super(RestfulHttp, self)._request(
- conn,
- host,
- absolute_uri,
- request_uri,
- method,
- body,
- headers,
- redirections,
- cachekey,
- )
- def _getCachedHeader(self, uri, header):
- """Retrieve a cached value for an HTTP header."""
- if isinstance(self.cache, MultipleRepresentationCache):
- return self.cache._getCachedHeader(uri, header)
- return None
- class AtomicFileCache(object):
- """A FileCache that can be shared by multiple processes.
- Based on a patch found at
- <http://code.google.com/p/httplib2/issues/detail?id=125>.
- """
- TEMPFILE_PREFIX = ".temp"
- def __init__(self, cache, safe=safename):
- """Construct an ``AtomicFileCache``.
- :param cache: The directory to use as a cache.
- :param safe: A function that takes a key and returns a name that's
- safe to use as a filename. The key must never return a string
- that begins with ``TEMPFILE_PREFIX``. By default uses
- ``safename``.
- """
- self._cache_dir = os.path.normpath(cache)
- self._get_safe_name = safe
- try:
- os.makedirs(self._cache_dir)
- except OSError as e:
- if e.errno != errno.EEXIST:
- raise
- def _get_key_path(self, key):
- """Return the path on disk where ``key`` is stored."""
- safe_key = self._get_safe_name(key)
- if safe_key.startswith(self.TEMPFILE_PREFIX):
- # If the cache key starts with the tempfile prefix, then it's
- # possible that it will clash with a temporary file that we
- # create.
- raise ValueError(
- "Cache key cannot start with '%s'" % self.TEMPFILE_PREFIX
- )
- return os.path.join(self._cache_dir, safe_key)
- def get(self, key):
- """Get the value of ``key`` if set.
- This behaves slightly differently to ``FileCache`` in that if
- ``set()`` fails to store a key, this ``get()`` will behave as if that
- key were never set whereas ``FileCache`` returns the empty string.
- :param key: The key to retrieve. Must be either bytes or unicode
- text.
- :return: The value of ``key`` if set, None otherwise.
- """
- cache_full_path = self._get_key_path(key)
- try:
- f = open(cache_full_path, "rb")
- try:
- return f.read()
- finally:
- f.close()
- except (IOError, OSError) as e:
- if e.errno != errno.ENOENT:
- raise
- def set(self, key, value):
- """Set ``key`` to ``value``.
- :param key: The key to set. Must be either bytes or unicode text.
- :param value: The value to set ``key`` to. Must be bytes.
- """
- # Open a temporary file
- handle, path_name = tempfile.mkstemp(
- prefix=self.TEMPFILE_PREFIX, dir=self._cache_dir
- )
- f = os.fdopen(handle, "wb")
- f.write(value)
- f.close()
- cache_full_path = self._get_key_path(key)
- # And rename atomically (on POSIX at least)
- if sys.platform == "win32" and os.path.exists(cache_full_path):
- os.unlink(cache_full_path)
- os.rename(path_name, cache_full_path)
- def delete(self, key):
- """Delete ``key`` from the cache.
- If ``key`` has not already been set then has no effect.
- :param key: The key to delete. Must be either bytes or unicode text.
- """
- cache_full_path = self._get_key_path(key)
- try:
- os.remove(cache_full_path)
- except OSError as e:
- if e.errno != errno.ENOENT:
- raise
- class MultipleRepresentationCache(AtomicFileCache):
- """A cache that can hold different representations of the same resource.
- If a resource has two representations with two media types,
- FileCache will only store the most recently fetched
- representation. This cache can keep track of multiple
- representations of the same resource.
- This class works on the assumption that outside calling code sets
- an instance's request_media_type attribute to the value of the
- 'Accept' header before initiating the request.
- This class is very much not thread-safe, but FileCache isn't
- thread-safe anyway.
- """
- def __init__(self, cache):
- """Tell FileCache to call append_media_type when generating keys."""
- super(MultipleRepresentationCache, self).__init__(
- cache, self.append_media_type
- )
- self.request_media_type = None
- def append_media_type(self, key):
- """Append the request media type to the cache key.
- This ensures that representations of the same resource will be
- cached separately, so long as they're served as different
- media types.
- """
- if self.request_media_type is not None:
- key = key + "-" + self.request_media_type
- return safename(key)
- def _getCachedHeader(self, uri, header):
- """Retrieve a cached value for an HTTP header."""
- (scheme, authority, request_uri, cachekey) = urlnorm(uri)
- cached_value = self.get(cachekey)
- header_start = header + ":"
- if not isinstance(header_start, bytes):
- header_start = header_start.encode("utf-8")
- if cached_value is not None:
- for line in BytesIO(cached_value):
- if line.startswith(header_start):
- return line[len(header_start) :].strip()
- return None
- class Browser:
- """A class for making calls to lazr.restful web services."""
- NOT_MODIFIED = object()
- MAX_RETRIES = 6
- def __init__(
- self,
- service_root,
- credentials,
- cache=None,
- timeout=None,
- proxy_info=None,
- user_agent=None,
- max_retries=MAX_RETRIES,
- ):
- """Initialize, possibly creating a cache.
- If no cache is provided, a temporary directory will be used as
- a cache. The temporary directory will be automatically removed
- when the Python process exits.
- """
- if cache is None:
- cache = tempfile.mkdtemp()
- atexit.register(shutil.rmtree, cache)
- if isinstance(cache, str_types):
- cache = MultipleRepresentationCache(cache)
- self._connection = service_root.httpFactory(
- credentials, cache, timeout, proxy_info
- )
- self.user_agent = user_agent
- self.max_retries = max_retries
- def _request_and_retry(self, url, method, body, headers):
- for retry_count in range(0, self.max_retries + 1):
- response, content = self._connection.request(
- url, method=method, body=body, headers=headers
- )
- if (
- response.status in [502, 503]
- and retry_count < self.max_retries
- ):
- # The server returned a 502 or 503. Sleep for 0, 1, 2,
- # 4, 8, 16, ... seconds and try again.
- sleep_for = int(2 ** (retry_count - 1))
- sleep(sleep_for)
- else:
- break
- # Either the request succeeded or we gave up.
- return response, content
- def _request(
- self,
- url,
- data=None,
- method="GET",
- media_type="application/json",
- extra_headers=None,
- ):
- """Create an authenticated request object."""
- # If the user is trying to get data that has been redacted,
- # give a helpful message.
- if url == "tag:launchpad.net:2008:redacted":
- raise ValueError(
- "You tried to access a resource that you "
- "don't have the server-side permission to see."
- )
- # Add extra headers for the request.
- headers = {"Accept": media_type}
- if self.user_agent is not None:
- headers["User-Agent"] = self.user_agent
- if isinstance(self._connection.cache, MultipleRepresentationCache):
- self._connection.cache.request_media_type = media_type
- if extra_headers is not None:
- headers.update(extra_headers)
- response, content = self._request_and_retry(
- str(url), method=method, body=data, headers=headers
- )
- if response.status == 304:
- # The resource didn't change.
- if content == b"":
- if (
- "If-None-Match" in headers
- or "If-Modified-Since" in headers
- ):
- # The caller made a conditional request, and the
- # condition failed. Rather than send an empty
- # representation, which might be misinterpreted,
- # send a special object that will let the calling code know
- # that the resource was not modified.
- return response, self.NOT_MODIFIED
- else:
- # The caller didn't make a conditional request,
- # but the response code is 304 and there's no
- # content. The only way to handle this is to raise
- # an error.
- #
- # We don't use error_for() here because 304 is not
- # normally considered an error condition.
- raise HTTPError(response, content)
- else:
- # XXX leonardr 2010/04/12 bug=httplib2#97
- #
- # Why is this check here? Why would there ever be any
- # content when the response code is 304? It's because of
- # an httplib2 bug that sometimes sets a 304 response
- # code when caching retrieved documents. When the
- # cached document is retrieved, we get a 304 response
- # code and a full representation.
- #
- # Since the cache lookup succeeded, the 'real'
- # response code is 200. This code undoes the bad
- # behavior in httplib2.
- response.status = 200
- return response, content
- # Turn non-2xx responses into appropriate HTTPError subclasses.
- error = error_for(response, content)
- if error is not None:
- raise error
- return response, content
- def get(self, resource_or_uri, headers=None, return_response=False):
- """GET a representation of the given resource or URI."""
- if isinstance(resource_or_uri, (str_types, URI)):
- url = resource_or_uri
- else:
- method = resource_or_uri.get_method("get")
- url = method.build_request_url()
- response, content = self._request(url, extra_headers=headers)
- if return_response:
- return (response, content)
- return content
- def get_wadl_application(self, url):
- """GET a WADL representation of the resource at the requested url."""
- wadl_type = "application/vnd.sun.wadl+xml"
- response, content = self._request(url, media_type=wadl_type)
- url = str(url)
- if not isinstance(content, bytes):
- content = content.encode("utf-8")
- return Application(url, content)
- def post(self, url, method_name, **kws):
- """POST a request to the web service."""
- kws["ws.op"] = method_name
- data = urlencode(kws)
- return self._request(url, data, "POST")
- def put(self, url, representation, media_type, headers=None):
- """PUT the given representation to the URL."""
- extra_headers = {"Content-Type": media_type}
- if headers is not None:
- extra_headers.update(headers)
- return self._request(
- url, representation, "PUT", extra_headers=extra_headers
- )
- def delete(self, url):
- """DELETE the resource at the given URL."""
- self._request(url, method="DELETE")
- return None
- def patch(self, url, representation, headers=None):
- """PATCH the object at url with the updated representation."""
- extra_headers = {"Content-Type": "application/json"}
- if headers is not None:
- extra_headers.update(headers)
- # httplib2 doesn't know about the PATCH method, so we need to
- # do some work ourselves. Pull any cached value of "ETag" out
- # and use it as the value for "If-Match".
- cached_etag = self._connection._getCachedHeader(str(url), "etag")
- if cached_etag is not None and not self._connection.ignore_etag:
- # http://www.w3.org/1999/04/Editing/
- headers["If-Match"] = cached_etag
- return self._request(
- url,
- dumps(representation, cls=DatetimeJSONEncoder),
- "PATCH",
- extra_headers=extra_headers,
- )
|