#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# The MIT License (MIT)
#
# Copyright (c) 2017 William Forde
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
Urlquick
--------
A light-weight http client with requests like interface. Featuring persistent connections and caching support.
This project was originally created for use by Kodi add-ons, but has grown into something more.
I found that, while requests has a very nice interface, there was a noticeable lag when importing the library.
The other option available is to use urllib2, but then you lose the benefit of persistent connections that requests
has. Hence the reason for this project.
All GET, HEAD and POST requests are cached locally for a period of 4 hours. When the cache expires,
conditional headers are added to a new request e.g. "Etag" and "Last-modified". Then if the server
returns a 304 Not-Modified response, the cache is reused, saving having to re-download the content body.
Inspired by: urlfetch & requests
urlfetch: https://github.com/ifduyue/urlfetch
requests: http://docs.python-requests.org/en/master/
Github: https://github.com/willforde/urlquick
Documentation: http://urlquick.readthedocs.io/en/stable/?badge=stable
Testing: https://travis-ci.org/willforde/urlquick
Code Coverage: https://coveralls.io/github/willforde/urlquick?branch=master
Code Quality: https://app.codacy.com/app/willforde/urlquick/dashboard
"""
# Names exported by a star-import of this module
__all__ = ["request", "get", "head", "post", "put", "patch", "delete", "cache_cleanup", "Session"]
# Version string of this module
__version__ = "0.9.4"
# Standard library imports
from codecs import open as _open, getencoder
from base64 import b64encode, b64decode
from collections import defaultdict
from datetime import datetime
import json as _json
import logging
import hashlib
import socket
import time
import zlib
import ssl
import sys
import re
import os
# Check python version to set the object that can detect non unicode strings
py3 = sys.version_info >= (3, 0)

if py3:
    # noinspection PyUnresolvedReferences, PyCompatibility
    from http.client import HTTPConnection, HTTPSConnection, HTTPException
    # noinspection PyUnresolvedReferences, PyCompatibility
    from urllib.parse import urlsplit, urlunsplit, urljoin, SplitResult, urlencode, parse_qsl, quote, unquote
    # noinspection PyUnresolvedReferences, PyCompatibility
    from http.cookies import SimpleCookie
    # noinspection PyUnresolvedReferences, PyCompatibility
    from collections.abc import MutableMapping

    # Under kodi this constant is set to the addon data directory
    # code for which is at the bottom of this file
    CACHE_LOCATION = os.getcwd()

    # noinspection PyShadowingBuiltins
    unicode = str
else:
    # noinspection PyUnresolvedReferences, PyCompatibility
    from httplib import HTTPConnection, HTTPSConnection, HTTPException
    # noinspection PyUnresolvedReferences, PyCompatibility
    from urlparse import urlsplit, urlunsplit, urljoin, SplitResult, parse_qsl as _parse_qsl
    # noinspection PyUnresolvedReferences, PyCompatibility
    from urllib import urlencode as _urlencode, quote as _quote, unquote as _unquote
    # noinspection PyUnresolvedReferences, PyCompatibility
    from Cookie import SimpleCookie
    # noinspection PyUnresolvedReferences, PyCompatibility
    from collections import MutableMapping

    # Under kodi this constant is set to the addon data directory
    # code for which is at the bottom of this file
    CACHE_LOCATION = os.getcwdu()

    # The python2 url helpers below wrap the byte oriented stdlib functions
    # so that they accept and return unicode, like their python3 namesakes.

    def quote(data, safe=b"/", encoding="utf8", errors="strict"):
        """Percent-encode the unicode string ``data`` and return the result as unicode."""
        data = data.encode(encoding, errors)
        return _quote(data, safe).decode("ascii")

    def unquote(data, encoding="utf-8", errors="replace"):
        """Decode the percent-escapes in the unicode string ``data`` and return unicode."""
        data = data.encode("ascii", errors)
        return _unquote(data).decode(encoding, errors)

    def parse_qsl(qs, encoding="utf8", errors="replace", **kwargs):
        """Parse the query string ``qs`` into a list of unicode ``(key, value)`` tuples."""
        qs = qs.encode(encoding, errors)
        qsl = _parse_qsl(qs, **kwargs)
        return [(k.decode(encoding, errors), v.decode(encoding, errors)) for k, v in qsl]  # pragma: no branch

    def urlencode(query, doseq=False, encoding="utf8", errors="strict"):
        """
        Urlencode a mapping, or a sequence of ``(key, value)`` pairs, of unicode strings.

        ``errors`` defaults to "strict" (it used to be an empty string, which is not a
        registered error handler name) so that encoding failures surface as
        ``UnicodeEncodeError``, matching :func:`quote`.
        """
        # Fetch items as a tuple of (key, value)
        items = query.items() if hasattr(query, "items") else query
        new_query = []

        # Process the items and encode unicode strings
        for key, value in items:
            key = key.encode(encoding, errors)
            if isinstance(value, (list, tuple)):
                value = [_value.encode(encoding, errors) for _value in value]  # pragma: no branch
            else:
                value = value.encode(encoding, errors)
            new_query.append((key, value))

        # Decode the output of urlencode back into unicode and return
        return _urlencode(new_query, doseq).decode("ascii")
# Cacheable request types: only responses to these HTTP methods are written to the cache
CACHEABLE_METHODS = (u"GET", u"HEAD", u"POST")
# Response status codes that are allowed to be written to the cache
CACHEABLE_CODES = (200, 203, 204, 300, 301, 302, 303, 307, 308, 410, 414)
# Redirect status codes
# NOTE(review): not referenced in the visible part of this file; presumably used by the Response class - confirm
REDIRECT_CODES = (301, 302, 303, 307, 308)
#: The default max age of the cache in seconds is used when no max age is given in request.
MAX_AGE = 14400  # 4 Hours
# Unique logger for this module
logger = logging.getLogger("urlquick")
class UrlError(IOError):
    """Base exception. All exceptions and errors will subclass from this."""


class Timeout(UrlError):
    """Request timed out."""


class MaxRedirects(UrlError):
    """Too many redirects."""


class ContentError(UrlError):
    """Failed to decode content."""


class ConnError(UrlError):
    """A Connection error occurred."""


class SSLError(ConnError):
    """An SSL error occurred."""
class HTTPError(UrlError):
    """
    Raised when HTTP error occurs.

    :param url: Url of the request that failed, stored as ``filename``.
    :param int code: HTTP status code of the response, stored as ``code``.
    :param msg: Textual reason of the response status, stored as ``msg``.
    :param hdrs: Headers of the response, stored as ``hdrs``.
    """

    def __init__(self, url, code, msg, hdrs):
        self.code = code
        self.msg = msg
        self.hdrs = hdrs
        self.filename = url

    def __str__(self):
        # Every status code below 500 is reported as a client error
        error_type = "Client" if self.code < 500 else "Server"
        return "HTTP {} Error {}: {}".format(error_type, self.code, self.msg)
class MissingDependency(ImportError):
    """Signals that the optional dependency 'HTMLement' is missing (an ImportError subclass)."""
class CaseInsensitiveDict(MutableMapping):
    """
    Mapping whose keys are matched without regard to case.

    Entries are stored under the lower-cased key, together with the key as it
    was originally written, so iteration yields the original spelling.

    Credit goes to requests for this code
    http://docs.python-requests.org/en/master/
    """

    def __init__(self, *args):
        # lower-cased key -> (key as given, value)
        self._store = {}
        for mapping in args:
            # Empty or missing mappings contribute nothing
            if not mapping:
                continue
            self.update(mapping)

    def __repr__(self):
        plain = dict(self.items())
        return str(plain)

    def __setitem__(self, key, value):
        # A value of None is silently dropped instead of being stored
        if value is None:
            return
        key = make_unicode(key, "ascii")
        value = make_unicode(value, "iso-8859-1")
        self._store[key.lower()] = (key, value)

    def __getitem__(self, key):
        _, stored_value = self._store[key.lower()]
        return stored_value

    def __delitem__(self, key):
        del self._store[key.lower()]

    def __iter__(self):
        return (original_key for original_key, _unused in self._store.values())

    def __len__(self):
        return len(self._store)

    def copy(self):
        """Return a shallow copy of the case-insensitive dictionary."""
        # The stored (key, value) tuples are fed to update() as pairs
        pairs = self._store.values()
        return CaseInsensitiveDict(pairs)
class CachedProperty(object):
    """
    Cached property.

    A property that is only computed once per instance, the result being
    stored in the instance ``__dict__`` under the name of the wrapped function.
    Deleting the attribute resets the property, so the next access computes
    the value again. Assignment is refused unless ``allow_setter`` is set to True.

    :param fget: Function used to compute the value, called with the instance.
    """

    def __init__(self, fget=None):
        self.__get = fget
        self.__doc__ = fget.__doc__
        self.__name__ = fget.__name__
        self.__module__ = fget.__module__
        self.allow_setter = False

    def __get__(self, instance, owner):
        # Accessed on the class rather than on an instance: there is no
        # instance __dict__ to consult, so hand back the descriptor itself.
        if instance is None:
            return self

        try:
            return instance.__dict__[self.__name__]
        except KeyError:
            # First access, compute the value and remember it on the instance
            value = instance.__dict__[self.__name__] = self.__get(instance)
            return value

    def __set__(self, instance, value):
        if self.allow_setter:
            instance.__dict__[self.__name__] = value
        else:
            raise AttributeError("Can't set attribute")

    def __delete__(self, instance):
        # Dropping the stored value forces a recompute on the next access
        instance.__dict__.pop(self.__name__, None)
class CacheHandler(object):
    """
    Handle the cache file of a single response.

    The response is stored as json in ``<CACHE_LOCATION>/.cache/<uid>``.
    On creation any existing cache file is loaded into ``self.response``;
    a cache file that can not be loaded is deleted. The handler is truthy
    only when a cached response is available.

    :param uid: Filename of the cache file, see :meth:`hash_url`.
    :param int max_age: Age in seconds after which the cache is considered stale.
    """

    def __init__(self, uid, max_age=MAX_AGE):
        self.max_age = max_age
        self.response = None

        # Filepath to cache file
        cache_dir = self.cache_dir()
        self.cache_file = cache_file = os.path.join(cache_dir, uid)
        if os.path.exists(cache_file):
            self.response = self._load()
            # An unreadable cache file is useless, remove it
            if self.response is None:
                self.delete(cache_file)

    @classmethod
    def cache_dir(cls):
        """Returns the cache directory, creating it if it does not exist yet."""
        cache_dir = cls.safe_path(os.path.join(CACHE_LOCATION, u".cache"))
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        return cache_dir

    @staticmethod
    def delete(cache_path):
        """Delete cache from disk. A failure is logged, not raised."""
        try:
            os.remove(cache_path)
        except EnvironmentError:
            logger.error("Faild to remove cache: %s", cache_path)
        else:
            logger.debug("Removed cache: %s", cache_path)

    @staticmethod
    def isfilefresh(cache_path, max_age):
        """Return True if the file was modified less than ``max_age`` seconds ago."""
        return (time.time() - os.stat(cache_path).st_mtime) < max_age

    def isfresh(self):
        """Return True if cache is fresh else False."""
        # Responses with status 301, 308 or 414 are always treated as fresh,
        # otherwise the cache must not be older than the max age
        if self.response.status in (301, 308, 414):
            return True
        elif self.max_age == 0:
            return False
        else:
            return self.isfilefresh(self.cache_file, self.max_age)

    def reset_timestamp(self):
        """Reset the last modified timestamp to current time."""
        os.utime(self.cache_file, None)

    def add_conditional_headers(self, headers):
        """Add the conditional headers, found in the cached response, to ``headers`` in place."""
        # Fetch cached headers
        cached_headers = self.response.headers

        # Check for conditional headers
        if u"Etag" in cached_headers:
            logger.debug("Found conditional header: ETag = %s", cached_headers[u"ETag"])
            headers[u"If-none-match"] = cached_headers[u"ETag"]

        if u"Last-modified" in cached_headers:
            logger.debug("Found conditional header: Last-Modified = %s", cached_headers[u"Last-modified"])
            headers[u"If-modified-since"] = cached_headers[u"Last-Modified"]

    def update(self, headers, body, status, reason, version=11, strict=True):
        """Replace the cached response with the given response data and save it to disk."""
        # Convert headers into a Case Insensitive Dict
        headers = CaseInsensitiveDict(headers)

        # Remove Transfer-Encoding from header if exists
        if u"Transfer-Encoding" in headers:
            del headers[u"Transfer-Encoding"]

        # Ensure that reason is unicode
        # noinspection PyArgumentList
        reason = unicode(reason)

        # Create response data structure
        self.response = CacheResponse(headers, body, status, reason, version, strict)

        # Save response to disk
        self._save(headers=dict(headers), body=body, status=status, reason=reason, version=version, strict=strict)

    def _load(self):
        """
        Load the cache response that is stored on disk.

        :return: The cached response, or None if the cache file could not be
                 read, is not valid json or does not have the expected structure.
        """
        try:
            # Attempt to read the raw cache data
            with _open(self.cache_file, "rb", encoding="utf8") as stream:
                json_data = _json.load(stream)
        except (IOError, OSError):
            logger.exception("Cache Error: Failed to read cached response.")
            return None
        except (TypeError, ValueError):
            # Invalid json and undecodable bytes are both reported as ValueError
            logger.exception("Cache Error: Failed to deserialize cached response.")
            return None

        try:
            # Decode body content using base64
            json_data[u"body"] = b64decode(json_data[u"body"].encode("ascii"))
            json_data[u"headers"] = CaseInsensitiveDict(json_data[u"headers"])
            return CacheResponse(**json_data)
        except (TypeError, ValueError, KeyError, AttributeError):
            # Valid json, but not the structure that _save writes
            logger.exception("Cache Error: Cached response is malformed.")
            return None

    def _save(self, **response):
        """Write the response data to the cache file, the cache file is removed on failure."""
        # Base64 encode the body to make it json serializable
        response[u"body"] = b64encode(response["body"]).decode("ascii")

        try:
            # Save the response to disk using json Serialization
            with _open(self.cache_file, "wb", encoding="utf8") as stream:
                _json.dump(response, stream, indent=4, separators=(",", ":"))
        except (IOError, OSError):
            logger.exception("Cache Error: Failed to write response to cache.")
            self.delete(self.cache_file)
        except TypeError:
            logger.exception("Cache Error: Failed to serialize response.")
            self.delete(self.cache_file)

    @staticmethod
    def safe_path(path):
        """
        Convert path into a encoding that best suits the platform os.
        Unicode when on windows and utf8 when on linux/bsd.

        :type path: str
        :param path: The path to convert.
        :return: Returns the path as unicode or utf8 encoded str.
        """
        # Nothing needs to be done on windows, the path is returned unchanged.
        # On every other platform the path is converted to utf8 encoded bytes.
        if not sys.platform.startswith("win"):
            path = path.encode("utf8")
        return path

    @classmethod
    def hash_url(cls, url, data=None):
        """Return the cache filename for a url, and optional request data, as ``cache-<sha1 hexdigest>``."""
        # Make sure that url is of type bytes
        if isinstance(url, unicode):
            url = url.encode("utf8")

        if data:
            # Make sure that data is of type bytes
            if isinstance(data, unicode):
                data = data.encode("utf8")
            url += data

        # Convert hashed url to unicode
        urlhash = hashlib.sha1(url).hexdigest()
        if isinstance(urlhash, bytes):
            urlhash = unicode(urlhash)

        # Append urlhash to the filename
        return cls.safe_path(u"cache-{}".format(urlhash))

    @classmethod
    def from_url(cls, url, data=None, max_age=MAX_AGE):
        """Initialize CacheHandler with url instead of uid."""
        uid = cls.hash_url(url, data)
        return cls(uid, max_age)

    def __bool__(self):
        return self.response is not None

    def __nonzero__(self):
        # python2 name for __bool__
        return self.response is not None
class CacheAdapter(object):
    """
    Adds response caching on top of a connection handler.

    :meth:`cache_check` is called before a request is sent and
    :meth:`handle_response` once the status of the response is known.
    """

    def __init__(self):
        # Cache handler of the request that is currently being processed
        self.__cache = None

    def cache_check(self, method, url, data, headers, max_age=None):
        """
        Check the cache for a response to the given request.

        When a stale cached response exists, the conditional headers
        of that response are added to ``headers`` in place.

        :return: The cached response if it is fresh, else None.
        """
        # Fetch max age from request header
        max_age = max_age if max_age is not None else int(headers.pop(u"x-max-age", MAX_AGE))

        # Forget the handler of any previous request, so that handle_response
        # can never act on the cache of a different url.
        self.__cache = None
        if method == u"OPTIONS":
            return None

        # Check if cache exists first
        self.__cache = cache = CacheHandler.from_url(url, data, max_age)
        if cache:
            if method in ("PUT", "DELETE"):
                logger.debug("Cache purged, %s request invalidates cache", method)
                cache.delete(cache.cache_file)

            elif cache.isfresh():
                logger.debug("Cache is fresh, returning cached response")
                return cache.response

            else:
                logger.debug("Cache is stale, checking for conditional headers")
                cache.add_conditional_headers(headers)

        return None

    def handle_response(self, method, status, callback):
        """
        Cache the response if possible.

        :param method: HTTP method of the request.
        :param int status: Status code of the response.
        :param callback: Function returning a tuple of (headers, body, status, reason).
                         Only called when the response is consumed by the cache.
        :return: The cached response, or None if the cache was not used.
        """
        cache = self.__cache
        # No cache handler was set up for this request, e.g. OPTIONS requests
        if cache is None:
            return None

        if status == 304:
            # Without a cached response there is nothing that can be reused,
            # and no cache file whose timestamp could be reset.
            if not cache:
                return None

            logger.debug("Server return 304 Not Modified response, using cached response")
            callback()
            cache.reset_timestamp()
            return cache.response

        # Cache any cachable response
        elif status in CACHEABLE_CODES and method.upper() in CACHEABLE_METHODS:
            response = callback()
            logger.debug("Caching %s %s response", status, response[3])

            # Save response to cache and return the cached response
            cache.update(*response)
            return cache.response

        return None
class CacheResponse(object):
    """
    Stand-in for an HTTPResponse, holding response data in memory.

    Offers the ``getheaders``, ``read`` and ``close`` methods along with the
    ``status``, ``reason``, ``version`` and ``strict`` attributes.
    """

    def __init__(self, headers, body, status, reason, version=11, strict=True):
        # Payload
        self.body = body
        self.headers = headers
        # Status line details
        self.status = status
        self.reason = reason
        self.version = version
        self.strict = strict

    def getheaders(self):
        """Give back the headers exactly as they were passed in."""
        return self.headers

    def read(self):
        """Give back the whole response body."""
        return self.body

    def close(self):
        """Do nothing, there is no underlying connection to release."""
        return None
class ConnectionManager(CacheAdapter):
    """
    Sends requests over pooled connections, consulting the cache first.

    Connections are kept in ``request_handler``, one pool per url scheme,
    where each pool maps a host to its open connection.
    """

    def __init__(self):
        self.request_handler = {"http": {}, "https": {}}
        super(ConnectionManager, self).__init__()

    def make_request(self, req, timeout, verify, max_age):
        """
        Return a response for the request, taken from the cache when possible.

        :param req: The request to send.
        :param timeout: Connection timeout, passed on to the connection.
        :param verify: Set to False to skip certificate verification on https.
        :param int max_age: Max age of the cache, a negative value bypasses the cache.
        """
        # A negative max_age means no caching at all, default to un-cached response
        if not max_age >= 0:
            return self.connect(req, timeout, verify)

        from_cache = self.cache_check(req.method, req.url, req.data, req.headers, max_age=max_age)
        if from_cache:
            return from_cache

        # Request resource and cache it if possible
        live = self.connect(req, timeout, verify)

        def read_parts():
            return live.getheaders(), live.read(), live.status, live.reason

        stored = self.handle_response(req.method, live.status, read_parts)
        return stored if stored else live

    def connect(self, req, timeout, verify):
        """Send the request over a pooled connection when one exists, else over a new connection."""
        pool = self.request_handler[req.type]
        host = req.host

        # Attempt to reuse the pooled connection first
        if host in pool:
            try:
                # noinspection PyTypeChecker
                return self.send_request(pool[host], req)
            except Exception as err:
                # The pooled connection is unusable, discard it
                pool[host].close()
                del pool[host]

                # Anything other than a UrlError is passed on to the caller,
                # a UrlError is retried below with a new connection
                if not isinstance(err, UrlError):
                    raise

        # Create a new connection
        if req.type == "https":
            if verify is False:
                # noinspection PyProtectedMember
                context = ssl._create_unverified_context()
            else:
                context = None
            conn = HTTPSConnection(host, timeout=timeout, context=context)
        else:
            conn = HTTPConnection(host, timeout=timeout)

        # Make first connection to server
        response = self.send_request(conn, req)

        # Only pool the connection when the server keeps it open
        if not response.will_close:
            pool[host] = conn
        return response

    @staticmethod
    def send_request(conn, req):
        """
        Send the request over the given connection and return the response.

        :raises Timeout: On socket timeout.
        :raises SSLError: On ssl errors.
        :raises ConnError: On any other socket or http protocol error.
        """
        try:
            # Request line, the Host and Accept-Encoding headers come from the request itself
            conn.putrequest(str(req.method), str(req.selector), skip_host=1, skip_accept_encoding=1)

            for name, content in req.header_items():
                conn.putheader(name, content)

            # Sending the body is what initiates the connection
            conn.endheaders(req.data)
            return conn.getresponse()

        except socket.timeout as err:
            raise Timeout(err)

        except ssl.SSLError as err:
            raise SSLError(err)

        except (socket.error, HTTPException) as err:
            raise ConnError(err)

    def close(self):
        """Close all persistent connections and remove."""
        for pool in self.request_handler.values():
            for host in list(pool.keys()):
                pool.pop(host).close()
class Request(object):
    """
    A Request Object

    :param method: The HTTP request method, as unicode or ascii bytes.
    :param url: Url of the remote resource, relative urls are resolved against ``referer``.
    :param headers: Mapping of HTTP headers, a copy is made.
    :param data: [opt] Request body, a dict will be form-encoded and unicode is utf8 encoded.
    :param json: [opt] Object to send as json in the request body.
    :param params: [opt] Mapping of query parameters to add to the url.
    :param referer: [opt] Url of the request that led to this request.
    """

    def __init__(self, method, url, headers, data=None, json=None, params=None, referer=None):
        #: Tuple of (username, password) for basic authentication.
        self.auth = None

        # Convert url into a fully ascii unicode string using urlencoding
        self._referer_url = referer
        self._urlparts = urlparts = self._parse_url(url, params)

        # Ensure that method is always unicode
        if isinstance(method, bytes):
            method = method.decode("ascii")

        #: The URI scheme.
        self.type = urlparts.scheme

        #: The HTTP request method to use.
        self.method = method.upper()

        #: Dictionary of HTTP headers.
        self.headers = headers = headers.copy()

        #: Urlencoded url of the remote resource.
        self.url = urlunsplit((urlparts.scheme, urlparts.netloc, urlparts.path, urlparts.query, urlparts.fragment))

        #: The URI authority, typically a host, but may also contain a port separated by a colon.
        self.host = urlparts.netloc.lower()

        # Add Referer header if not the original request
        if referer:
            self.headers[u"Referer"] = referer

        # Add host header to be compliant with HTTP/1.1
        # The full authority is used so that an explicit port is not lost
        if u"Host" not in headers:
            self.headers[u"Host"] = self.host

        # Construct post data from a json object
        if json:
            self.headers[u"Content-Type"] = u"application/json"
            data = _json.dumps(json)

        if data:
            # Convert data into a urlencode string if data is a dict
            if isinstance(data, dict):
                self.headers[u"Content-Type"] = u"application/x-www-form-urlencoded"
                data = urlencode(data, True).encode("utf8")
            elif isinstance(data, unicode):
                data = data.encode("utf8")

            if u"Content-Length" not in headers:
                # noinspection PyArgumentList
                self.headers[u"Content-Length"] = unicode(len(data))

        #: Request body, to send to the server.
        self.data = data

    def _parse_url(self, url, params=None, scheme=u"http"):
        """
        Parse a URL into it's individual components.

        Any authentication found in the url is stored in ``self.auth``.

        :param str url: Url to parse
        :param dict params: params to add to url as query
        :return: A 5-tuple of URL components
        :rtype: urllib.parse.SplitResult
        :raises ValueError: If the url scheme is not http or https.
        """
        # Make sure we have unicode
        if isinstance(url, bytes):
            url = url.decode("utf8")

        # Check for valid url structure
        if not url[:4] == u"http":
            if self._referer_url:
                # Resolve the url against the url of the previous request
                url = urljoin(self._referer_url, url, allow_fragments=False)

            elif url[:3] == u"://":
                # Turn '://host' into '//host' so the default scheme applies
                url = url[1:]

        # Parse the url into each element
        scheme, netloc, path, query, _ = urlsplit(url.replace(u" ", u"%20"), scheme=scheme)
        if scheme not in ("http", "https"):
            raise ValueError("Unsupported scheme: {}".format(scheme))

        # Ensure that all elements of the url can be encoded into ascii
        self.auth, netloc = self._ascii_netloc(netloc)
        path = self._ascii_path(path) if path else u"/"
        query = self._ascii_query(query, params)

        # The fragment is dropped
        # noinspection PyArgumentList
        return SplitResult(scheme, netloc, path, query, u"")

    @staticmethod
    def _ascii_netloc(netloc):
        """
        Make sure that host is ascii compatible.

        :return: Tuple of (auth, netloc) where auth is a (username, password)
                 tuple when the netloc contained credentials, else None.
        """
        auth = None
        if u"@" in netloc:
            # Extract auth
            auth, netloc = netloc.rsplit(u"@", 1)
            if u":" in auth:
                auth = tuple(auth.split(u":", 1))
            else:
                auth = (auth, u"")

        return auth, netloc.encode("idna").decode("ascii")

    @staticmethod
    def _ascii_path(path):
        """Make sure that path is url encoded and ascii compatible."""
        try:
            # If this statement passes then path must contain only ascii characters
            return path.encode("ascii").decode("ascii")
        except UnicodeEncodeError:
            # Path must contain non ascii characters
            return quote(path)

    @staticmethod
    def _ascii_query(query, params):
        """Make sure that query is urlencoded and ascii compatible."""
        if query:
            # Ensure that query contains only valid characters
            qsl = parse_qsl(query, keep_blank_values=True)
            query = urlencode(qsl)

        if query and params:
            extra_query = urlencode(params, doseq=True)
            return u"{}&{}".format(query, extra_query)
        elif params:
            return urlencode(params, doseq=True)
        elif query:
            return query
        else:
            return u""

    @property
    def selector(self):
        """Resource selector, with the url path and query parts."""
        if self._urlparts.query:
            return u"{}?{}".format(self._urlparts.path, self._urlparts.query)
        else:
            return self._urlparts.path

    def header_items(self):
        """
        Return the request headers as (name, value) pairs of the native ``str`` type.

        On python2 the unicode headers are encoded into bytes first.
        """
        if py3:
            return self.headers.items()
        else:
            return self._py2_header_items()

    def _py2_header_items(self):
        """Return request headers with no unicode value to be compatible with python2"""
        # noinspection PyCompatibility
        for key, value in self.headers.iteritems():
            key = key.encode("ascii")
            value = value.encode("iso-8859-1")
            yield key, value
class UnicodeDict(dict):
    """
    Dict built from any number of mappings, with keys and values converted to unicode.

    Later mappings take precedence over earlier ones. Empty mappings, and
    entries with a value of None, are left out.
    """

    def __init__(self, *mappings):
        super(UnicodeDict, self).__init__()
        for source in mappings:
            if not source:
                continue

            # noinspection PyUnresolvedReferences
            for raw_key, raw_value in source.items():
                if raw_value is None:
                    continue
                text_key = make_unicode(raw_key)
                text_value = make_unicode(raw_value)
                self[text_key] = text_value
def make_unicode(data, encoding="utf8", errors="strict"):
    """
    Ensure that data is a unicode string

    :param data: Bytes to decode, any other object is converted with ``unicode()``.
    :param str encoding: Encoding used to decode bytes.
    :param str errors: Error handler used when decoding bytes. Defaults to "strict";
                       the previous default of an empty string is not a registered
                       handler name, so undecodable bytes failed with a LookupError
                       rather than the expected UnicodeDecodeError.
    :return: The data as unicode.
    """
    if isinstance(data, bytes):
        return data.decode(encoding, errors)
    else:
        # noinspection PyArgumentList
        return unicode(data)
# ########################## Public API ##########################
class Session(ConnectionManager):
    """
    Provides cookie persistence and connection-pooling plus configuration.

    :param kwargs: Default configuration for session variables.

    :ivar int max_repeats: Max number of repeat redirects. Defaults to `4`
    :ivar int max_redirects: Max number of redirects. Defaults to `10`
    :ivar bool allow_redirects: Enable/disable redirection. Defaults to ``True``
    :ivar bool raise_for_status: Raise HTTPError if status code is > 400. Defaults to ``False``
    :ivar int max_age: Max age the cache can be, before it’s considered stale. -1 will disable caching.
                       Defaults to :data:`MAX_AGE <urlquick.MAX_AGE>`
    """
    # This is here so the kodi related code can change
    # this value to True for a better kodi experience.
    default_raise_for_status = False

    def __init__(self, **kwargs):
        super(Session, self).__init__()
        self._headers = CaseInsensitiveDict()

        # Set Default headers
        self._headers[u"Accept"] = u"*/*"
        self._headers[u"Accept-Encoding"] = u"gzip, deflate"
        self._headers[u"Accept-language"] = u"en-gb,en-us,en"
        self._headers[u"Connection"] = u"keep-alive"

        # Session Controls
        self._cm = ConnectionManager()
        self._cookies = dict()
        self._params = dict()
        self._auth = None

        # Set session configuration settings
        self.max_age = kwargs.get("max_age", MAX_AGE)
        self.max_repeats = kwargs.get("max_repeats", 4)
        self.max_redirects = kwargs.get("max_redirects", 10)
        self.allow_redirects = kwargs.get("allow_redirects", True)
        self.raise_for_status = kwargs.get("raise_for_status", self.default_raise_for_status)

    @property
    def auth(self):
        """
        Default Authentication tuple to attach to Request.

        :return: Default authentication tuple.
        :rtype: tuple
        """
        return self._auth

    @auth.setter
    def auth(self, value):
        """Set Default Authentication tuple."""
        if isinstance(value, (tuple, list)):
            self._auth = value
        else:
            raise ValueError("Invalid type: {}, tuple or list required".format(type(value)))

    @property
    def cookies(self):
        """
        Dictionary of cookies to attach to each request.

        :return: Session cookies
        :rtype: dict
        """
        return self._cookies

    @cookies.setter
    def cookies(self, _dict):
        """Replace session cookies with new cookies dict"""
        if isinstance(_dict, dict):
            self._cookies = _dict
        else:
            raise ValueError("Invalid type: {}, dict required".format(type(_dict)))

    @property
    def headers(self):
        """
        Dictionary of headers to attach to each request.

        :return: Session headers
        :rtype: dict
        """
        return self._headers

    @property
    def params(self):
        """
        Dictionary of querystrings to attach to each Request. The dictionary values
        may be lists for representing multivalued query parameters.

        :return: Session params
        :rtype: dict
        """
        return self._params

    @params.setter
    def params(self, _dict):
        """Replace session params with new params dict"""
        if isinstance(_dict, dict):
            self._params = _dict
        else:
            raise ValueError("Invalid type: {}, dict required".format(type(_dict)))

    def get(self, url, params=None, **kwargs):
        """
        Sends a GET request.

        Requests data from a specified resource.

        :param str url: Url of the remote resource.
        :param dict params: [opt] Dictionary of url query key/value pairs.
        :param kwargs: Optional arguments that :func:`request <urlquick.request>` takes.

        :return: A requests like Response object.
        :rtype: urlquick.Response
        """
        kwargs["params"] = params
        return self.request(u"GET", url, **kwargs)

    def head(self, url, **kwargs):
        """
        Sends a HEAD request.

        Same as GET but returns only HTTP headers and no document body.

        :param str url: Url of the remote resource.
        :param kwargs: Optional arguments that :func:`request <urlquick.request>` takes.

        :return: A requests like Response object.
        :rtype: urlquick.Response
        """
        return self.request(u"HEAD", url, **kwargs)

    def post(self, url, data=None, json=None, **kwargs):
        """
        Sends a POST request.

        Submits data to be processed to a specified resource.

        :param str url: Url of the remote resource.
        :param data: [opt] Dictionary (will be form-encoded) or bytes sent in the body of the Request.
        :param json: [opt] Json data sent in the body of the Request.
        :param kwargs: Optional arguments that :func:`request <urlquick.request>` takes.

        :return: A requests like Response object.
        :rtype: urlquick.Response
        """
        return self.request(u"POST", url, data=data, json=json, **kwargs)

    def put(self, url, data=None, **kwargs):
        """
        Sends a PUT request.

        Uploads a representation of the specified URI.

        :param str url: Url of the remote resource.
        :param data: [opt] Dictionary (will be form-encoded) or bytes sent in the body of the Request.
        :param kwargs: Optional arguments that :func:`request <urlquick.request>` takes.

        :return: A requests like Response object.
        :rtype: urlquick.Response
        """
        return self.request(u"PUT", url, data=data, **kwargs)

    def patch(self, url, data=None, **kwargs):
        """
        Sends a PATCH request.

        :param str url: Url of the remote resource.
        :param data: [opt] Dictionary (will be form-encoded) or bytes sent in the body of the Request.
        :param kwargs: Optional arguments that :func:`request <urlquick.request>` takes.

        :return: A requests like Response object.
        :rtype: urlquick.Response
        """
        return self.request(u"PATCH", url, data=data, **kwargs)

    def delete(self, url, **kwargs):
        """
        Sends a DELETE request.

        :param str url: Url of the remote resource.
        :param kwargs: Optional arguments that :func:`request <urlquick.request>` takes.

        :return: A requests like Response object.
        :rtype: urlquick.Response
        """
        return self.request(u"DELETE", url, **kwargs)

    def request(self, method, url, params=None, data=None, headers=None, cookies=None, auth=None,
                timeout=10, allow_redirects=None, verify=True, json=None, raise_for_status=None, max_age=None):
        """
        Make request for remote resource.

        :param str method: HTTP request method, GET, HEAD, POST.
        :param str url: Url of the remote resource.
        :param dict params: [opt] Dictionary of url query key/value pairs.
        :param data: [opt] Dictionary (will be form-encoded) or bytes sent in the body of the Request.
        :param dict headers: [opt] HTTP request headers.
        :param dict cookies: [opt] Dictionary of cookies to send with the request.
        :param tuple auth: [opt] (username, password) for basic authentication.
        :param int timeout: [opt] Connection timeout in seconds.
        :param bool allow_redirects: [opt] Enable/disable redirection. Defaults to ``True``.
        :param bool verify: [opt] Controls whether to verify the server's TLS certificate. Defaults to ``True``
        :param json: [opt] Json data sent in the body of the Request.
        :param bool raise_for_status: [opt] Raise's HTTPError if status code is > 400. Defaults to ``False``.
        :param int max_age: [opt] Age the 'cache' can be, before it’s considered stale. -1 will disable caching.
                            Defaults to :data:`MAX_AGE <urlquick.MAX_AGE>`

        :return: A requests like Response object.
        :rtype: urlquick.Response

        :raises MaxRedirects: If too many redirects was detected.
        :raises ConnError: If connection to server failed.
        :raises HTTPError: If response status is greater or equal to 400 and raise_for_status is ``True``.
        :raises SSLError: If an SSL error occurs while sending the request.
        :raises Timeout: If the connection to server timed out.
        """
        # Fetch settings from local or session
        allow_redirects = self.allow_redirects if allow_redirects is None else allow_redirects
        raise_for_status = self.raise_for_status if raise_for_status is None else raise_for_status

        # Ensure that all mappings are of unicode data
        # Request level values take precedence over the session level values
        req_headers = CaseInsensitiveDict(self._headers, headers)
        req_cookies = UnicodeDict(self._cookies, cookies)
        req_params = UnicodeDict(self._params, params)

        # Add cookies to headers
        if req_cookies and u"Cookie" not in req_headers:
            header = u"; ".join([u"{}={}".format(key, value) for key, value in req_cookies.items()])
            req_headers[u"Cookie"] = header

        # Fetch max age of cache
        max_age = (-1 if self.max_age is None else self.max_age) if max_age is None else max_age

        # Parse url into it's individual components including params if given
        req = Request(method, url, req_headers, data, json, req_params)
        logger.debug("Requesting resource: %s", req.url)
        logger.debug("Request headers: %s", req.headers)
        if data:
            logger.debug("Request data: %s", req.data)

        # Add Authorization header if needed
        # Precedence: auth argument, credentials within the url, session auth
        auth = auth or req.auth or self._auth
        if auth:
            auth = self._auth_header(*auth)
            req.headers[u"Authorization"] = auth

        # Request monitors
        history = []
        visited = defaultdict(int)
        start_time = datetime.utcnow()

        while True:
            # Send a request for resource
            raw_resp = self.make_request(req, timeout, verify, max_age)
            resp = Response(raw_resp, req, start_time, history[:])
            visited[req.url] += 1

            # Process the response
            if allow_redirects and resp.is_redirect:
                history.append(resp)
                if len(history) >= self.max_redirects:
                    raise MaxRedirects("max_redirects exceeded")
                if visited[req.url] >= self.max_repeats:
                    raise MaxRedirects("max_repeat_redirects exceeded")

                # Create new request for redirect
                location = resp.headers.get(u"location")
                if resp.status_code in (307, 308):
                    # Both 307 and 308 require that the request method
                    # and body are kept unchanged when following the redirect
                    req = Request(req.method, location, req_headers, req.data, referer=req.url)
                else:
                    req = Request(u"GET", location, req_headers, referer=req.url)
                logger.debug("Redirecting to = %s", unquote(req.url))

            # Add Authorization Credentials if needed
            # The redirected request is built from the original headers, so it
            # has no Authorization header until a 401 response asks for one
            elif auth and resp.status_code == 401 and u"Authorization" not in req.headers:
                req.headers[u"Authorization"] = auth

            # According to RFC 2616, "2xx" code indicates that the client's
            # request was successfully received, understood, and accepted.
            # Therefore all other codes will be considered as errors.
            elif raise_for_status:
                resp.raise_for_status()
                return resp
            else:
                return resp

    def __enter__(self):
        return self

    def __exit__(self, *args):
        # Close all pooled connections when leaving the with block
        self.close()

    @staticmethod
    def _auth_header(username, password):
        """Return the value of a basic authentication header for the given credentials."""
        # Ensure that username & password is of type bytes
        if isinstance(username, unicode):
            username = username.encode("utf8")
        if isinstance(password, unicode):
            password = password.encode("utf8")

        # Create basic authentication header
        auth = username + b":" + password
        auth = b64encode(auth).decode("ascii")
        return u"Basic {}".format(auth)
class Response(object):
    """
    A Response object containing all data returned from the server.

    The whole content body is read into memory and the raw response is closed
    while the object is being created.

    :param response: Raw response object, providing ``status``, ``reason``, ``read()``,
                     ``close()`` and ``getheaders()``.
    :param org_request: The :class:`Request <urlquick.Request>` object that produced this response.
    :param start_time: ``datetime`` (UTC) marking when the request was sent, used to compute ``elapsed``.
    :param list history: List of earlier Response objects (redirects) that led to this one.
    """

    # noinspection PyArgumentList
    def __init__(self, response, org_request, start_time, history):
        #: The default encoding, used when no encoding is given.
        self.apparent_encoding = "utf8"

        #: File-like object representation of response (for advanced usage).
        self.raw = response

        #: Final URL location of Response.
        self.url = org_request.url

        #: The :class:`Request <urlquick.Request>` object to which this is a response.
        self.request = org_request

        #: A list of Response objects from the history of the Request.
        #: Any redirect responses will end up here.
        self.history = history

        #: The amount of time elapsed, between sending the request and
        #: the arrival of the response (as a timedelta).
        self.elapsed = datetime.utcnow() - start_time

        #: Integer Code of responded HTTP Status e.g. 404 or 200.
        self.status_code = response.status

        #: Textual reason of response HTTP Status e.g. “Not Found” or “OK”.
        self.reason = unicode(response.reason)

        # Fetch content body, making sure the raw response
        # gets closed even when reading the body fails
        try:
            self._body = response.read()
        finally:
            response.close()

        # Fetch response headers and convert to CaseInsensitiveDict if needed
        headers = response.getheaders()
        if isinstance(headers, CaseInsensitiveDict):
            self._headers = headers
        else:
            self._headers = CaseInsensitiveDict(headers)

    @CachedProperty
    def encoding(self):
        """
        Encoding, to decode with, when accessing :meth:`resp.text <urlquick.Response.text>`.

        Taken from the ``charset`` parameter of the ``Content-Type`` header.
        Will be ``None`` when the header is missing or has no usable charset.
        """
        content_type = self._headers.get(u"Content-Type")
        if content_type:
            for section in content_type.split(u";"):
                # A parameter without "=" carries no value, so it gets skipped instead of raising
                name, sep, value = section.partition(u"=")
                if sep and name.strip().lower() == u"charset":
                    # The value may be given as a quoted string, e.g. charset="utf-8"
                    value = value.strip().strip(u"'\"")
                    if value:
                        return value
        return None

    # Allow encoding property to be set by the user
    encoding.allow_setter = True

    @CachedProperty
    def content(self):
        """
        Content of the response in bytes.

        The body is decompressed when the ``Content-Encoding`` header names gzip or deflate.

        :raises ContentError: If content fails to decompress, or the content encoding is unknown.
        """
        # Check if Response need to be decoded, else return raw response
        content_encoding = self._headers.get(u"content-encoding", u"").lower()
        if u"gzip" in content_encoding:
            # "16 + MAX_WBITS" instructs zlib to expect a gzip header and trailer
            decoder = zlib.decompressobj(16 + zlib.MAX_WBITS)
        elif u"deflate" in content_encoding:
            decoder = zlib.decompressobj()
        elif content_encoding:
            raise ContentError("Unknown encoding: {}".format(content_encoding))
        else:
            return self._body

        try:
            return decoder.decompress(self._body)
        except (IOError, zlib.error) as e:
            raise ContentError("Failed to decompress content body: {}".format(e))

    @staticmethod
    def _same_codec(first, second):
        """Return ``True`` if both encoding names resolve to the same codec, unknown names never match."""
        try:
            return getencoder(first) == getencoder(second)
        except LookupError:
            return False

    @CachedProperty
    def text(self):
        """
        Content of the response in unicode.

        The response content will be decoded using the best available encoding based on the response headers.
        Will fallback to :data:`apparent_encoding <urlquick.Response.apparent_encoding>`
        if no encoding was given within headers, or if that encoding is unknown or fails to decode the content.
        The last resort is 'iso-8859-1', which is able to decode any sequence of bytes.
        """
        declared = self.encoding
        if declared:
            try:
                return self.content.decode(declared)
            except (UnicodeDecodeError, LookupError):
                # LookupError is raised when there is no codec for the declared charset
                logger.debug("Failed to decode content with given encoding: '%s'", declared)

        apparent_encoding = self.apparent_encoding
        # No point in trying the default encoding when it is the one that just failed
        if apparent_encoding and not (declared and self._same_codec(declared, apparent_encoding)):
            logger.debug("Attempting to decode with default encoding: '%s'", apparent_encoding)
            try:
                return self.content.decode(apparent_encoding)
            except (UnicodeDecodeError, LookupError):
                logger.debug("Failed to decode content with default encoding: %s, "
                             "switching to fallback encoding: 'iso-8859-1'", apparent_encoding)
        else:
            logger.debug("Attempting to decode with fallback encoding: 'iso-8859-1'")

        return self.content.decode("iso-8859-1")

    @CachedProperty
    def cookies(self):
        """A dictionary of Cookies the server sent back."""
        if u"Set-Cookie" in self._headers:
            cookies = self._headers[u"Set-Cookie"]
            if py3:
                cookiejar = SimpleCookie(cookies)
            else:
                cookiejar = SimpleCookie(cookies.encode("iso-8859-1"))

            return {cookie.key: cookie.value for cookie in cookiejar.values()}
        else:
            return {}

    @CachedProperty
    def links(self):
        """
        Dictionary of 'parsed header links' of the response, if any.

        Each link is a dict holding its ``url`` plus all of its parameters.
        The links are keyed by their ``rel`` parameter, or by url when there is no ``rel``.
        """
        if u"link" not in self._headers:
            return {}

        links = {}
        replace_chars = u" '\""
        for val in re.split(u", *<", self._headers[u"link"]):
            try:
                url, params = val.split(";", 1)
            except ValueError:
                url, params = val, u""

            link = {u"url": url.strip("<> '\"")}
            for param in params.split(";"):
                try:
                    # Only split on the first "=", as the value itself may contain that character
                    key, value = param.split("=", 1)
                except ValueError:
                    break

                link[key.strip(replace_chars).lower()] = value.strip(replace_chars)

            key = link.get(u"rel") or link.get(u"url")
            links[key] = link

        return links

    @property
    def headers(self):
        """Case-insensitive Dictionary of Response Headers."""
        return self._headers

    @property
    def is_redirect(self):
        """``True``, if this Response is a well-formed HTTP redirect, that could have been processed automatically."""
        headers = self._headers
        return u"location" in headers and self.status_code in REDIRECT_CODES

    @property
    def is_permanent_redirect(self):
        """``True``, if this Response is one of the permanent versions of redirect."""
        headers = self._headers
        return u"location" in headers and self.status_code in (301, 308)

    @property
    def ok(self):
        """
        ``True``, if status_code is less than 400.

        A status code of 400 or above signals a client error or a server error, any status code
        below that is considered fine. This is not a check to see if the response code is 200 "OK".
        """
        return self.status_code < 400

    def json(self, **kwargs):
        """
        Returns the json-encoded content of a response.

        :param kwargs: [opt] Arguments that :func:`json.loads` takes.
        :raises ValueError: If the response body does not contain valid json.
        """
        return _json.loads(self.text, **kwargs)

    def xml(self):
        """
        Parse's "XML" document into a element tree.

        :return: The root element of the element tree.
        :rtype: xml.etree.ElementTree.Element
        """
        from xml.etree import ElementTree
        return ElementTree.fromstring(self.content)

    def parse(self, tag=u"", attrs=None):
        """
        Parse's "HTML" document into a element tree using HTMLement.

        .. seealso:: The htmlement documentation can be found at.\n
                     http://python-htmlement.readthedocs.io/en/stable/?badge=stable

        :param str tag: [opt] Name of 'element' which is used to filter tree to required section.

        :type attrs: dict
        :param attrs: [opt] Attributes of 'element', used when searching for required section.
                            Attrs should be a dict of unicode key/value pairs.

        :return: The root element of the element tree.
        :rtype: xml.etree.ElementTree.Element

        :raise MissingDependency: If the optional 'HTMLement' dependency is missing.
        """
        try:
            # noinspection PyUnresolvedReferences
            from htmlement import HTMLement
        except ImportError:
            raise MissingDependency("Missing optional dependency named 'HTMLement'")
        else:
            parser = HTMLement(unicode(tag), attrs)
            parser.feed(self.text)
            return parser.close()

    def iter_content(self, chunk_size=512, decode_unicode=False):
        """
        Iterates over the response data. The chunk size are the number of bytes it should read into memory.
        This is not necessarily the length of each item returned, as decoding can take place.

        The content is already fully loaded, so this only slices it up into chunks.

        :param int chunk_size: [opt] The chunk size to use for each chunk.
                               ``None`` will return the whole content as a single chunk. (default=512)
        :param bool decode_unicode: [opt] ``True`` to return unicode, else ``False`` to return bytes.
                                    (default=``False``)
        """
        content = self.text if decode_unicode else self.content

        # Requests accepts a chunk size of None, here that gives the whole content in one go
        if chunk_size is None:
            if content:
                yield content
            return

        prevnl = 0
        while True:
            chucknl = prevnl + chunk_size
            data = content[prevnl:chucknl]
            if not data:
                break
            yield data
            prevnl = chucknl

    # noinspection PyUnusedLocal
    def iter_lines(self, chunk_size=None, decode_unicode=False, delimiter=b"\n"):
        """
        Iterates over the response data, one line at a time.

        :param int chunk_size: [opt] Unused, here for compatibility with requests.
        :param bool decode_unicode: [opt] ``True`` to return unicode, else ``False`` to return bytes.
                                    (default=``False``)
        :param bytes delimiter: [opt] Delimiter used as the end of line marker. Will be converted
                                to match the type of the content. (default=b'\\\\n')

        :raises ValueError: If the delimiter is empty.
        """
        if decode_unicode:
            content = self.text
            # The delimiter has to be decoded, as converting bytes using
            # unicode() under python 3 gives the repr of the bytes object
            if isinstance(delimiter, bytes):
                delimiter = delimiter.decode("utf8")
        else:
            content = self.content
            # Bytes can only be searched using a bytes delimiter
            if isinstance(delimiter, unicode):
                delimiter = delimiter.encode("utf8")

        # An empty delimiter is found at every position, which would result in an endless loop
        if not delimiter:
            raise ValueError("delimiter must not be empty")

        prevnl = 0
        sepsize = len(delimiter)
        while True:
            nextnl = content.find(delimiter, prevnl)
            if nextnl < 0:
                # No more delimiters, whatever is left makes up the last line
                yield content[prevnl:]
                break
            yield content[prevnl:nextnl]
            prevnl = nextnl + sepsize

    def raise_for_status(self):
        """
        Raises :class:`HTTPError`, if the response is a client error or a server error.

        :raises HTTPError: If response status code is greater or equal to 400
        """
        # Only the "4xx" (client error) and "5xx" (server error)
        # ranges are treated as errors, see the "ok" property.
        if self.status_code >= 400:
            raise HTTPError(self.url, self.status_code, self.reason, self.headers)

    def close(self):
        """Does nothing, the raw response is closed when this object is created."""
        pass

    def __iter__(self):
        """Allows to use a response as an iterator."""
        return self.iter_content()

    def __bool__(self):
        """Returns True if status_code is less than 400."""
        return self.ok

    def __nonzero__(self):
        """Returns True if status_code is less than 400."""
        return self.ok

    def __repr__(self):
        return "<Response [{}]>".format(self.status_code)
def request(method, url, params=None, data=None, headers=None, cookies=None, auth=None,
            timeout=10, allow_redirects=None, verify=True, json=None, raise_for_status=None, max_age=None):
    """
    Make request for remote resource.

    A new session is created for this one request and closed again once the request is done.

    :param str method: HTTP request method, GET, HEAD, POST.
    :param str url: Url of the remote resource.
    :param dict params: [opt] Dictionary of url query key/value pairs.
    :param data: [opt] Dictionary (will be form-encoded) or bytes sent in the body of the Request.
    :param dict headers: [opt] HTTP request headers.
    :param dict cookies: [opt] Dictionary of cookies to send with the request.
    :param tuple auth: [opt] (username, password) for basic authentication.
    :param int timeout: [opt] Connection timeout in seconds.
    :param bool allow_redirects: [opt] Enable/disable redirection. Defaults to ``True``.
    :param bool verify: [opt] Controls whether to verify the server's TLS certificate. Defaults to ``True``
    :param json: [opt] Json data sent in the body of the Request.
    :param bool raise_for_status: [opt] Raise's HTTPError if status code is >= 400. Defaults to ``False``.
    :param int max_age: [opt] Age the 'cache' can be, before it’s considered stale. -1 will disable caching.
                        Defaults to :data:`MAX_AGE <urlquick.MAX_AGE>`

    :return: A requests like Response object.
    :rtype: urlquick.Response

    :raises MaxRedirects: If too many redirects was detected.
    :raises ConnError: If connection to server failed.
    :raises HTTPError: If response status is greater or equal to 400 and raise_for_status is ``True``.
    :raises SSLError: If an SSL error occurs while sending the request.
    :raises Timeout: If the connection to server timed out.
    """
    session = Session()
    with session:
        response = session.request(method, url, params, data, headers, cookies, auth, timeout,
                                   allow_redirects, verify, json, raise_for_status, max_age)
    return response
def get(url, params=None, **kwargs):
    """
    Sends a GET request.

    Requests data from a specified resource, using a session that only lives for this one request.

    :param str url: Url of the remote resource.
    :param dict params: [opt] Dictionary of url query key/value pairs.
    :param kwargs: Optional arguments that :func:`request <urlquick.request>` takes.

    :return: A requests like Response object.
    :rtype: urlquick.Response
    """
    session = Session()
    with session:
        response = session.request(u"GET", url, params=params, **kwargs)
    return response
def head(url, **kwargs):
    """
    Sends a HEAD request.

    Same as GET but returns only HTTP headers and no document body.
    The session used only lives for this one request.

    :param str url: Url of the remote resource.
    :param kwargs: Optional arguments that :func:`request <urlquick.request>` takes.

    :return: A requests like Response object.
    :rtype: urlquick.Response
    """
    session = Session()
    with session:
        response = session.request(u"HEAD", url, **kwargs)
    return response
def post(url, data=None, json=None, **kwargs):
    """
    Sends a POST request.

    Submits data to be processed to a specified resource, using a session
    that only lives for this one request.

    :param str url: Url of the remote resource.
    :param data: [opt] Dictionary (will be form-encoded) or bytes sent in the body of the Request.
    :param json: [opt] Json data sent in the body of the Request.
    :param kwargs: Optional arguments that :func:`request <urlquick.request>` takes.

    :return: A requests like Response object.
    :rtype: urlquick.Response
    """
    session = Session()
    with session:
        response = session.request(u"POST", url, data=data, json=json, **kwargs)
    return response
def put(url, data=None, **kwargs):
    """
    Sends a PUT request.

    Uploads a representation of the specified URI, using a session
    that only lives for this one request.

    :param str url: Url of the remote resource.
    :param data: [opt] Dictionary (will be form-encoded) or bytes sent in the body of the Request.
    :param kwargs: Optional arguments that :func:`request <urlquick.request>` takes.

    :return: A requests like Response object.
    :rtype: urlquick.Response
    """
    session = Session()
    with session:
        response = session.request(u"PUT", url, data=data, **kwargs)
    return response
def patch(url, data=None, **kwargs):
    """
    Sends a PATCH request.

    The session used only lives for this one request.

    :param str url: Url of the remote resource.
    :param data: [opt] Dictionary (will be form-encoded) or bytes sent in the body of the Request.
    :param kwargs: Optional arguments that :func:`request <urlquick.request>` takes.

    :return: A requests like Response object.
    :rtype: urlquick.Response
    """
    session = Session()
    with session:
        response = session.request(u"PATCH", url, data=data, **kwargs)
    return response
def delete(url, **kwargs):
    """
    Sends a DELETE request.

    The session used only lives for this one request.

    :param str url: Url of the remote resource.
    :param kwargs: Optional arguments that :func:`request <urlquick.request>` takes.

    :return: A requests like Response object.
    :rtype: urlquick.Response
    """
    session = Session()
    with session:
        response = session.request(u"DELETE", url, **kwargs)
    return response
def cache_cleanup(max_age=None):
    """
    Remove all stale cache files.

    Only files within the cache directory that have a name starting
    with the "cache-" prefix are checked.

    :param int max_age: [opt] The max age the cache can be before removal.
                        defaults => :data:`MAX_AGE <urlquick.MAX_AGE>`
    """
    logger.info("Initiating cache cleanup")
    if max_age is None:
        max_age = MAX_AGE

    directory = CacheHandler.cache_dir()
    prefix = CacheHandler.safe_path(u"cache-")

    for filename in os.listdir(directory):
        # Anything that is not a cache file is left alone
        if not filename.startswith(prefix):
            continue

        # Delete the cache file if it is no longer fresh
        path = os.path.join(directory, filename)
        if not CacheHandler.isfilefresh(path, max_age):
            CacheHandler.delete(path)
def auto_cache_cleanup(max_age=60 * 60 * 24 * 14):
    """
    Check if the cache needs cleanup. Uses a empty file to keep track.

    The modification time of the tracking file records the last cleanup. A new cleanup
    is started when that time is more than twice ``max_age`` in the past,
    or when the tracking file does not exist.

    :param int max_age: [opt] The max age the cache can be before removal.
                        defaults => 1209600 (14 days)

    :returns: True if cache was cleaned else false if no cache cleanup was started.
    :rtype: bool
    """
    marker = os.path.join(CACHE_LOCATION, "cache_check")
    if os.path.exists(marker):
        last_check = os.stat(marker).st_mtime
    else:
        # Never checked before
        last_check = 0

    # Nothing to do if the last cleanup was recent enough
    elapsed = time.time() - last_check
    if elapsed <= max_age * 2:
        return False

    cache_cleanup(max_age)

    # Record the time of this cleanup, creating the tracking file if needed
    try:
        os.utime(marker, None)
    except OSError:
        open(marker, "a").close()
    return True
#############
# Kodi Only #
#############
# Set the location of the cache file to the addon data directory
# _addon_data = __import__("xbmcaddon").Addon()
# _CACHE_LOCATION = __import__("xbmc").translatePath(_addon_data.getAddonInfo("profile"))
# CACHE_LOCATION = _CACHE_LOCATION.decode("utf8") if isinstance(_CACHE_LOCATION, bytes) else _CACHE_LOCATION
# logger.debug("Cache location: %s", CACHE_LOCATION)
# Session.default_raise_for_status = True
# Check if cache cleanup is required.
# NOTE: This is a top-level statement, so it runs whenever the module gets imported.
# Stale cache files may be deleted as a side effect of the import.
auto_cache_cleanup()