# Source code for grab.cookie

"""
RTFM:

* http://docs.python.org/2/library/cookielib.html#cookie-objects

Some code was taken from
    https://github.com/kennethreitz/requests/blob/master/requests/cookies.py
"""
import json

from six.moves.http_cookiejar import CookieJar, Cookie
from six.moves.urllib.parse import urlparse, urlunparse

from grab.error import GrabMisuseError

# Names of the cookie attributes that `CookieManager.get_dict` reads from
# each cookie (via `getattr`) when it builds the dump written by
# `CookieManager.save_to_file`.
COOKIE_ATTRS = ('name', 'value', 'version', 'port', 'domain',
                'path', 'secure', 'expires', 'discard', 'comment',
                'comment_url', 'rfc2109')


# Source:
# https://github.com/kennethreitz/requests/blob/master/requests/cookies.py
class MockRequest(object):
    """Adapter exposing the `urllib2.Request`-style API used by cookielib.

    The wrapped object only needs two attributes: `url` (a string) and
    `headers` (a mapping).  It is never modified: any header that
    `cookielib.CookieJar` wants to attach is stored in a separate dict,
    which the caller fetches afterwards with `get_new_headers()`.
    """

    def __init__(self, request):
        self._new_headers = {}
        self._req = request
        self.type = urlparse(request.url).scheme

    # -- request description ------------------------------------------------

    def get_type(self):
        """Return the URL scheme of the wrapped request."""
        return self.type

    def get_host(self):
        """Return the network location (host[:port]) of the request URL."""
        url_parts = urlparse(self._req.url)
        return url_parts.netloc

    def get_origin_req_host(self):
        """Return the origin host; here it is the same as `get_host()`."""
        return self.get_host()

    def get_full_url(self):
        """Return the request URL, honouring an explicit `Host` header.

        When a non-empty `Host` header is present, the URL is rebuilt with
        that value substituted for the network location; otherwise the
        original URL is returned unchanged.
        """
        explicit_host = self._req.headers.get('Host')
        if not explicit_host:
            return self._req.url
        parts = urlparse(self._req.url)
        rebuilt = [parts.scheme, explicit_host, parts.path,
                   parts.params, parts.query, parts.fragment]
        return urlunparse(rebuilt)

    def is_unverifiable(self):
        """Always report the request as unverifiable."""
        return True

    # -- header access ------------------------------------------------------

    def has_header(self, name):
        """Tell whether `name` is an original or a newly added header."""
        if name in self._req.headers:
            return True
        return name in self._new_headers

    def get_header(self, name, default=None):
        """Look `name` up in the original headers, then in the new ones."""
        fallback = self._new_headers.get(name, default)
        return self._req.headers.get(name, fallback)

    def add_header(self, key, val):
        """Unsupported: headers must go through `add_unredirected_header`.

        cookielib has no legitimate use for this method;
        add it back if you find one.
        """
        message = ('Cookie headers should be added'
                   ' with add_unredirected_header()')
        raise NotImplementedError(message)

    def add_unredirected_header(self, name, value):
        """Record a header produced by cookielib (e.g. `Cookie`)."""
        self._new_headers[name] = value

    def get_new_headers(self):
        """Return the dict of headers added since construction."""
        return self._new_headers

    # -- attribute-style aliases of the accessor methods ----------------------

    @property
    def unverifiable(self):
        return self.is_unverifiable()

    @property
    def origin_req_host(self):
        return self.get_origin_req_host()

    @property
    def host(self):
        return self.get_host()


# https://github.com/kennethreitz/requests/blob/master/requests/cookies.py
class MockResponse(object):
    """Wraps a `httplib.HTTPMessage` to mimic a `urllib.addinfourl`.

    Basically, expose the parsed HTTP headers from the server response
    the way `cookielib` expects to see them.
    """

    def __init__(self, headers):
        """Make a MockResponse for `cookielib` to read.

        :param headers: a httplib.HTTPMessage or analogous carrying the headers
        """
        self._headers = headers

    def info(self):
        """Return the wrapped headers object unchanged."""
        return self._headers

    def getheaders(self, name):
        """Return whatever the wrapped object's `getheaders(name)` returns.

        :param name: header name to look up
        """
        # The result used to be computed and then discarded, so this method
        # always returned None; hand it back to the caller instead.
        return self._headers.getheaders(name)





class CookieManager(object):
    """
    Each Grab instance has `cookies` attribute that is instance of
    `CookieManager` class.

    That class contains helpful methods to create, load, save cookies from/to
    different places.
    """

    __slots__ = ('cookiejar',)

    def __init__(self, cookiejar=None):
        """Wrap `cookiejar`, or a fresh empty `CookieJar` when it is None.

        :param cookiejar: existing `CookieJar` instance to manage (optional)
        """
        if cookiejar is not None:
            self.cookiejar = cookiejar
        else:
            self.cookiejar = CookieJar()
        # self.disable_cookiejar_lock(self.cookiejar)

    # def disable_cookiejar_lock(self, cj):
        # cj._cookies_lock = dummy_threading.RLock()

    def set(self, name, value, domain, **kwargs):
        """Add new cookie or replace existing cookie with same parameters.

        :param name: name of cookie
        :param value: value of cookie
        :param domain: domain of cookie; the literal `'localhost'` is
            stored as an empty domain
        :param kwargs: extra attributes of cookie
        """
        if domain == 'localhost':
            domain = ''
        self.cookiejar.set_cookie(create_cookie(name, value, domain, **kwargs))

    def update(self, cookies):
        """Copy every cookie from `cookies` into this manager.

        :param cookies: a `CookieJar` or another `CookieManager`
        :raises GrabMisuseError: if `cookies` is of any other type
        """
        if isinstance(cookies, CookieJar):
            source = cookies
        elif isinstance(cookies, CookieManager):
            source = cookies.cookiejar
        else:
            raise GrabMisuseError('Unknown type of cookies argument: %s'
                                  % type(cookies))
        for cookie in source:
            self.cookiejar.set_cookie(cookie)

    @classmethod
    def from_cookie_list(cls, clist):
        """Build a manager from an iterable of cookie objects.

        :param clist: iterable of objects accepted by `CookieJar.set_cookie`
        :returns: new instance of `cls` holding these cookies
        """
        jar = CookieJar()
        for cookie in clist:
            jar.set_cookie(cookie)
        return cls(jar)

    def clear(self):
        """Drop all cookies by replacing the jar with a new empty one."""
        self.cookiejar = CookieJar()

    def __getstate__(self):
        """Return a state dict in which the jar is replaced by its cookies.

        Every slot declared along the MRO is collected, then `cookiejar` is
        swapped for a plain list stored under `_cookiejar_cookies`.
        """
        state = {}
        for cls in type(self).mro():
            for slot in getattr(cls, '__slots__', ()):
                if slot != '__weakref__' and hasattr(self, slot):
                    state[slot] = getattr(self, slot)
        # NOTE(review): presumably the jar itself is left out because
        # CookieJar holds a lock object that cannot be pickled -- confirm.
        state['_cookiejar_cookies'] = list(self.cookiejar)
        del state['cookiejar']
        return state

    def __setstate__(self, state):
        """Restore slots from `state` and rebuild the jar from its cookies.

        The `state` mapping passed in is left unmodified.
        """
        jar = CookieJar()
        for cookie in state['_cookiejar_cookies']:
            jar.set_cookie(cookie)
        for slot, value in state.items():
            if slot != '_cookiejar_cookies':
                setattr(self, slot, value)
        # Assigned last so the rebuilt jar always wins over any stale
        # `cookiejar` entry in `state`.
        self.cookiejar = jar

    def __getitem__(self, key):
        """Return the value of the first cookie whose name equals `key`.

        :raises KeyError: if no cookie has that name
        """
        for cookie in self.cookiejar:
            if cookie.name == key:
                return cookie.value
        # Carry the missing name so the error is self-explanatory.
        raise KeyError(key)

    def items(self):
        """Return a list of `(name, value)` tuples, one per cookie."""
        return [(cookie.name, cookie.value) for cookie in self.cookiejar]

    def load_from_file(self, path):
        """
        Load cookies from the file.

        Content of file should be a JSON-serialized list of dicts.
        Each dict must provide `name`, `value` and `domain`; all remaining
        keys are forwarded to `set` as extra cookie attributes.
        An empty file loads nothing.

        :param path: path of the file to read
        """
        with open(path) as inf:
            data = inf.read()
        # An empty file means "no cookies": use an empty list, matching the
        # documented list-of-dicts payload.
        items = json.loads(data) if data else []
        for item in items:
            extra = dict((x, y) for x, y in item.items()
                         if x not in ('name', 'value', 'domain'))
            self.set(item['name'], item['value'], item['domain'], **extra)

    def get_dict(self):
        """Return a list of dicts, one per cookie, keyed by `COOKIE_ATTRS`.

        Despite the method name the result is a list, not a dict.
        """
        return [dict((x, getattr(cookie, x)) for x in COOKIE_ATTRS)
                for cookie in self.cookiejar]

    def save_to_file(self, path):
        """
        Dump all cookies to file.

        Cookies are dumped as a JSON-serialized list of dicts (the result
        of `get_dict`).

        :param path: path of the file to write
        """
        with open(path, 'w') as out:
            out.write(json.dumps(self.get_dict()))

    def get_cookie_header(self, req):
        """
        Return the `Cookie` header value the jar produces for `req`.

        :param req: object with httplib.Request interface
            Actually, it have to have `url` and `headers` attributes
        :returns: the header value, or None if no `Cookie` header was added
        """
        mocked_req = MockRequest(req)
        self.cookiejar.add_cookie_header(mocked_req)
        return mocked_req.get_new_headers().get('Cookie')