Source code for grab.util.cookies

"""The module provides utilities for working with cookies.

Manuals:

* https://docs.python.org/3/library/http.cookiejar.html#cookie-objects

Some code was taken from
    https://github.com/kennethreitz/requests/blob/master/requests/cookies.py
"""
from __future__ import annotations

import urllib.request
from collections.abc import Mapping, Sequence
from copy import copy
from http.client import HTTPMessage, HTTPResponse
from http.cookiejar import Cookie, CookieJar
from typing import Any, cast
from urllib.parse import urlparse, urlunparse

from urllib3._collections import HTTPHeaderDict


# Reference:
# https://docs.python.org/3/library/http.cookiejar.html#http.cookiejar.CookieJar.add_cookie_header
# Source:
# https://github.com/kennethreitz/requests/blob/master/requests/cookies.py
class MockRequest:
    """Present a URL and a header dict through a `urllib.request.Request`-like API.

    `http.cookiejar.CookieJar` expects this interface in order to apply its
    cookie policy, i.e. to decide whether a cookie may be set given the
    domains of the request and of the cookie.

    The headers passed to the constructor are never modified. Headers added
    through `add_unredirected_header()` are kept separately; the caller is
    responsible for collecting them with `get_new_headers()` and interpreting
    them appropriately.
    """

    def __init__(self, url: str, headers: dict[str, str]) -> None:
        """Remember the request URL and the headers sent with it.

        :param url: URL of the request
        :param headers: request headers, looked up by exact key
        """
        self._headers = headers
        self._url = url
        # URL scheme, exposed as a plain attribute as well as via get_type().
        self.type = urlparse(url).scheme
        # Headers added later through add_unredirected_header().
        self._new_headers: dict[str, Any] = {}

    def get_type(self) -> str:
        """Return the scheme of the request URL."""
        return self.type

    def get_host(self) -> str:
        """Return the network location part of the request URL."""
        parts = urlparse(self._url)
        return parts.netloc

    def get_origin_req_host(self) -> str:
        """Return the same value as `get_host()`."""
        return self.get_host()

    def get_full_url(self) -> str:
        """Return the request URL, honouring an explicit ``Host`` header.

        When the ``Host`` header is missing or empty the URL is returned
        untouched; otherwise its network location is replaced with the header
        value and every other component is kept.
        """
        explicit_host = self._headers.get("Host")
        if not explicit_host:
            # No override requested: the stored URL is already the answer.
            return self._url
        # Swap only the netloc so the URL names the host the caller asked for.
        return urlunparse(urlparse(self._url)._replace(netloc=explicit_host))

    def is_unverifiable(self) -> bool:
        """Report the request as unverifiable; this is always ``True``."""
        return True

    def has_header(self, name: str) -> bool:
        """Tell whether *name* is among the original or the added headers."""
        if name in self._headers:
            return True
        return name in self._new_headers

    def get_header(self, name: str, default: Any = None) -> str:
        """Return the value of header *name*.

        Original headers take precedence over added ones; *default* is
        returned when neither collection has the header.
        """
        fallback = self._new_headers.get(name, default)
        return self._headers.get(name, fallback)

    def add_header(self, key: str, val: str) -> None:
        """Refuse to add a header.

        Cookielib has no legitimate use for this method, so it always raises.

        :raises NotImplementedError: on every call
        """
        message = "Cookie headers should be added with add_unredirected_header()"
        raise NotImplementedError(message)

    def add_unredirected_header(self, name: str, value: str) -> None:
        """Record a new header without touching the original headers."""
        self._new_headers[name] = value

    def get_new_headers(self) -> dict[str, str]:
        """Return the headers recorded via `add_unredirected_header()`."""
        return self._new_headers

    @property
    def unverifiable(self) -> bool:
        """Attribute-style access to `is_unverifiable()`."""
        return self.is_unverifiable()

    @property
    def origin_req_host(self) -> str:
        """Attribute-style access to `get_origin_req_host()`."""
        return self.get_origin_req_host()

    @property
    def host(self) -> str:
        """Attribute-style access to `get_host()`."""
        return self.get_host()
# https://github.com/kennethreitz/requests/blob/master/requests/cookies.py
class MockResponse:
    """Hand response headers to `cookielib` through an ``info()`` accessor.

    The object only stores the parsed HTTP headers of the server response
    and returns them from `info()`, which is how `cookielib` expects to see
    them.
    """

    def __init__(self, headers: HTTPMessage | HTTPHeaderDict) -> None:
        """Keep the headers object for `cookielib` to read later.

        :param headers: a httplib.HTTPMessage or analogous carrying the headers
        """
        self._headers = headers

    def info(self) -> HTTPMessage | HTTPHeaderDict:
        """Return the headers object given to the constructor, unchanged."""
        return self._headers
def build_jar(cookies: Sequence[Cookie]) -> CookieJar:
    """Return a fresh ``CookieJar`` filled with the given cookies.

    Each item of *cookies* is stored with ``CookieJar.set_cookie``, in the
    order in which it appears.

    :param cookies: cookie objects to place into the jar
    """
    result = CookieJar()
    store = result.set_cookie
    for cookie in cookies:
        store(cookie)
    return result
def extract_response_cookies(
    req_url: str,
    req_headers: Mapping[str, Any] | HTTPMessage | HTTPHeaderDict,
    response_headers: HTTPMessage | HTTPHeaderDict,
) -> Sequence[Cookie]:
    """Collect the cookies that a response sets for a given request.

    The request URL and headers and the response headers are wrapped in
    `MockRequest` / `MockResponse` and passed to
    ``CookieJar.extract_cookies`` on a new, empty jar.

    :param req_url: URL of the request
    :param req_headers: headers sent with the request; copied into a plain
        ``dict`` before use
    :param response_headers: headers received in the response
    :return: list of the cookies that ended up in the jar
    """
    collector = CookieJar()
    # The casts only satisfy type checkers; the jar receives the mock objects.
    fake_response = cast(HTTPResponse, MockResponse(response_headers))
    fake_request = cast(
        urllib.request.Request,
        MockRequest(req_url, dict(req_headers)),
    )
    collector.extract_cookies(fake_response, fake_request)
    return list(collector)