You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

712 lines
23 KiB

4 years ago
import asyncio
import collections
import collections.abc
import datetime
import io
import re
import socket
import string
import tempfile
import types
import warnings
from email.utils import parsedate
from http.cookies import SimpleCookie
from types import MappingProxyType
from typing import Any, Dict, Mapping, Optional, Tuple, cast  # noqa
from urllib.parse import parse_qsl

import attr
from multidict import CIMultiDict, CIMultiDictProxy, MultiDict, MultiDictProxy
from yarl import URL

from . import hdrs
from .helpers import DEBUG, ChainMapProxy, HeadersMixin, reify, sentinel
from .multipart import MultipartReader
from .streams import EmptyStreamReader, StreamReader
from .typedefs import (DEFAULT_JSON_DECODER, JSONDecoder, LooseHeaders,
                       RawHeaders, StrOrURL)
from .web_exceptions import HTTPRequestEntityTooLarge
from .web_urldispatcher import UrlMappingMatchInfo
  27. __all__ = ('BaseRequest', 'FileField', 'Request')
@attr.s(frozen=True, slots=True)
class FileField:
    """Immutable record describing one uploaded file of a multipart form.

    ``BaseRequest.post()`` builds one instance for every
    ``multipart/form-data`` part that carries a filename and stores it in
    the resulting multidict under the part's field name.
    """
    # name of the form field the file was submitted under
    name = attr.ib(type=str)
    # filename supplied by the client for this part
    filename = attr.ib(type=str)
    # file object positioned at offset 0, holding the decoded part body
    file = attr.ib(type=io.BufferedReader)
    # value of the part's Content-Type header (None when absent)
    content_type = attr.ib(type=str)
    # all headers of the multipart part
    headers = attr.ib(type=CIMultiDictProxy)
  35. _TCHAR = string.digits + string.ascii_letters + r"!#$%&'*+.^_`|~-"
  36. # '-' at the end to prevent interpretation as range in a char class
  37. _TOKEN = r'[{tchar}]+'.format(tchar=_TCHAR)
  38. _QDTEXT = r'[{}]'.format(
  39. r''.join(chr(c) for c in (0x09, 0x20, 0x21) + tuple(range(0x23, 0x7F))))
  40. # qdtext includes 0x5C to escape 0x5D ('\]')
  41. # qdtext excludes obs-text (because obsoleted, and encoding not specified)
  42. _QUOTED_PAIR = r'\\[\t !-~]'
  43. _QUOTED_STRING = r'"(?:{quoted_pair}|{qdtext})*"'.format(
  44. qdtext=_QDTEXT, quoted_pair=_QUOTED_PAIR)
  45. _FORWARDED_PAIR = (
  46. r'({token})=({token}|{quoted_string})(:\d{{1,4}})?'.format(
  47. token=_TOKEN,
  48. quoted_string=_QUOTED_STRING))
  49. _QUOTED_PAIR_REPLACE_RE = re.compile(r'\\([\t !-~])')
  50. # same pattern as _QUOTED_PAIR but contains a capture group
  51. _FORWARDED_PAIR_RE = re.compile(_FORWARDED_PAIR)
  52. ############################################################
  53. # HTTP Request
  54. ############################################################
  55. class BaseRequest(collections.MutableMapping, HeadersMixin):
  56. POST_METHODS = {hdrs.METH_PATCH, hdrs.METH_POST, hdrs.METH_PUT,
  57. hdrs.METH_TRACE, hdrs.METH_DELETE}
  58. ATTRS = HeadersMixin.ATTRS | frozenset([
  59. '_message', '_protocol', '_payload_writer', '_payload', '_headers',
  60. '_method', '_version', '_rel_url', '_post', '_read_bytes',
  61. '_state', '_cache', '_task', '_client_max_size', '_loop'])
  62. def __init__(self, message, payload, protocol, payload_writer, task,
  63. loop,
  64. *, client_max_size=1024**2,
  65. state=None,
  66. scheme=None, host=None, remote=None):
  67. if state is None:
  68. state = {}
  69. self._message = message
  70. self._protocol = protocol
  71. self._payload_writer = payload_writer
  72. self._payload = payload
  73. self._headers = message.headers
  74. self._method = message.method
  75. self._version = message.version
  76. self._rel_url = message.url
  77. self._post = None
  78. self._read_bytes = None
  79. self._state = state
  80. self._cache = {}
  81. self._task = task
  82. self._client_max_size = client_max_size
  83. self._loop = loop
  84. if scheme is not None:
  85. self._cache['scheme'] = scheme
  86. if host is not None:
  87. self._cache['host'] = host
  88. if remote is not None:
  89. self._cache['remote'] = remote
  90. def clone(self, *, method: str=sentinel, rel_url: StrOrURL=sentinel,
  91. headers: LooseHeaders=sentinel, scheme: str=sentinel,
  92. host: str=sentinel,
  93. remote: str=sentinel) -> 'BaseRequest':
  94. """Clone itself with replacement some attributes.
  95. Creates and returns a new instance of Request object. If no parameters
  96. are given, an exact copy is returned. If a parameter is not passed, it
  97. will reuse the one from the current request object.
  98. """
  99. if self._read_bytes:
  100. raise RuntimeError("Cannot clone request "
  101. "after reading it's content")
  102. dct = {} # type: Dict[str, Any]
  103. if method is not sentinel:
  104. dct['method'] = method
  105. if rel_url is not sentinel:
  106. new_url = URL(rel_url)
  107. dct['url'] = new_url
  108. dct['path'] = str(new_url)
  109. if headers is not sentinel:
  110. # a copy semantic
  111. dct['headers'] = CIMultiDictProxy(CIMultiDict(headers))
  112. dct['raw_headers'] = tuple((k.encode('utf-8'), v.encode('utf-8'))
  113. for k, v in headers.items())
  114. message = self._message._replace(**dct)
  115. kwargs = {}
  116. if scheme is not sentinel:
  117. kwargs['scheme'] = scheme
  118. if host is not sentinel:
  119. kwargs['host'] = host
  120. if remote is not sentinel:
  121. kwargs['remote'] = remote
  122. return self.__class__(
  123. message,
  124. self._payload,
  125. self._protocol,
  126. self._payload_writer,
  127. self._task,
  128. self._loop,
  129. client_max_size=self._client_max_size,
  130. state=self._state.copy(),
  131. **kwargs)
  132. @property
  133. def task(self):
  134. return self._task
  135. @property
  136. def protocol(self):
  137. return self._protocol
  138. @property
  139. def transport(self):
  140. if self._protocol is None:
  141. return None
  142. return self._protocol.transport
  143. @property
  144. def writer(self):
  145. return self._payload_writer
  146. @reify
  147. def message(self):
  148. warnings.warn("Request.message is deprecated",
  149. DeprecationWarning,
  150. stacklevel=3)
  151. return self._message
  152. @reify
  153. def rel_url(self) -> URL:
  154. return self._rel_url
  155. @reify
  156. def loop(self) -> asyncio.AbstractEventLoop:
  157. return self._loop
  158. # MutableMapping API
  159. def __getitem__(self, key):
  160. return self._state[key]
  161. def __setitem__(self, key, value):
  162. self._state[key] = value
  163. def __delitem__(self, key):
  164. del self._state[key]
  165. def __len__(self):
  166. return len(self._state)
  167. def __iter__(self):
  168. return iter(self._state)
  169. ########
  170. @reify
  171. def secure(self) -> bool:
  172. """A bool indicating if the request is handled with SSL."""
  173. return self.scheme == 'https'
  174. @reify
  175. def forwarded(self):
  176. """A tuple containing all parsed Forwarded header(s).
  177. Makes an effort to parse Forwarded headers as specified by RFC 7239:
  178. - It adds one (immutable) dictionary per Forwarded 'field-value', ie
  179. per proxy. The element corresponds to the data in the Forwarded
  180. field-value added by the first proxy encountered by the client. Each
  181. subsequent item corresponds to those added by later proxies.
  182. - It checks that every value has valid syntax in general as specified
  183. in section 4: either a 'token' or a 'quoted-string'.
  184. - It un-escapes found escape sequences.
  185. - It does NOT validate 'by' and 'for' contents as specified in section
  186. 6.
  187. - It does NOT validate 'host' contents (Host ABNF).
  188. - It does NOT validate 'proto' contents for valid URI scheme names.
  189. Returns a tuple containing one or more immutable dicts
  190. """
  191. elems = []
  192. for field_value in self._message.headers.getall(hdrs.FORWARDED, ()):
  193. length = len(field_value)
  194. pos = 0
  195. need_separator = False
  196. elem = {}
  197. elems.append(types.MappingProxyType(elem))
  198. while 0 <= pos < length:
  199. match = _FORWARDED_PAIR_RE.match(field_value, pos)
  200. if match is not None: # got a valid forwarded-pair
  201. if need_separator:
  202. # bad syntax here, skip to next comma
  203. pos = field_value.find(',', pos)
  204. else:
  205. name, value, port = match.groups()
  206. if value[0] == '"':
  207. # quoted string: remove quotes and unescape
  208. value = _QUOTED_PAIR_REPLACE_RE.sub(r'\1',
  209. value[1:-1])
  210. if port:
  211. value += port
  212. elem[name.lower()] = value
  213. pos += len(match.group(0))
  214. need_separator = True
  215. elif field_value[pos] == ',': # next forwarded-element
  216. need_separator = False
  217. elem = {}
  218. elems.append(types.MappingProxyType(elem))
  219. pos += 1
  220. elif field_value[pos] == ';': # next forwarded-pair
  221. need_separator = False
  222. pos += 1
  223. elif field_value[pos] in ' \t':
  224. # Allow whitespace even between forwarded-pairs, though
  225. # RFC 7239 doesn't. This simplifies code and is in line
  226. # with Postel's law.
  227. pos += 1
  228. else:
  229. # bad syntax here, skip to next comma
  230. pos = field_value.find(',', pos)
  231. return tuple(elems)
  232. @reify
  233. def scheme(self) -> str:
  234. """A string representing the scheme of the request.
  235. Hostname is resolved in this order:
  236. - overridden value by .clone(scheme=new_scheme) call.
  237. - type of connection to peer: HTTPS if socket is SSL, HTTP otherwise.
  238. 'http' or 'https'.
  239. """
  240. if self.transport.get_extra_info('sslcontext'):
  241. return 'https'
  242. else:
  243. return 'http'
  244. @reify
  245. def method(self) -> str:
  246. """Read only property for getting HTTP method.
  247. The value is upper-cased str like 'GET', 'POST', 'PUT' etc.
  248. """
  249. return self._method
  250. @reify
  251. def version(self) -> Tuple[int, int]:
  252. """Read only property for getting HTTP version of request.
  253. Returns aiohttp.protocol.HttpVersion instance.
  254. """
  255. return self._version
  256. @reify
  257. def host(self) -> str:
  258. """Hostname of the request.
  259. Hostname is resolved in this order:
  260. - overridden value by .clone(host=new_host) call.
  261. - HOST HTTP header
  262. - socket.getfqdn() value
  263. """
  264. host = self._message.headers.get(hdrs.HOST)
  265. if host is not None:
  266. return host
  267. else:
  268. return socket.getfqdn()
  269. @reify
  270. def remote(self) -> Optional[str]:
  271. """Remote IP of client initiated HTTP request.
  272. The IP is resolved in this order:
  273. - overridden value by .clone(remote=new_remote) call.
  274. - peername of opened socket
  275. """
  276. if self.transport is None:
  277. return None
  278. peername = self.transport.get_extra_info('peername')
  279. if isinstance(peername, (list, tuple)):
  280. return peername[0]
  281. else:
  282. return peername
  283. @reify
  284. def url(self) -> URL:
  285. url = URL.build(scheme=self.scheme, host=self.host)
  286. return url.join(self._rel_url)
  287. @reify
  288. def path(self) -> str:
  289. """The URL including *PATH INFO* without the host or scheme.
  290. E.g., ``/app/blog``
  291. """
  292. return self._rel_url.path
  293. @reify
  294. def path_qs(self) -> str:
  295. """The URL including PATH_INFO and the query string.
  296. E.g, /app/blog?id=10
  297. """
  298. return str(self._rel_url)
  299. @reify
  300. def raw_path(self) -> str:
  301. """ The URL including raw *PATH INFO* without the host or scheme.
  302. Warning, the path is unquoted and may contains non valid URL characters
  303. E.g., ``/my%2Fpath%7Cwith%21some%25strange%24characters``
  304. """
  305. return self._message.path
  306. @reify
  307. def query(self) -> MultiDict:
  308. """A multidict with all the variables in the query string."""
  309. return self._rel_url.query
  310. @reify
  311. def query_string(self) -> str:
  312. """The query string in the URL.
  313. E.g., id=10
  314. """
  315. return self._rel_url.query_string
  316. @reify
  317. def headers(self) -> CIMultiDictProxy:
  318. """A case-insensitive multidict proxy with all headers."""
  319. return self._headers
  320. @reify
  321. def raw_headers(self) -> RawHeaders:
  322. """A sequence of pairs for all headers."""
  323. return self._message.raw_headers
  324. @staticmethod
  325. def _http_date(_date_str) -> Optional[datetime.datetime]:
  326. """Process a date string, return a datetime object
  327. """
  328. if _date_str is not None:
  329. timetuple = parsedate(_date_str)
  330. if timetuple is not None:
  331. return datetime.datetime(*timetuple[:6],
  332. tzinfo=datetime.timezone.utc)
  333. return None
  334. @reify
  335. def if_modified_since(self) -> Optional[datetime.datetime]:
  336. """The value of If-Modified-Since HTTP header, or None.
  337. This header is represented as a `datetime` object.
  338. """
  339. return self._http_date(self.headers.get(hdrs.IF_MODIFIED_SINCE))
  340. @reify
  341. def if_unmodified_since(self) -> Optional[datetime.datetime]:
  342. """The value of If-Unmodified-Since HTTP header, or None.
  343. This header is represented as a `datetime` object.
  344. """
  345. return self._http_date(self.headers.get(hdrs.IF_UNMODIFIED_SINCE))
  346. @reify
  347. def if_range(self) -> Optional[datetime.datetime]:
  348. """The value of If-Range HTTP header, or None.
  349. This header is represented as a `datetime` object.
  350. """
  351. return self._http_date(self.headers.get(hdrs.IF_RANGE))
  352. @reify
  353. def keep_alive(self) -> bool:
  354. """Is keepalive enabled by client?"""
  355. return not self._message.should_close
  356. @reify
  357. def cookies(self) -> Mapping[str, str]:
  358. """Return request cookies.
  359. A read-only dictionary-like object.
  360. """
  361. raw = self.headers.get(hdrs.COOKIE, '')
  362. parsed = SimpleCookie(raw) # type: ignore
  363. return MappingProxyType(
  364. {key: val.value for key, val in parsed.items()})
  365. @reify
  366. def http_range(self):
  367. """The content of Range HTTP header.
  368. Return a slice instance.
  369. """
  370. rng = self._headers.get(hdrs.RANGE)
  371. start, end = None, None
  372. if rng is not None:
  373. try:
  374. pattern = r'^bytes=(\d*)-(\d*)$'
  375. start, end = re.findall(pattern, rng)[0]
  376. except IndexError: # pattern was not found in header
  377. raise ValueError("range not in acceptable format")
  378. end = int(end) if end else None
  379. start = int(start) if start else None
  380. if start is None and end is not None:
  381. # end with no start is to return tail of content
  382. start = -end
  383. end = None
  384. if start is not None and end is not None:
  385. # end is inclusive in range header, exclusive for slice
  386. end += 1
  387. if start >= end:
  388. raise ValueError('start cannot be after end')
  389. if start is end is None: # No valid range supplied
  390. raise ValueError('No start or end of range specified')
  391. return slice(start, end, 1)
  392. @reify
  393. def content(self) -> StreamReader:
  394. """Return raw payload stream."""
  395. return self._payload
  396. @property
  397. def has_body(self) -> bool:
  398. """Return True if request's HTTP BODY can be read, False otherwise."""
  399. warnings.warn(
  400. "Deprecated, use .can_read_body #2005",
  401. DeprecationWarning, stacklevel=2)
  402. return not self._payload.at_eof()
  403. @property
  404. def can_read_body(self) -> bool:
  405. """Return True if request's HTTP BODY can be read, False otherwise."""
  406. return not self._payload.at_eof()
  407. @reify
  408. def body_exists(self) -> bool:
  409. """Return True if request has HTTP BODY, False otherwise."""
  410. return type(self._payload) is not EmptyStreamReader
  411. async def release(self) -> None:
  412. """Release request.
  413. Eat unread part of HTTP BODY if present.
  414. """
  415. while not self._payload.at_eof():
  416. await self._payload.readany()
  417. async def read(self) -> bytes:
  418. """Read request body if present.
  419. Returns bytes object with full request content.
  420. """
  421. if self._read_bytes is None:
  422. body = bytearray()
  423. while True:
  424. chunk = await self._payload.readany()
  425. body.extend(chunk)
  426. if self._client_max_size:
  427. body_size = len(body)
  428. if body_size >= self._client_max_size:
  429. raise HTTPRequestEntityTooLarge(
  430. max_size=self._client_max_size,
  431. actual_size=body_size
  432. )
  433. if not chunk:
  434. break
  435. self._read_bytes = bytes(body)
  436. return self._read_bytes
  437. async def text(self) -> str:
  438. """Return BODY as text using encoding from .charset."""
  439. bytes_body = await self.read()
  440. encoding = self.charset or 'utf-8'
  441. return bytes_body.decode(encoding)
  442. async def json(self, *, loads: JSONDecoder=DEFAULT_JSON_DECODER) -> Any:
  443. """Return BODY as JSON."""
  444. body = await self.text()
  445. return loads(body)
  446. async def multipart(self) -> MultipartReader:
  447. """Return async iterator to process BODY as multipart."""
  448. return MultipartReader(self._headers, self._payload)
  449. async def post(self) -> MultiDictProxy:
  450. """Return POST parameters."""
  451. if self._post is not None:
  452. return self._post
  453. if self._method not in self.POST_METHODS:
  454. self._post = MultiDictProxy(MultiDict())
  455. return self._post
  456. content_type = self.content_type
  457. if (content_type not in ('',
  458. 'application/x-www-form-urlencoded',
  459. 'multipart/form-data')):
  460. self._post = MultiDictProxy(MultiDict())
  461. return self._post
  462. out = MultiDict() # type: MultiDict
  463. if content_type == 'multipart/form-data':
  464. multipart = await self.multipart()
  465. max_size = self._client_max_size
  466. field = await multipart.next()
  467. while field is not None:
  468. size = 0
  469. content_type = field.headers.get(hdrs.CONTENT_TYPE)
  470. if field.filename:
  471. # store file in temp file
  472. tmp = tempfile.TemporaryFile()
  473. chunk = await field.read_chunk(size=2**16)
  474. while chunk:
  475. chunk = field.decode(chunk)
  476. tmp.write(chunk)
  477. size += len(chunk)
  478. if 0 < max_size < size:
  479. raise HTTPRequestEntityTooLarge(
  480. max_size=max_size,
  481. actual_size=size
  482. )
  483. chunk = await field.read_chunk(size=2**16)
  484. tmp.seek(0)
  485. ff = FileField(field.name, field.filename,
  486. cast(io.BufferedReader, tmp),
  487. content_type, field.headers)
  488. out.add(field.name, ff)
  489. else:
  490. value = await field.read(decode=True)
  491. if content_type is None or \
  492. content_type.startswith('text/'):
  493. charset = field.get_charset(default='utf-8')
  494. value = value.decode(charset)
  495. out.add(field.name, value)
  496. size += len(value)
  497. if 0 < max_size < size:
  498. raise HTTPRequestEntityTooLarge(
  499. max_size=max_size,
  500. actual_size=size
  501. )
  502. field = await multipart.next()
  503. else:
  504. data = await self.read()
  505. if data:
  506. charset = self.charset or 'utf-8'
  507. out.extend(
  508. parse_qsl(
  509. data.rstrip().decode(charset),
  510. keep_blank_values=True,
  511. encoding=charset))
  512. self._post = MultiDictProxy(out)
  513. return self._post
  514. def __repr__(self):
  515. ascii_encodable_path = self.path.encode('ascii', 'backslashreplace') \
  516. .decode('ascii')
  517. return "<{} {} {} >".format(self.__class__.__name__,
  518. self._method, ascii_encodable_path)
  519. def __eq__(self, other):
  520. return id(self) == id(other)
  521. @asyncio.coroutine
  522. def _prepare_hook(self, response):
  523. return
  524. yield # pragma: no cover
  525. class Request(BaseRequest):
  526. ATTRS = BaseRequest.ATTRS | frozenset(['_match_info'])
  527. def __init__(self, *args, **kwargs) -> None:
  528. super().__init__(*args, **kwargs)
  529. # matchdict, route_name, handler
  530. # or information about traversal lookup
  531. # initialized after route resolving
  532. self._match_info = None # type: Optional[UrlMappingMatchInfo]
  533. if DEBUG:
  534. def __setattr__(self, name, val):
  535. if name not in self.ATTRS:
  536. warnings.warn("Setting custom {}.{} attribute "
  537. "is discouraged".format(self.__class__.__name__,
  538. name),
  539. DeprecationWarning,
  540. stacklevel=2)
  541. super().__setattr__(name, val)
  542. def clone(self, *, method: str=sentinel, rel_url:
  543. StrOrURL=sentinel, headers: LooseHeaders=sentinel,
  544. scheme: str=sentinel, host: str=sentinel, remote:
  545. str=sentinel) -> 'Request':
  546. ret = super().clone(method=method,
  547. rel_url=rel_url,
  548. headers=headers,
  549. scheme=scheme,
  550. host=host,
  551. remote=remote)
  552. new_ret = cast(Request, ret)
  553. new_ret._match_info = self._match_info
  554. return new_ret
  555. @reify
  556. def match_info(self) -> Optional[UrlMappingMatchInfo]:
  557. """Result of route resolving."""
  558. return self._match_info
  559. @property
  560. def app(self):
  561. """Application instance."""
  562. match_info = self._match_info
  563. if match_info is None:
  564. return None
  565. return match_info.current_app
  566. @property
  567. def config_dict(self) -> ChainMapProxy:
  568. match_info = self._match_info
  569. if match_info is None:
  570. return ChainMapProxy([])
  571. lst = match_info.apps
  572. app = self.app
  573. idx = lst.index(app)
  574. sublist = list(reversed(lst[:idx + 1]))
  575. return ChainMapProxy(sublist)
  576. async def _prepare_hook(self, response):
  577. match_info = self._match_info
  578. if match_info is None:
  579. return
  580. for app in match_info._apps:
  581. await app.on_response_prepare.send(self, response)