You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

845 lines
28 KiB

4 years ago
  1. #cython: language_level=3
  2. #
  3. # Based on https://github.com/MagicStack/httptools
  4. #
  5. from __future__ import absolute_import, print_function
  6. from cpython.mem cimport PyMem_Malloc, PyMem_Free
  7. from libc.string cimport memcpy
  8. from cpython cimport (PyObject_GetBuffer, PyBuffer_Release, PyBUF_SIMPLE,
  9. Py_buffer, PyBytes_AsString, PyBytes_AsStringAndSize)
  10. from multidict import (CIMultiDict as _CIMultiDict,
  11. CIMultiDictProxy as _CIMultiDictProxy)
  12. from yarl import URL as _URL
  13. from aiohttp import hdrs
  14. from .http_exceptions import (
  15. BadHttpMessage, BadStatusLine, InvalidHeader, LineTooLong, InvalidURLError,
  16. PayloadEncodingError, ContentLengthError, TransferEncodingError)
  17. from .http_writer import (HttpVersion as _HttpVersion,
  18. HttpVersion10 as _HttpVersion10,
  19. HttpVersion11 as _HttpVersion11)
  20. from .http_parser import DeflateBuffer as _DeflateBuffer
  21. from .streams import (EMPTY_PAYLOAD as _EMPTY_PAYLOAD,
  22. StreamReader as _StreamReader)
  23. cimport cython
  24. from aiohttp cimport _cparser as cparser
  25. include "_headers.pxi"
  26. from aiohttp cimport _find_header
  27. DEF DEFAULT_FREELIST_SIZE = 250
  28. cdef extern from "Python.h":
  29. int PyByteArray_Resize(object, Py_ssize_t) except -1
  30. Py_ssize_t PyByteArray_Size(object) except -1
  31. char* PyByteArray_AsString(object)
  32. __all__ = ('HttpRequestParser', 'HttpResponseParser',
  33. 'RawRequestMessage', 'RawResponseMessage')
  34. cdef object URL = _URL
  35. cdef object URL_build = URL.build
  36. cdef object CIMultiDict = _CIMultiDict
  37. cdef object CIMultiDictProxy = _CIMultiDictProxy
  38. cdef object HttpVersion = _HttpVersion
  39. cdef object HttpVersion10 = _HttpVersion10
  40. cdef object HttpVersion11 = _HttpVersion11
  41. cdef object SEC_WEBSOCKET_KEY1 = hdrs.SEC_WEBSOCKET_KEY1
  42. cdef object CONTENT_ENCODING = hdrs.CONTENT_ENCODING
  43. cdef object EMPTY_PAYLOAD = _EMPTY_PAYLOAD
  44. cdef object StreamReader = _StreamReader
  45. cdef object DeflateBuffer = _DeflateBuffer
  46. cdef inline object extend(object buf, const char* at, size_t length):
  47. cdef Py_ssize_t s
  48. cdef char* ptr
  49. s = PyByteArray_Size(buf)
  50. PyByteArray_Resize(buf, s + length)
  51. ptr = PyByteArray_AsString(buf)
  52. memcpy(ptr + s, at, length)
  53. DEF METHODS_COUNT = 34;
  54. cdef list _http_method = []
  55. for i in range(METHODS_COUNT):
  56. _http_method.append(
  57. cparser.http_method_str(<cparser.http_method> i).decode('ascii'))
  58. cdef inline str http_method_str(int i):
  59. if i < METHODS_COUNT:
  60. return <str>_http_method[i]
  61. else:
  62. return "<unknown>"
  63. cdef inline object find_header(bytes raw_header):
  64. cdef Py_ssize_t size
  65. cdef char *buf
  66. cdef int idx
  67. PyBytes_AsStringAndSize(raw_header, &buf, &size)
  68. idx = _find_header.find_header(buf, size)
  69. if idx == -1:
  70. return raw_header.decode('utf-8', 'surrogateescape')
  71. return headers[idx]
  72. @cython.freelist(DEFAULT_FREELIST_SIZE)
  73. cdef class RawRequestMessage:
  74. cdef readonly str method
  75. cdef readonly str path
  76. cdef readonly object version # HttpVersion
  77. cdef readonly object headers # CIMultiDict
  78. cdef readonly object raw_headers # tuple
  79. cdef readonly object should_close
  80. cdef readonly object compression
  81. cdef readonly object upgrade
  82. cdef readonly object chunked
  83. cdef readonly object url # yarl.URL
  84. def __init__(self, method, path, version, headers, raw_headers,
  85. should_close, compression, upgrade, chunked, url):
  86. self.method = method
  87. self.path = path
  88. self.version = version
  89. self.headers = headers
  90. self.raw_headers = raw_headers
  91. self.should_close = should_close
  92. self.compression = compression
  93. self.upgrade = upgrade
  94. self.chunked = chunked
  95. self.url = url
  96. def __repr__(self):
  97. info = []
  98. info.append(("method", self.method))
  99. info.append(("path", self.path))
  100. info.append(("version", self.version))
  101. info.append(("headers", self.headers))
  102. info.append(("raw_headers", self.raw_headers))
  103. info.append(("should_close", self.should_close))
  104. info.append(("compression", self.compression))
  105. info.append(("upgrade", self.upgrade))
  106. info.append(("chunked", self.chunked))
  107. info.append(("url", self.url))
  108. sinfo = ', '.join(name + '=' + repr(val) for name, val in info)
  109. return '<RawRequestMessage(' + sinfo + ')>'
  110. def _replace(self, **dct):
  111. cdef RawRequestMessage ret
  112. ret = _new_request_message(self.method,
  113. self.path,
  114. self.version,
  115. self.headers,
  116. self.raw_headers,
  117. self.should_close,
  118. self.compression,
  119. self.upgrade,
  120. self.chunked,
  121. self.url)
  122. if "method" in dct:
  123. ret.method = dct["method"]
  124. if "path" in dct:
  125. ret.path = dct["path"]
  126. if "version" in dct:
  127. ret.version = dct["version"]
  128. if "headers" in dct:
  129. ret.headers = dct["headers"]
  130. if "raw_headers" in dct:
  131. ret.raw_headers = dct["raw_headers"]
  132. if "should_close" in dct:
  133. ret.should_close = dct["should_close"]
  134. if "compression" in dct:
  135. ret.compression = dct["compression"]
  136. if "upgrade" in dct:
  137. ret.upgrade = dct["upgrade"]
  138. if "chunked" in dct:
  139. ret.chunked = dct["chunked"]
  140. if "url" in dct:
  141. ret.url = dct["url"]
  142. return ret
  143. cdef _new_request_message(str method,
  144. str path,
  145. object version,
  146. object headers,
  147. object raw_headers,
  148. bint should_close,
  149. object compression,
  150. bint upgrade,
  151. bint chunked,
  152. object url):
  153. cdef RawRequestMessage ret
  154. ret = RawRequestMessage.__new__(RawRequestMessage)
  155. ret.method = method
  156. ret.path = path
  157. ret.version = version
  158. ret.headers = headers
  159. ret.raw_headers = raw_headers
  160. ret.should_close = should_close
  161. ret.compression = compression
  162. ret.upgrade = upgrade
  163. ret.chunked = chunked
  164. ret.url = url
  165. return ret
  166. @cython.freelist(DEFAULT_FREELIST_SIZE)
  167. cdef class RawResponseMessage:
  168. cdef readonly object version # HttpVersion
  169. cdef readonly int code
  170. cdef readonly str reason
  171. cdef readonly object headers # CIMultiDict
  172. cdef readonly object raw_headers # tuple
  173. cdef readonly object should_close
  174. cdef readonly object compression
  175. cdef readonly object upgrade
  176. cdef readonly object chunked
  177. def __init__(self, version, code, reason, headers, raw_headers,
  178. should_close, compression, upgrade, chunked):
  179. self.version = version
  180. self.code = code
  181. self.reason = reason
  182. self.headers = headers
  183. self.raw_headers = raw_headers
  184. self.should_close = should_close
  185. self.compression = compression
  186. self.upgrade = upgrade
  187. self.chunked = chunked
  188. def __repr__(self):
  189. info = []
  190. info.append(("version", self.version))
  191. info.append(("code", self.code))
  192. info.append(("reason", self.reason))
  193. info.append(("headers", self.headers))
  194. info.append(("raw_headers", self.raw_headers))
  195. info.append(("should_close", self.should_close))
  196. info.append(("compression", self.compression))
  197. info.append(("upgrade", self.upgrade))
  198. info.append(("chunked", self.chunked))
  199. sinfo = ', '.join(name + '=' + repr(val) for name, val in info)
  200. return '<RawResponseMessage(' + sinfo + ')>'
  201. cdef _new_response_message(object version,
  202. int code,
  203. str reason,
  204. object headers,
  205. object raw_headers,
  206. bint should_close,
  207. object compression,
  208. bint upgrade,
  209. bint chunked):
  210. cdef RawResponseMessage ret
  211. ret = RawResponseMessage.__new__(RawResponseMessage)
  212. ret.version = version
  213. ret.code = code
  214. ret.reason = reason
  215. ret.headers = headers
  216. ret.raw_headers = raw_headers
  217. ret.should_close = should_close
  218. ret.compression = compression
  219. ret.upgrade = upgrade
  220. ret.chunked = chunked
  221. return ret
  222. @cython.internal
  223. cdef class HttpParser:
  224. cdef:
  225. cparser.http_parser* _cparser
  226. cparser.http_parser_settings* _csettings
  227. bytearray _raw_name
  228. bytearray _raw_value
  229. bint _has_value
  230. object _protocol
  231. object _loop
  232. object _timer
  233. size_t _max_line_size
  234. size_t _max_field_size
  235. size_t _max_headers
  236. bint _response_with_body
  237. bint _started
  238. object _url
  239. bytearray _buf
  240. str _path
  241. str _reason
  242. object _headers
  243. list _raw_headers
  244. bint _upgraded
  245. list _messages
  246. object _payload
  247. bint _payload_error
  248. object _payload_exception
  249. object _last_error
  250. bint _auto_decompress
  251. str _content_encoding
  252. Py_buffer py_buf
  253. def __cinit__(self):
  254. self._cparser = <cparser.http_parser*> \
  255. PyMem_Malloc(sizeof(cparser.http_parser))
  256. if self._cparser is NULL:
  257. raise MemoryError()
  258. self._csettings = <cparser.http_parser_settings*> \
  259. PyMem_Malloc(sizeof(cparser.http_parser_settings))
  260. if self._csettings is NULL:
  261. raise MemoryError()
  262. def __dealloc__(self):
  263. PyMem_Free(self._cparser)
  264. PyMem_Free(self._csettings)
  265. cdef _init(self, cparser.http_parser_type mode,
  266. object protocol, object loop, object timer=None,
  267. size_t max_line_size=8190, size_t max_headers=32768,
  268. size_t max_field_size=8190, payload_exception=None,
  269. bint response_with_body=True, bint auto_decompress=True):
  270. cparser.http_parser_init(self._cparser, mode)
  271. self._cparser.data = <void*>self
  272. self._cparser.content_length = 0
  273. cparser.http_parser_settings_init(self._csettings)
  274. self._protocol = protocol
  275. self._loop = loop
  276. self._timer = timer
  277. self._buf = bytearray()
  278. self._payload = None
  279. self._payload_error = 0
  280. self._payload_exception = payload_exception
  281. self._messages = []
  282. self._raw_name = bytearray()
  283. self._raw_value = bytearray()
  284. self._has_value = False
  285. self._max_line_size = max_line_size
  286. self._max_headers = max_headers
  287. self._max_field_size = max_field_size
  288. self._response_with_body = response_with_body
  289. self._upgraded = False
  290. self._auto_decompress = auto_decompress
  291. self._content_encoding = None
  292. self._csettings.on_url = cb_on_url
  293. self._csettings.on_status = cb_on_status
  294. self._csettings.on_header_field = cb_on_header_field
  295. self._csettings.on_header_value = cb_on_header_value
  296. self._csettings.on_headers_complete = cb_on_headers_complete
  297. self._csettings.on_body = cb_on_body
  298. self._csettings.on_message_begin = cb_on_message_begin
  299. self._csettings.on_message_complete = cb_on_message_complete
  300. self._csettings.on_chunk_header = cb_on_chunk_header
  301. self._csettings.on_chunk_complete = cb_on_chunk_complete
  302. self._last_error = None
  303. cdef _process_header(self):
  304. if self._raw_name:
  305. raw_name = bytes(self._raw_name)
  306. raw_value = bytes(self._raw_value)
  307. name = find_header(raw_name)
  308. value = raw_value.decode('utf-8', 'surrogateescape')
  309. self._headers.add(name, value)
  310. if name is CONTENT_ENCODING:
  311. self._content_encoding = value
  312. PyByteArray_Resize(self._raw_name, 0)
  313. PyByteArray_Resize(self._raw_value, 0)
  314. self._has_value = False
  315. self._raw_headers.append((raw_name, raw_value))
  316. cdef _on_header_field(self, char* at, size_t length):
  317. cdef Py_ssize_t size
  318. cdef char *buf
  319. if self._has_value:
  320. self._process_header()
  321. size = PyByteArray_Size(self._raw_name)
  322. PyByteArray_Resize(self._raw_name, size + length)
  323. buf = PyByteArray_AsString(self._raw_name)
  324. memcpy(buf + size, at, length)
  325. cdef _on_header_value(self, char* at, size_t length):
  326. cdef Py_ssize_t size
  327. cdef char *buf
  328. size = PyByteArray_Size(self._raw_value)
  329. PyByteArray_Resize(self._raw_value, size + length)
  330. buf = PyByteArray_AsString(self._raw_value)
  331. memcpy(buf + size, at, length)
  332. self._has_value = True
  333. cdef _on_headers_complete(self):
  334. self._process_header()
  335. method = http_method_str(self._cparser.method)
  336. should_close = not cparser.http_should_keep_alive(self._cparser)
  337. upgrade = self._cparser.upgrade
  338. chunked = self._cparser.flags & cparser.F_CHUNKED
  339. raw_headers = tuple(self._raw_headers)
  340. headers = CIMultiDictProxy(self._headers)
  341. if upgrade or self._cparser.method == 5: # cparser.CONNECT:
  342. self._upgraded = True
  343. # do not support old websocket spec
  344. if SEC_WEBSOCKET_KEY1 in headers:
  345. raise InvalidHeader(SEC_WEBSOCKET_KEY1)
  346. encoding = None
  347. enc = self._content_encoding
  348. if enc is not None:
  349. self._content_encoding = None
  350. enc = enc.lower()
  351. if enc in ('gzip', 'deflate', 'br'):
  352. encoding = enc
  353. if self._cparser.type == cparser.HTTP_REQUEST:
  354. msg = _new_request_message(
  355. method, self._path,
  356. self.http_version(), headers, raw_headers,
  357. should_close, encoding, upgrade, chunked, self._url)
  358. else:
  359. msg = _new_response_message(
  360. self.http_version(), self._cparser.status_code, self._reason,
  361. headers, raw_headers, should_close, encoding,
  362. upgrade, chunked)
  363. if (self._cparser.content_length > 0 or chunked or
  364. self._cparser.method == 5): # CONNECT: 5
  365. payload = StreamReader(
  366. self._protocol, timer=self._timer, loop=self._loop)
  367. else:
  368. payload = EMPTY_PAYLOAD
  369. self._payload = payload
  370. if encoding is not None and self._auto_decompress:
  371. self._payload = DeflateBuffer(payload, encoding)
  372. if not self._response_with_body:
  373. payload = EMPTY_PAYLOAD
  374. self._messages.append((msg, payload))
  375. cdef _on_message_complete(self):
  376. self._payload.feed_eof()
  377. self._payload = None
  378. cdef _on_chunk_header(self):
  379. self._payload.begin_http_chunk_receiving()
  380. cdef _on_chunk_complete(self):
  381. self._payload.end_http_chunk_receiving()
  382. cdef object _on_status_complete(self):
  383. pass
  384. cdef inline http_version(self):
  385. cdef cparser.http_parser* parser = self._cparser
  386. if parser.http_major == 1:
  387. if parser.http_minor == 0:
  388. return HttpVersion10
  389. elif parser.http_minor == 1:
  390. return HttpVersion11
  391. return HttpVersion(parser.http_major, parser.http_minor)
  392. ### Public API ###
  393. def feed_eof(self):
  394. cdef bytes desc
  395. if self._payload is not None:
  396. if self._cparser.flags & cparser.F_CHUNKED:
  397. raise TransferEncodingError(
  398. "Not enough data for satisfy transfer length header.")
  399. elif self._cparser.flags & cparser.F_CONTENTLENGTH:
  400. raise ContentLengthError(
  401. "Not enough data for satisfy content length header.")
  402. elif self._cparser.http_errno != cparser.HPE_OK:
  403. desc = cparser.http_errno_description(
  404. <cparser.http_errno> self._cparser.http_errno)
  405. raise PayloadEncodingError(desc.decode('latin-1'))
  406. else:
  407. self._payload.feed_eof()
  408. elif self._started:
  409. self._on_headers_complete()
  410. if self._messages:
  411. return self._messages[-1][0]
  412. def feed_data(self, data):
  413. cdef:
  414. size_t data_len
  415. size_t nb
  416. PyObject_GetBuffer(data, &self.py_buf, PyBUF_SIMPLE)
  417. data_len = <size_t>self.py_buf.len
  418. nb = cparser.http_parser_execute(
  419. self._cparser,
  420. self._csettings,
  421. <char*>self.py_buf.buf,
  422. data_len)
  423. PyBuffer_Release(&self.py_buf)
  424. # i am not sure about cparser.HPE_INVALID_METHOD,
  425. # seems get err for valid request
  426. # test_client_functional.py::test_post_data_with_bytesio_file
  427. if (self._cparser.http_errno != cparser.HPE_OK and
  428. (self._cparser.http_errno != cparser.HPE_INVALID_METHOD or
  429. self._cparser.method == 0)):
  430. if self._payload_error == 0:
  431. if self._last_error is not None:
  432. ex = self._last_error
  433. self._last_error = None
  434. else:
  435. ex = parser_error_from_errno(
  436. <cparser.http_errno> self._cparser.http_errno)
  437. self._payload = None
  438. raise ex
  439. if self._messages:
  440. messages = self._messages
  441. self._messages = []
  442. else:
  443. messages = ()
  444. if self._upgraded:
  445. return messages, True, data[nb:]
  446. else:
  447. return messages, False, b''
  448. cdef class HttpRequestParser(HttpParser):
  449. def __init__(self, protocol, loop, timer=None,
  450. size_t max_line_size=8190, size_t max_headers=32768,
  451. size_t max_field_size=8190, payload_exception=None,
  452. bint response_with_body=True, bint read_until_eof=False):
  453. self._init(cparser.HTTP_REQUEST, protocol, loop, timer,
  454. max_line_size, max_headers, max_field_size,
  455. payload_exception, response_with_body)
  456. cdef object _on_status_complete(self):
  457. cdef Py_buffer py_buf
  458. if not self._buf:
  459. return
  460. self._path = self._buf.decode('utf-8', 'surrogateescape')
  461. if self._cparser.method == 5: # CONNECT
  462. self._url = URL(self._path)
  463. else:
  464. PyObject_GetBuffer(self._buf, &py_buf, PyBUF_SIMPLE)
  465. try:
  466. self._url = _parse_url(<char*>py_buf.buf,
  467. py_buf.len)
  468. finally:
  469. PyBuffer_Release(&py_buf)
  470. PyByteArray_Resize(self._buf, 0)
  471. cdef class HttpResponseParser(HttpParser):
  472. def __init__(self, protocol, loop, timer=None,
  473. size_t max_line_size=8190, size_t max_headers=32768,
  474. size_t max_field_size=8190, payload_exception=None,
  475. bint response_with_body=True, bint read_until_eof=False,
  476. bint auto_decompress=True):
  477. self._init(cparser.HTTP_RESPONSE, protocol, loop, timer,
  478. max_line_size, max_headers, max_field_size,
  479. payload_exception, response_with_body, auto_decompress)
  480. cdef object _on_status_complete(self):
  481. if self._buf:
  482. self._reason = self._buf.decode('utf-8', 'surrogateescape')
  483. PyByteArray_Resize(self._buf, 0)
  484. cdef int cb_on_message_begin(cparser.http_parser* parser) except -1:
  485. cdef HttpParser pyparser = <HttpParser>parser.data
  486. pyparser._started = True
  487. pyparser._headers = CIMultiDict()
  488. pyparser._raw_headers = []
  489. PyByteArray_Resize(pyparser._buf, 0)
  490. pyparser._path = None
  491. pyparser._reason = None
  492. return 0
  493. cdef int cb_on_url(cparser.http_parser* parser,
  494. const char *at, size_t length) except -1:
  495. cdef HttpParser pyparser = <HttpParser>parser.data
  496. try:
  497. if length > pyparser._max_line_size:
  498. raise LineTooLong(
  499. 'Status line is too long', pyparser._max_line_size, length)
  500. extend(pyparser._buf, at, length)
  501. except BaseException as ex:
  502. pyparser._last_error = ex
  503. return -1
  504. else:
  505. return 0
  506. cdef int cb_on_status(cparser.http_parser* parser,
  507. const char *at, size_t length) except -1:
  508. cdef HttpParser pyparser = <HttpParser>parser.data
  509. cdef str reason
  510. try:
  511. if length > pyparser._max_line_size:
  512. raise LineTooLong(
  513. 'Status line is too long', pyparser._max_line_size, length)
  514. extend(pyparser._buf, at, length)
  515. except BaseException as ex:
  516. pyparser._last_error = ex
  517. return -1
  518. else:
  519. return 0
  520. cdef int cb_on_header_field(cparser.http_parser* parser,
  521. const char *at, size_t length) except -1:
  522. cdef HttpParser pyparser = <HttpParser>parser.data
  523. cdef Py_ssize_t size
  524. try:
  525. pyparser._on_status_complete()
  526. size = len(pyparser._raw_name) + length
  527. if size > pyparser._max_field_size:
  528. raise LineTooLong(
  529. 'Header name is too long', pyparser._max_field_size, size)
  530. pyparser._on_header_field(at, length)
  531. except BaseException as ex:
  532. pyparser._last_error = ex
  533. return -1
  534. else:
  535. return 0
  536. cdef int cb_on_header_value(cparser.http_parser* parser,
  537. const char *at, size_t length) except -1:
  538. cdef HttpParser pyparser = <HttpParser>parser.data
  539. cdef Py_ssize_t size
  540. try:
  541. size = len(pyparser._raw_value) + length
  542. if size > pyparser._max_field_size:
  543. raise LineTooLong(
  544. 'Header value is too long', pyparser._max_field_size, size)
  545. pyparser._on_header_value(at, length)
  546. except BaseException as ex:
  547. pyparser._last_error = ex
  548. return -1
  549. else:
  550. return 0
  551. cdef int cb_on_headers_complete(cparser.http_parser* parser) except -1:
  552. cdef HttpParser pyparser = <HttpParser>parser.data
  553. try:
  554. pyparser._on_status_complete()
  555. pyparser._on_headers_complete()
  556. except BaseException as exc:
  557. pyparser._last_error = exc
  558. return -1
  559. else:
  560. if pyparser._cparser.upgrade or pyparser._cparser.method == 5: # CONNECT
  561. return 2
  562. else:
  563. return 0
  564. cdef int cb_on_body(cparser.http_parser* parser,
  565. const char *at, size_t length) except -1:
  566. cdef HttpParser pyparser = <HttpParser>parser.data
  567. cdef bytes body = at[:length]
  568. try:
  569. pyparser._payload.feed_data(body, length)
  570. except BaseException as exc:
  571. if pyparser._payload_exception is not None:
  572. pyparser._payload.set_exception(pyparser._payload_exception(str(exc)))
  573. else:
  574. pyparser._payload.set_exception(exc)
  575. pyparser._payload_error = 1
  576. return -1
  577. else:
  578. return 0
  579. cdef int cb_on_message_complete(cparser.http_parser* parser) except -1:
  580. cdef HttpParser pyparser = <HttpParser>parser.data
  581. try:
  582. pyparser._started = False
  583. pyparser._on_message_complete()
  584. except BaseException as exc:
  585. pyparser._last_error = exc
  586. return -1
  587. else:
  588. return 0
  589. cdef int cb_on_chunk_header(cparser.http_parser* parser) except -1:
  590. cdef HttpParser pyparser = <HttpParser>parser.data
  591. try:
  592. pyparser._on_chunk_header()
  593. except BaseException as exc:
  594. pyparser._last_error = exc
  595. return -1
  596. else:
  597. return 0
  598. cdef int cb_on_chunk_complete(cparser.http_parser* parser) except -1:
  599. cdef HttpParser pyparser = <HttpParser>parser.data
  600. try:
  601. pyparser._on_chunk_complete()
  602. except BaseException as exc:
  603. pyparser._last_error = exc
  604. return -1
  605. else:
  606. return 0
  607. cdef parser_error_from_errno(cparser.http_errno errno):
  608. cdef bytes desc = cparser.http_errno_description(errno)
  609. if errno in (cparser.HPE_CB_message_begin,
  610. cparser.HPE_CB_url,
  611. cparser.HPE_CB_header_field,
  612. cparser.HPE_CB_header_value,
  613. cparser.HPE_CB_headers_complete,
  614. cparser.HPE_CB_body,
  615. cparser.HPE_CB_message_complete,
  616. cparser.HPE_CB_status,
  617. cparser.HPE_CB_chunk_header,
  618. cparser.HPE_CB_chunk_complete):
  619. cls = BadHttpMessage
  620. elif errno == cparser.HPE_INVALID_STATUS:
  621. cls = BadStatusLine
  622. elif errno == cparser.HPE_INVALID_METHOD:
  623. cls = BadStatusLine
  624. elif errno == cparser.HPE_INVALID_URL:
  625. cls = InvalidURLError
  626. else:
  627. cls = BadHttpMessage
  628. return cls(desc.decode('latin-1'))
  629. def parse_url(url):
  630. cdef:
  631. Py_buffer py_buf
  632. char* buf_data
  633. PyObject_GetBuffer(url, &py_buf, PyBUF_SIMPLE)
  634. try:
  635. buf_data = <char*>py_buf.buf
  636. return _parse_url(buf_data, py_buf.len)
  637. finally:
  638. PyBuffer_Release(&py_buf)
  639. cdef _parse_url(char* buf_data, size_t length):
  640. cdef:
  641. cparser.http_parser_url* parsed
  642. int res
  643. str schema = None
  644. str host = None
  645. object port = None
  646. str path = None
  647. str query = None
  648. str fragment = None
  649. str user = None
  650. str password = None
  651. str userinfo = None
  652. object result = None
  653. int off
  654. int ln
  655. parsed = <cparser.http_parser_url*> \
  656. PyMem_Malloc(sizeof(cparser.http_parser_url))
  657. if parsed is NULL:
  658. raise MemoryError()
  659. cparser.http_parser_url_init(parsed)
  660. try:
  661. res = cparser.http_parser_parse_url(buf_data, length, 0, parsed)
  662. if res == 0:
  663. if parsed.field_set & (1 << cparser.UF_SCHEMA):
  664. off = parsed.field_data[<int>cparser.UF_SCHEMA].off
  665. ln = parsed.field_data[<int>cparser.UF_SCHEMA].len
  666. schema = buf_data[off:off+ln].decode('utf-8', 'surrogateescape')
  667. else:
  668. schema = ''
  669. if parsed.field_set & (1 << cparser.UF_HOST):
  670. off = parsed.field_data[<int>cparser.UF_HOST].off
  671. ln = parsed.field_data[<int>cparser.UF_HOST].len
  672. host = buf_data[off:off+ln].decode('utf-8', 'surrogateescape')
  673. else:
  674. host = ''
  675. if parsed.field_set & (1 << cparser.UF_PORT):
  676. port = parsed.port
  677. if parsed.field_set & (1 << cparser.UF_PATH):
  678. off = parsed.field_data[<int>cparser.UF_PATH].off
  679. ln = parsed.field_data[<int>cparser.UF_PATH].len
  680. path = buf_data[off:off+ln].decode('utf-8', 'surrogateescape')
  681. else:
  682. path = ''
  683. if parsed.field_set & (1 << cparser.UF_QUERY):
  684. off = parsed.field_data[<int>cparser.UF_QUERY].off
  685. ln = parsed.field_data[<int>cparser.UF_QUERY].len
  686. query = buf_data[off:off+ln].decode('utf-8', 'surrogateescape')
  687. else:
  688. query = ''
  689. if parsed.field_set & (1 << cparser.UF_FRAGMENT):
  690. off = parsed.field_data[<int>cparser.UF_FRAGMENT].off
  691. ln = parsed.field_data[<int>cparser.UF_FRAGMENT].len
  692. fragment = buf_data[off:off+ln].decode('utf-8', 'surrogateescape')
  693. else:
  694. fragment = ''
  695. if parsed.field_set & (1 << cparser.UF_USERINFO):
  696. off = parsed.field_data[<int>cparser.UF_USERINFO].off
  697. ln = parsed.field_data[<int>cparser.UF_USERINFO].len
  698. userinfo = buf_data[off:off+ln].decode('utf-8', 'surrogateescape')
  699. user, sep, password = userinfo.partition(':')
  700. return URL_build(scheme=schema,
  701. user=user, password=password, host=host, port=port,
  702. path=path, query=query, fragment=fragment)
  703. else:
  704. raise InvalidURLError("invalid url {!r}".format(buf_data))
  705. finally:
  706. PyMem_Free(parsed)