You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

56 lines
2.8 KiB

4 years ago
  1. from libc.string cimport const_char
  2. from lxml.includes.tree cimport xmlDoc
  3. from lxml.includes.tree cimport xmlInputReadCallback, xmlInputCloseCallback
  4. from lxml.includes.xmlparser cimport xmlParserCtxt, xmlSAXHandler, xmlSAXHandlerV1
  5. cdef extern from "libxml/HTMLparser.h":
  6. ctypedef enum htmlParserOption:
  7. HTML_PARSE_NOERROR # suppress error reports
  8. HTML_PARSE_NOWARNING # suppress warning reports
  9. HTML_PARSE_PEDANTIC # pedantic error reporting
  10. HTML_PARSE_NOBLANKS # remove blank nodes
  11. HTML_PARSE_NONET # Forbid network access
  12. # libxml2 2.6.21+ only:
  13. HTML_PARSE_RECOVER # Relaxed parsing
  14. HTML_PARSE_COMPACT # compact small text nodes
  15. # libxml2 2.7.7+ only:
  16. HTML_PARSE_NOIMPLIED # Do not add implied html/body... elements
  17. # libxml2 2.7.8+ only:
  18. HTML_PARSE_NODEFDTD # do not default a doctype if not found
  19. # libxml2 2.8.0+ only:
  20. XML_PARSE_IGNORE_ENC # ignore internal document encoding hint
  21. xmlSAXHandlerV1 htmlDefaultSAXHandler
  22. cdef xmlParserCtxt* htmlCreateMemoryParserCtxt(
  23. char* buffer, int size) nogil
  24. cdef xmlParserCtxt* htmlCreateFileParserCtxt(
  25. char* filename, char* encoding) nogil
  26. cdef xmlParserCtxt* htmlCreatePushParserCtxt(xmlSAXHandler* sax,
  27. void* user_data,
  28. char* chunk, int size,
  29. char* filename, int enc) nogil
  30. cdef void htmlFreeParserCtxt(xmlParserCtxt* ctxt) nogil
  31. cdef void htmlCtxtReset(xmlParserCtxt* ctxt) nogil
  32. cdef int htmlCtxtUseOptions(xmlParserCtxt* ctxt, int options) nogil
  33. cdef int htmlParseDocument(xmlParserCtxt* ctxt) nogil
  34. cdef int htmlParseChunk(xmlParserCtxt* ctxt,
  35. char* chunk, int size, int terminate) nogil
  36. cdef xmlDoc* htmlCtxtReadFile(xmlParserCtxt* ctxt,
  37. char* filename, const_char* encoding,
  38. int options) nogil
  39. cdef xmlDoc* htmlCtxtReadDoc(xmlParserCtxt* ctxt,
  40. char* buffer, char* URL, const_char* encoding,
  41. int options) nogil
  42. cdef xmlDoc* htmlCtxtReadIO(xmlParserCtxt* ctxt,
  43. xmlInputReadCallback ioread,
  44. xmlInputCloseCallback ioclose,
  45. void* ioctx,
  46. char* URL, const_char* encoding,
  47. int options) nogil
  48. cdef xmlDoc* htmlCtxtReadMemory(xmlParserCtxt* ctxt,
  49. char* buffer, int size,
  50. char* filename, const_char* encoding,
  51. int options) nogil