You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

237 lines
9.9 KiB

4 years ago
  1. # public Cython/C interface to lxml.etree
  2. from lxml.includes cimport tree
  3. from lxml.includes.tree cimport const_xmlChar
  4. cdef extern from "lxml-version.h":
  5. cdef char* LXML_VERSION_STRING
  6. cdef extern from "etree_defs.h":
  7. # test if c_node is considered an Element (i.e. Element, Comment, etc.)
  8. cdef bint _isElement(tree.xmlNode* c_node) nogil
  9. # return the namespace URI of the node or NULL
  10. cdef const_xmlChar* _getNs(tree.xmlNode* node) nogil
  11. # pair of macros for tree traversal
  12. cdef void BEGIN_FOR_EACH_ELEMENT_FROM(tree.xmlNode* tree_top,
  13. tree.xmlNode* start_node,
  14. int start_node_inclusive) nogil
  15. cdef void END_FOR_EACH_ELEMENT_FROM(tree.xmlNode* start_node) nogil
  16. cdef extern from "etree_api.h":
  17. # first function to call!
  18. cdef int import_lxml__etree() except -1
  19. ##########################################################################
  20. # public ElementTree API classes
  21. cdef class lxml.etree._Document [ object LxmlDocument ]:
  22. cdef tree.xmlDoc* _c_doc
  23. cdef class lxml.etree._Element [ object LxmlElement ]:
  24. cdef _Document _doc
  25. cdef tree.xmlNode* _c_node
  26. cdef class lxml.etree.ElementBase(_Element) [ object LxmlElementBase ]:
  27. pass
  28. cdef class lxml.etree._ElementTree [ object LxmlElementTree ]:
  29. cdef _Document _doc
  30. cdef _Element _context_node
  31. cdef class lxml.etree.ElementClassLookup [ object LxmlElementClassLookup ]:
  32. cdef object (*_lookup_function)(object, _Document, tree.xmlNode*)
  33. cdef class lxml.etree.FallbackElementClassLookup(ElementClassLookup) \
  34. [ object LxmlFallbackElementClassLookup ]:
  35. cdef ElementClassLookup fallback
  36. cdef object (*_fallback_function)(object, _Document, tree.xmlNode*)
  37. ##########################################################################
  38. # creating Element objects
  39. # create an Element for a C-node in the Document
  40. cdef _Element elementFactory(_Document doc, tree.xmlNode* c_node)
  41. # create an ElementTree for an Element
  42. cdef _ElementTree elementTreeFactory(_Element context_node)
  43. # create an ElementTree subclass for an Element
  44. cdef _ElementTree newElementTree(_Element context_node, object subclass)
  45. # create an ElementTree from an external document
  46. cdef _ElementTree adoptExternalDocument(tree.xmlDoc* c_doc, parser, bint is_owned)
  47. # create a new Element for an existing or new document (doc = None)
  48. # builds Python object after setting text, tail, namespaces and attributes
  49. cdef _Element makeElement(tag, _Document doc, parser,
  50. text, tail, attrib, nsmap)
  51. # create a new SubElement for an existing parent
  52. # builds Python object after setting text, tail, namespaces and attributes
  53. cdef _Element makeSubElement(_Element parent, tag, text, tail,
  54. attrib, nsmap)
  55. # deep copy a node to include it in the Document
  56. cdef _Element deepcopyNodeToDocument(_Document doc, tree.xmlNode* c_root)
  57. # set the internal lookup function for Element/Comment/PI classes
  58. # use setElementClassLookupFunction(NULL, None) to reset it
  59. # note that the lookup function *must always* return an _Element subclass!
  60. cdef void setElementClassLookupFunction(
  61. object (*function)(object, _Document, tree.xmlNode*), object state)
  62. # lookup function that always returns the default Element class
  63. # note that the first argument is expected to be None!
  64. cdef object lookupDefaultElementClass(_1, _Document _2,
  65. tree.xmlNode* c_node)
  66. # lookup function for namespace/tag specific Element classes
  67. # note that the first argument is expected to be None!
  68. cdef object lookupNamespaceElementClass(_1, _Document _2,
  69. tree.xmlNode* c_node)
  70. # call the fallback lookup function of a FallbackElementClassLookup
  71. cdef object callLookupFallback(FallbackElementClassLookup lookup,
  72. _Document doc, tree.xmlNode* c_node)
  73. ##########################################################################
  74. # XML attribute access
  75. # return an attribute value for a C attribute on a C element node
  76. cdef object attributeValue(tree.xmlNode* c_element,
  77. tree.xmlAttr* c_attrib_node)
  78. # return the value of the attribute with 'ns' and 'name' (or None)
  79. cdef object attributeValueFromNsName(tree.xmlNode* c_element,
  80. const_xmlChar* c_ns, const_xmlChar* c_name)
  81. # return the value of attribute "{ns}name", or the default value
  82. cdef object getAttributeValue(_Element element, key, default)
  83. # return an iterator over attribute names (1), values (2) or items (3)
  84. # attributes must not be removed during iteration!
  85. cdef object iterattributes(_Element element, int keysvalues)
  86. # return the list of all attribute names (1), values (2) or items (3)
  87. cdef list collectAttributes(tree.xmlNode* c_element, int keysvalues)
  88. # set an attribute value on an element
  89. # on failure, sets an exception and returns -1
  90. cdef int setAttributeValue(_Element element, key, value) except -1
  91. # delete an attribute
  92. # on failure, sets an exception and returns -1
  93. cdef int delAttribute(_Element element, key) except -1
  94. # delete an attribute based on name and namespace URI
  95. # returns -1 if the attribute was not found (no exception)
  96. cdef int delAttributeFromNsName(tree.xmlNode* c_element,
  97. const_xmlChar* c_href, const_xmlChar* c_name)
  98. ##########################################################################
  99. # XML node helper functions
  100. # check if the element has at least one child
  101. cdef bint hasChild(tree.xmlNode* c_node) nogil
  102. # find child element number 'index' (supports negative indexes)
  103. cdef tree.xmlNode* findChild(tree.xmlNode* c_node,
  104. Py_ssize_t index) nogil
  105. # find child element number 'index' starting at first one
  106. cdef tree.xmlNode* findChildForwards(tree.xmlNode* c_node,
  107. Py_ssize_t index) nogil
  108. # find child element number 'index' starting at last one
  109. cdef tree.xmlNode* findChildBackwards(tree.xmlNode* c_node,
  110. Py_ssize_t index) nogil
  111. # return next/previous sibling element of the node
  112. cdef tree.xmlNode* nextElement(tree.xmlNode* c_node) nogil
  113. cdef tree.xmlNode* previousElement(tree.xmlNode* c_node) nogil
  114. ##########################################################################
  115. # iterators (DEPRECATED API, don't use in new code!)
  116. cdef class lxml.etree._ElementTagMatcher [ object LxmlElementTagMatcher ]:
  117. cdef char* _href
  118. cdef char* _name
  119. # store "{ns}tag" (or None) filter for this matcher or element iterator
  120. # ** unless _href *and* _name are set up 'by hand', this function *must*
  121. # ** be called when subclassing the iterator below!
  122. cdef void initTagMatch(_ElementTagMatcher matcher, tag)
  123. cdef class lxml.etree._ElementIterator(_ElementTagMatcher) [
  124. object LxmlElementIterator ]:
  125. cdef _Element _node
  126. cdef tree.xmlNode* (*_next_element)(tree.xmlNode*)
  127. # store the initial node of the iterator if it matches the required tag
  128. # or its next matching sibling if not
  129. cdef void iteratorStoreNext(_ElementIterator iterator, _Element node)
  130. ##########################################################################
  131. # other helper functions
  132. # check if a C node matches a tag name and namespace
  133. # (NULL allowed for each => always matches)
  134. cdef int tagMatches(tree.xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name)
  135. # convert a UTF-8 char* to a Python string or unicode string
  136. cdef object pyunicode(const_xmlChar* s)
  137. # convert the string to UTF-8 using the normal lxml.etree semantics
  138. cdef bytes utf8(object s)
  139. # split a tag into a (URI, name) tuple, return None as URI for '{}tag'
  140. cdef tuple getNsTag(object tag)
  141. # split a tag into a (URI, name) tuple, return b'' as URI for '{}tag'
  142. cdef tuple getNsTagWithEmptyNs(object tag)
  143. # get the "{ns}tag" string for a C node
  144. cdef object namespacedName(tree.xmlNode* c_node)
  145. # get the "{ns}tag" string for a href/tagname pair (c_ns may be NULL)
  146. cdef object namespacedNameFromNsName(const_xmlChar* c_ns, const_xmlChar* c_tag)
  147. # check if the node has a text value (which may be '')
  148. cdef bint hasText(tree.xmlNode* c_node) nogil
  149. # check if the node has a tail value (which may be '')
  150. cdef bint hasTail(tree.xmlNode* c_node) nogil
  151. # get the text content of an element (or None)
  152. cdef object textOf(tree.xmlNode* c_node)
  153. # get the tail content of an element (or None)
  154. cdef object tailOf(tree.xmlNode* c_node)
  155. # set the text value of an element
  156. cdef int setNodeText(tree.xmlNode* c_node, text) except -1
  157. # set the tail text value of an element
  158. cdef int setTailText(tree.xmlNode* c_node, text) except -1
  159. # append an element to the children of a parent element
  160. # deprecated: don't use, does not propagate exceptions!
  161. # use appendChildToElement() instead
  162. cdef void appendChild(_Element parent, _Element child)
  163. # added in lxml 3.3 as a safe replacement for appendChild()
  164. # return -1 for exception, 0 for ok
  165. cdef int appendChildToElement(_Element parent, _Element child) except -1
  166. # recursively lookup a namespace in element or ancestors, or create it
  167. cdef tree.xmlNs* findOrBuildNodeNsPrefix(
  168. _Document doc, tree.xmlNode* c_node, const_xmlChar* href, const_xmlChar* prefix)
  169. # find the Document of an Element, ElementTree or Document (itself!)
  170. cdef _Document documentOrRaise(object input)
  171. # find the root Element of an Element (itself!), ElementTree or Document
  172. cdef _Element rootNodeOrRaise(object input)