You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

61 lines
1.7 KiB

4 years ago
  1. import re
  2. from lxml import etree
  3. from six import string_types
  4. from w3lib.html import HTML5_WHITESPACE
  5. regex = '[{}]+'.format(HTML5_WHITESPACE)
  6. replace_html5_whitespaces = re.compile(regex).sub
  7. def set_xpathfunc(fname, func):
  8. """Register a custom extension function to use in XPath expressions.
  9. The function ``func`` registered under ``fname`` identifier will be called
  10. for every matching node, being passed a ``context`` parameter as well as
  11. any parameters passed from the corresponding XPath expression.
  12. If ``func`` is ``None``, the extension function will be removed.
  13. See more `in lxml documentation`_.
  14. .. _`in lxml documentation`: http://lxml.de/extensions.html#xpath-extension-functions
  15. """
  16. ns_fns = etree.FunctionNamespace(None)
  17. if func is not None:
  18. ns_fns[fname] = func
  19. else:
  20. del ns_fns[fname]
  21. def setup():
  22. set_xpathfunc('has-class', has_class)
  23. def has_class(context, *classes):
  24. """has-class function.
  25. Return True if all ``classes`` are present in element's class attr.
  26. """
  27. if not context.eval_context.get('args_checked'):
  28. if not classes:
  29. raise ValueError(
  30. 'XPath error: has-class must have at least 1 argument')
  31. for c in classes:
  32. if not isinstance(c, string_types):
  33. raise ValueError(
  34. 'XPath error: has-class arguments must be strings')
  35. context.eval_context['args_checked'] = True
  36. node_cls = context.context_node.get('class')
  37. if node_cls is None:
  38. return False
  39. node_cls = ' ' + node_cls + ' '
  40. node_cls = replace_html5_whitespaces(' ', node_cls)
  41. for cls in classes:
  42. if ' ' + cls + ' ' not in node_cls:
  43. return False
  44. return True