import re
|
|
from lxml import etree
|
|
|
|
from six import string_types
|
|
|
|
from w3lib.html import HTML5_WHITESPACE
|
|
|
|
# Character class matching a run of one or more HTML5 whitespace characters.
regex = '[{}]+'.format(HTML5_WHITESPACE)
_html5_whitespace_re = re.compile(regex)
# Bound ``sub`` method: call as ``replace_html5_whitespaces(repl, text)``.
replace_html5_whitespaces = _html5_whitespace_re.sub
|
|
|
|
|
|
def set_xpathfunc(fname, func):
    """Register or remove a custom XPath extension function.

    When ``func`` is a callable, it is stored under the ``fname`` identifier
    in lxml's default (``None``) function namespace. It will then be called
    for every matching node, receiving a ``context`` parameter followed by
    any parameters passed from the corresponding XPath expression.

    When ``func`` is ``None``, the function registered under ``fname`` is
    deleted from the namespace instead.

    See more `in lxml documentation`_.

    .. _`in lxml documentation`: http://lxml.de/extensions.html#xpath-extension-functions

    """
    namespace = etree.FunctionNamespace(None)
    if func is None:
        # Removal request: drop the entry registered under this name.
        del namespace[fname]
        return
    namespace[fname] = func
|
|
|
|
|
|
def setup():
    """Register ``has_class`` as the ``has-class`` XPath function."""
    set_xpathfunc(fname='has-class', func=has_class)
|
|
|
|
|
|
def has_class(context, *classes):
    """has-class function.

    Return True if every name in ``classes`` appears in the ``class``
    attribute of the context node, and False otherwise (including when the
    node has no ``class`` attribute at all).

    Arguments are validated only while the ``args_checked`` flag in
    ``context.eval_context`` is unset; after a successful validation the
    flag is set so later calls sharing that dict skip the check.

    Raises ``ValueError`` if no class names are given or if any of them is
    not a string.

    """
    eval_state = context.eval_context
    if not eval_state.get('args_checked'):
        if not classes:
            raise ValueError(
                'XPath error: has-class must have at least 1 argument')
        if not all(isinstance(name, string_types) for name in classes):
            raise ValueError(
                'XPath error: has-class arguments must be strings')
        eval_state['args_checked'] = True

    class_attr = context.context_node.get('class')
    if class_attr is None:
        return False

    # Pad with spaces and normalise whitespace runs to single spaces so each
    # class name can be matched as a whole, space-delimited token.
    padded = replace_html5_whitespaces(' ', ' ' + class_attr + ' ')
    return all(' ' + name + ' ' in padded for name in classes)
|