You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

773 lines
27 KiB

4 years ago
  1. # -*- coding: utf-8 -*-
  2. """
  3. cssselect.xpath
  4. ===============
  5. Translation of parsed CSS selectors to XPath expressions.
  6. :copyright: (c) 2007-2012 Ian Bicking and contributors.
  7. See AUTHORS for more details.
  8. :license: BSD, see LICENSE for more details.
  9. """
  10. import sys
  11. import re
  12. from cssselect.parser import parse, parse_series, SelectorError
  13. if sys.version_info[0] < 3:
  14. _basestring = basestring
  15. _unicode = unicode
  16. else:
  17. _basestring = str
  18. _unicode = str
  19. def _unicode_safe_getattr(obj, name, default=None):
  20. # getattr() with a non-ASCII name fails on Python 2.x
  21. name = name.encode('ascii', 'replace').decode('ascii')
  22. return getattr(obj, name, default)
  23. class ExpressionError(SelectorError, RuntimeError):
  24. """Unknown or unsupported selector (eg. pseudo-class)."""
  25. #### XPath Helpers
  26. class XPathExpr(object):
  27. def __init__(self, path='', element='*', condition='', star_prefix=False):
  28. self.path = path
  29. self.element = element
  30. self.condition = condition
  31. def __str__(self):
  32. path = _unicode(self.path) + _unicode(self.element)
  33. if self.condition:
  34. path += '[%s]' % self.condition
  35. return path
  36. def __repr__(self):
  37. return '%s[%s]' % (self.__class__.__name__, self)
  38. def add_condition(self, condition):
  39. if self.condition:
  40. self.condition = '%s and (%s)' % (self.condition, condition)
  41. else:
  42. self.condition = condition
  43. return self
  44. def add_name_test(self):
  45. if self.element == '*':
  46. # We weren't doing a test anyway
  47. return
  48. self.add_condition(
  49. "name() = %s" % GenericTranslator.xpath_literal(self.element))
  50. self.element = '*'
  51. def add_star_prefix(self):
  52. """
  53. Append '*/' to the path to keep the context constrained
  54. to a single parent.
  55. """
  56. self.path += '*/'
  57. def join(self, combiner, other):
  58. path = _unicode(self) + combiner
  59. # Any "star prefix" is redundant when joining.
  60. if other.path != '*/':
  61. path += other.path
  62. self.path = path
  63. self.element = other.element
  64. self.condition = other.condition
  65. return self
  66. split_at_single_quotes = re.compile("('+)").split
  67. # The spec is actually more permissive than that, but don’t bother.
  68. # This is just for the fast path.
  69. # http://www.w3.org/TR/REC-xml/#NT-NameStartChar
  70. is_safe_name = re.compile('^[a-zA-Z_][a-zA-Z0-9_.-]*$').match
  71. # Test that the string is not empty and does not contain whitespace
  72. is_non_whitespace = re.compile(r'^[^ \t\r\n\f]+$').match
  73. #### Translation
  74. class GenericTranslator(object):
  75. """
  76. Translator for "generic" XML documents.
  77. Everything is case-sensitive, no assumption is made on the meaning
  78. of element names and attribute names.
  79. """
  80. ####
  81. #### HERE BE DRAGONS
  82. ####
  83. #### You are welcome to hook into this to change some behavior,
  84. #### but do so at your own risks.
  85. #### Until it has received a lot more work and review,
  86. #### I reserve the right to change this API in backward-incompatible ways
  87. #### with any minor version of cssselect.
  88. #### See https://github.com/scrapy/cssselect/pull/22
  89. #### -- Simon Sapin.
  90. ####
  91. combinator_mapping = {
  92. ' ': 'descendant',
  93. '>': 'child',
  94. '+': 'direct_adjacent',
  95. '~': 'indirect_adjacent',
  96. }
  97. attribute_operator_mapping = {
  98. 'exists': 'exists',
  99. '=': 'equals',
  100. '~=': 'includes',
  101. '|=': 'dashmatch',
  102. '^=': 'prefixmatch',
  103. '$=': 'suffixmatch',
  104. '*=': 'substringmatch',
  105. '!=': 'different', # XXX Not in Level 3 but meh
  106. }
  107. #: The attribute used for ID selectors depends on the document language:
  108. #: http://www.w3.org/TR/selectors/#id-selectors
  109. id_attribute = 'id'
  110. #: The attribute used for ``:lang()`` depends on the document language:
  111. #: http://www.w3.org/TR/selectors/#lang-pseudo
  112. lang_attribute = 'xml:lang'
  113. #: The case sensitivity of document language element names,
  114. #: attribute names, and attribute values in selectors depends
  115. #: on the document language.
  116. #: http://www.w3.org/TR/selectors/#casesens
  117. #:
  118. #: When a document language defines one of these as case-insensitive,
  119. #: cssselect assumes that the document parser makes the parsed values
  120. #: lower-case. Making the selector lower-case too makes the comparaison
  121. #: case-insensitive.
  122. #:
  123. #: In HTML, element names and attributes names (but not attribute values)
  124. #: are case-insensitive. All of lxml.html, html5lib, BeautifulSoup4
  125. #: and HTMLParser make them lower-case in their parse result, so
  126. #: the assumption holds.
  127. lower_case_element_names = False
  128. lower_case_attribute_names = False
  129. lower_case_attribute_values = False
  130. # class used to represent and xpath expression
  131. xpathexpr_cls = XPathExpr
  132. def css_to_xpath(self, css, prefix='descendant-or-self::'):
  133. """Translate a *group of selectors* to XPath.
  134. Pseudo-elements are not supported here since XPath only knows
  135. about "real" elements.
  136. :param css:
  137. A *group of selectors* as an Unicode string.
  138. :param prefix:
  139. This string is prepended to the XPath expression for each selector.
  140. The default makes selectors scoped to the context nodes subtree.
  141. :raises:
  142. :class:`SelectorSyntaxError` on invalid selectors,
  143. :class:`ExpressionError` on unknown/unsupported selectors,
  144. including pseudo-elements.
  145. :returns:
  146. The equivalent XPath 1.0 expression as an Unicode string.
  147. """
  148. return ' | '.join(self.selector_to_xpath(selector, prefix,
  149. translate_pseudo_elements=True)
  150. for selector in parse(css))
  151. def selector_to_xpath(self, selector, prefix='descendant-or-self::',
  152. translate_pseudo_elements=False):
  153. """Translate a parsed selector to XPath.
  154. :param selector:
  155. A parsed :class:`Selector` object.
  156. :param prefix:
  157. This string is prepended to the resulting XPath expression.
  158. The default makes selectors scoped to the context nodes subtree.
  159. :param translate_pseudo_elements:
  160. Unless this is set to ``True`` (as :meth:`css_to_xpath` does),
  161. the :attr:`~Selector.pseudo_element` attribute of the selector
  162. is ignored.
  163. It is the caller's responsibility to reject selectors
  164. with pseudo-elements, or to account for them somehow.
  165. :raises:
  166. :class:`ExpressionError` on unknown/unsupported selectors.
  167. :returns:
  168. The equivalent XPath 1.0 expression as an Unicode string.
  169. """
  170. tree = getattr(selector, 'parsed_tree', None)
  171. if not tree:
  172. raise TypeError('Expected a parsed selector, got %r' % (selector,))
  173. xpath = self.xpath(tree)
  174. assert isinstance(xpath, self.xpathexpr_cls) # help debug a missing 'return'
  175. if translate_pseudo_elements and selector.pseudo_element:
  176. xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
  177. return (prefix or '') + _unicode(xpath)
  178. def xpath_pseudo_element(self, xpath, pseudo_element):
  179. """Translate a pseudo-element.
  180. Defaults to not supporting pseudo-elements at all,
  181. but can be overridden by sub-classes.
  182. """
  183. raise ExpressionError('Pseudo-elements are not supported.')
  184. @staticmethod
  185. def xpath_literal(s):
  186. s = _unicode(s)
  187. if "'" not in s:
  188. s = "'%s'" % s
  189. elif '"' not in s:
  190. s = '"%s"' % s
  191. else:
  192. s = "concat(%s)" % ','.join([
  193. (("'" in part) and '"%s"' or "'%s'") % part
  194. for part in split_at_single_quotes(s) if part
  195. ])
  196. return s
  197. def xpath(self, parsed_selector):
  198. """Translate any parsed selector object."""
  199. type_name = type(parsed_selector).__name__
  200. method = getattr(self, 'xpath_%s' % type_name.lower(), None)
  201. if method is None:
  202. raise ExpressionError('%s is not supported.' % type_name)
  203. return method(parsed_selector)
  204. # Dispatched by parsed object type
  205. def xpath_combinedselector(self, combined):
  206. """Translate a combined selector."""
  207. combinator = self.combinator_mapping[combined.combinator]
  208. method = getattr(self, 'xpath_%s_combinator' % combinator)
  209. return method(self.xpath(combined.selector),
  210. self.xpath(combined.subselector))
  211. def xpath_negation(self, negation):
  212. xpath = self.xpath(negation.selector)
  213. sub_xpath = self.xpath(negation.subselector)
  214. sub_xpath.add_name_test()
  215. if sub_xpath.condition:
  216. return xpath.add_condition('not(%s)' % sub_xpath.condition)
  217. else:
  218. return xpath.add_condition('0')
  219. def xpath_function(self, function):
  220. """Translate a functional pseudo-class."""
  221. method = 'xpath_%s_function' % function.name.replace('-', '_')
  222. method = _unicode_safe_getattr(self, method, None)
  223. if not method:
  224. raise ExpressionError(
  225. "The pseudo-class :%s() is unknown" % function.name)
  226. return method(self.xpath(function.selector), function)
  227. def xpath_pseudo(self, pseudo):
  228. """Translate a pseudo-class."""
  229. method = 'xpath_%s_pseudo' % pseudo.ident.replace('-', '_')
  230. method = _unicode_safe_getattr(self, method, None)
  231. if not method:
  232. # TODO: better error message for pseudo-elements?
  233. raise ExpressionError(
  234. "The pseudo-class :%s is unknown" % pseudo.ident)
  235. return method(self.xpath(pseudo.selector))
  236. def xpath_attrib(self, selector):
  237. """Translate an attribute selector."""
  238. operator = self.attribute_operator_mapping[selector.operator]
  239. method = getattr(self, 'xpath_attrib_%s' % operator)
  240. if self.lower_case_attribute_names:
  241. name = selector.attrib.lower()
  242. else:
  243. name = selector.attrib
  244. safe = is_safe_name(name)
  245. if selector.namespace:
  246. name = '%s:%s' % (selector.namespace, name)
  247. safe = safe and is_safe_name(selector.namespace)
  248. if safe:
  249. attrib = '@' + name
  250. else:
  251. attrib = 'attribute::*[name() = %s]' % self.xpath_literal(name)
  252. if self.lower_case_attribute_values:
  253. value = selector.value.lower()
  254. else:
  255. value = selector.value
  256. return method(self.xpath(selector.selector), attrib, value)
  257. def xpath_class(self, class_selector):
  258. """Translate a class selector."""
  259. # .foo is defined as [class~=foo] in the spec.
  260. xpath = self.xpath(class_selector.selector)
  261. return self.xpath_attrib_includes(
  262. xpath, '@class', class_selector.class_name)
  263. def xpath_hash(self, id_selector):
  264. """Translate an ID selector."""
  265. xpath = self.xpath(id_selector.selector)
  266. return self.xpath_attrib_equals(xpath, '@id', id_selector.id)
  267. def xpath_element(self, selector):
  268. """Translate a type or universal selector."""
  269. element = selector.element
  270. if not element:
  271. element = '*'
  272. safe = True
  273. else:
  274. safe = is_safe_name(element)
  275. if self.lower_case_element_names:
  276. element = element.lower()
  277. if selector.namespace:
  278. # Namespace prefixes are case-sensitive.
  279. # http://www.w3.org/TR/css3-namespace/#prefixes
  280. element = '%s:%s' % (selector.namespace, element)
  281. safe = safe and is_safe_name(selector.namespace)
  282. xpath = self.xpathexpr_cls(element=element)
  283. if not safe:
  284. xpath.add_name_test()
  285. return xpath
  286. # CombinedSelector: dispatch by combinator
  287. def xpath_descendant_combinator(self, left, right):
  288. """right is a child, grand-child or further descendant of left"""
  289. return left.join('/descendant-or-self::*/', right)
  290. def xpath_child_combinator(self, left, right):
  291. """right is an immediate child of left"""
  292. return left.join('/', right)
  293. def xpath_direct_adjacent_combinator(self, left, right):
  294. """right is a sibling immediately after left"""
  295. xpath = left.join('/following-sibling::', right)
  296. xpath.add_name_test()
  297. return xpath.add_condition('position() = 1')
  298. def xpath_indirect_adjacent_combinator(self, left, right):
  299. """right is a sibling after left, immediately or not"""
  300. return left.join('/following-sibling::', right)
  301. # Function: dispatch by function/pseudo-class name
  302. def xpath_nth_child_function(self, xpath, function, last=False,
  303. add_name_test=True):
  304. try:
  305. a, b = parse_series(function.arguments)
  306. except ValueError:
  307. raise ExpressionError("Invalid series: '%r'" % function.arguments)
  308. # From https://www.w3.org/TR/css3-selectors/#structural-pseudos:
  309. #
  310. # :nth-child(an+b)
  311. # an+b-1 siblings before
  312. #
  313. # :nth-last-child(an+b)
  314. # an+b-1 siblings after
  315. #
  316. # :nth-of-type(an+b)
  317. # an+b-1 siblings with the same expanded element name before
  318. #
  319. # :nth-last-of-type(an+b)
  320. # an+b-1 siblings with the same expanded element name after
  321. #
  322. # So,
  323. # for :nth-child and :nth-of-type
  324. #
  325. # count(preceding-sibling::<nodetest>) = an+b-1
  326. #
  327. # for :nth-last-child and :nth-last-of-type
  328. #
  329. # count(following-sibling::<nodetest>) = an+b-1
  330. #
  331. # therefore,
  332. # count(...) - (b-1) ≡ 0 (mod a)
  333. #
  334. # if a == 0:
  335. # ~~~~~~~~~~
  336. # count(...) = b-1
  337. #
  338. # if a < 0:
  339. # ~~~~~~~~~
  340. # count(...) - b +1 <= 0
  341. # -> count(...) <= b-1
  342. #
  343. # if a > 0:
  344. # ~~~~~~~~~
  345. # count(...) - b +1 >= 0
  346. # -> count(...) >= b-1
  347. # work with b-1 instead
  348. b_min_1 = b - 1
  349. # early-exit condition 1:
  350. # ~~~~~~~~~~~~~~~~~~~~~~~
  351. # for a == 1, nth-*(an+b) means n+b-1 siblings before/after,
  352. # and since n ∈ {0, 1, 2, ...}, if b-1<=0,
  353. # there is always an "n" matching any number of siblings (maybe none)
  354. if a == 1 and b_min_1 <=0:
  355. return xpath
  356. # early-exit condition 2:
  357. # ~~~~~~~~~~~~~~~~~~~~~~~
  358. # an+b-1 siblings with a<0 and (b-1)<0 is not possible
  359. if a < 0 and b_min_1 < 0:
  360. return xpath.add_condition('0')
  361. # `add_name_test` boolean is inverted and somewhat counter-intuitive:
  362. #
  363. # nth_of_type() calls nth_child(add_name_test=False)
  364. if add_name_test:
  365. nodetest = '*'
  366. else:
  367. nodetest = '%s' % xpath.element
  368. # count siblings before or after the element
  369. if not last:
  370. siblings_count = 'count(preceding-sibling::%s)' % nodetest
  371. else:
  372. siblings_count = 'count(following-sibling::%s)' % nodetest
  373. # special case of fixed position: nth-*(0n+b)
  374. # if a == 0:
  375. # ~~~~~~~~~~
  376. # count(***-sibling::***) = b-1
  377. if a == 0:
  378. return xpath.add_condition('%s = %s' % (siblings_count, b_min_1))
  379. expr = []
  380. if a > 0:
  381. # siblings count, an+b-1, is always >= 0,
  382. # so if a>0, and (b-1)<=0, an "n" exists to satisfy this,
  383. # therefore, the predicate is only interesting if (b-1)>0
  384. if b_min_1 > 0:
  385. expr.append('%s >= %s' % (siblings_count, b_min_1))
  386. else:
  387. # if a<0, and (b-1)<0, no "n" satisfies this,
  388. # this is tested above as an early exist condition
  389. # otherwise,
  390. expr.append('%s <= %s' % (siblings_count, b_min_1))
  391. # operations modulo 1 or -1 are simpler, one only needs to verify:
  392. #
  393. # - either:
  394. # count(***-sibling::***) - (b-1) = n = 0, 1, 2, 3, etc.,
  395. # i.e. count(***-sibling::***) >= (b-1)
  396. #
  397. # - or:
  398. # count(***-sibling::***) - (b-1) = -n = 0, -1, -2, -3, etc.,
  399. # i.e. count(***-sibling::***) <= (b-1)
  400. # we we just did above.
  401. #
  402. if abs(a) != 1:
  403. # count(***-sibling::***) - (b-1) ≡ 0 (mod a)
  404. left = siblings_count
  405. # apply "modulo a" on 2nd term, -(b-1),
  406. # to simplify things like "(... +6) % -3",
  407. # and also make it positive with |a|
  408. b_neg = (-b_min_1) % abs(a)
  409. if b_neg != 0:
  410. b_neg = '+%s' % b_neg
  411. left = '(%s %s)' % (left, b_neg)
  412. expr.append('%s mod %s = 0' % (left, a))
  413. xpath.add_condition(' and '.join(expr))
  414. return xpath
  415. def xpath_nth_last_child_function(self, xpath, function):
  416. return self.xpath_nth_child_function(xpath, function, last=True)
  417. def xpath_nth_of_type_function(self, xpath, function):
  418. if xpath.element == '*':
  419. raise ExpressionError(
  420. "*:nth-of-type() is not implemented")
  421. return self.xpath_nth_child_function(xpath, function,
  422. add_name_test=False)
  423. def xpath_nth_last_of_type_function(self, xpath, function):
  424. if xpath.element == '*':
  425. raise ExpressionError(
  426. "*:nth-of-type() is not implemented")
  427. return self.xpath_nth_child_function(xpath, function, last=True,
  428. add_name_test=False)
  429. def xpath_contains_function(self, xpath, function):
  430. # Defined there, removed in later drafts:
  431. # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
  432. if function.argument_types() not in (['STRING'], ['IDENT']):
  433. raise ExpressionError(
  434. "Expected a single string or ident for :contains(), got %r"
  435. % function.arguments)
  436. value = function.arguments[0].value
  437. return xpath.add_condition(
  438. 'contains(., %s)' % self.xpath_literal(value))
  439. def xpath_lang_function(self, xpath, function):
  440. if function.argument_types() not in (['STRING'], ['IDENT']):
  441. raise ExpressionError(
  442. "Expected a single string or ident for :lang(), got %r"
  443. % function.arguments)
  444. value = function.arguments[0].value
  445. return xpath.add_condition(
  446. "lang(%s)" % (self.xpath_literal(value)))
  447. # Pseudo: dispatch by pseudo-class name
  448. def xpath_root_pseudo(self, xpath):
  449. return xpath.add_condition("not(parent::*)")
  450. def xpath_first_child_pseudo(self, xpath):
  451. return xpath.add_condition('count(preceding-sibling::*) = 0')
  452. def xpath_last_child_pseudo(self, xpath):
  453. return xpath.add_condition('count(following-sibling::*) = 0')
  454. def xpath_first_of_type_pseudo(self, xpath):
  455. if xpath.element == '*':
  456. raise ExpressionError(
  457. "*:first-of-type is not implemented")
  458. return xpath.add_condition('count(preceding-sibling::%s) = 0' % xpath.element)
  459. def xpath_last_of_type_pseudo(self, xpath):
  460. if xpath.element == '*':
  461. raise ExpressionError(
  462. "*:last-of-type is not implemented")
  463. return xpath.add_condition('count(following-sibling::%s) = 0' % xpath.element)
  464. def xpath_only_child_pseudo(self, xpath):
  465. return xpath.add_condition('count(parent::*/child::*) = 1')
  466. def xpath_only_of_type_pseudo(self, xpath):
  467. if xpath.element == '*':
  468. raise ExpressionError(
  469. "*:only-of-type is not implemented")
  470. return xpath.add_condition('count(parent::*/child::%s) = 1' % xpath.element)
  471. def xpath_empty_pseudo(self, xpath):
  472. return xpath.add_condition("not(*) and not(string-length())")
  473. def pseudo_never_matches(self, xpath):
  474. """Common implementation for pseudo-classes that never match."""
  475. return xpath.add_condition("0")
  476. xpath_link_pseudo = pseudo_never_matches
  477. xpath_visited_pseudo = pseudo_never_matches
  478. xpath_hover_pseudo = pseudo_never_matches
  479. xpath_active_pseudo = pseudo_never_matches
  480. xpath_focus_pseudo = pseudo_never_matches
  481. xpath_target_pseudo = pseudo_never_matches
  482. xpath_enabled_pseudo = pseudo_never_matches
  483. xpath_disabled_pseudo = pseudo_never_matches
  484. xpath_checked_pseudo = pseudo_never_matches
  485. # Attrib: dispatch by attribute operator
  486. def xpath_attrib_exists(self, xpath, name, value):
  487. assert not value
  488. xpath.add_condition(name)
  489. return xpath
  490. def xpath_attrib_equals(self, xpath, name, value):
  491. xpath.add_condition('%s = %s' % (name, self.xpath_literal(value)))
  492. return xpath
  493. def xpath_attrib_different(self, xpath, name, value):
  494. # FIXME: this seems like a weird hack...
  495. if value:
  496. xpath.add_condition('not(%s) or %s != %s'
  497. % (name, name, self.xpath_literal(value)))
  498. else:
  499. xpath.add_condition('%s != %s'
  500. % (name, self.xpath_literal(value)))
  501. return xpath
  502. def xpath_attrib_includes(self, xpath, name, value):
  503. if is_non_whitespace(value):
  504. xpath.add_condition(
  505. "%s and contains(concat(' ', normalize-space(%s), ' '), %s)"
  506. % (name, name, self.xpath_literal(' '+value+' ')))
  507. else:
  508. xpath.add_condition('0')
  509. return xpath
  510. def xpath_attrib_dashmatch(self, xpath, name, value):
  511. # Weird, but true...
  512. xpath.add_condition('%s and (%s = %s or starts-with(%s, %s))' % (
  513. name,
  514. name, self.xpath_literal(value),
  515. name, self.xpath_literal(value + '-')))
  516. return xpath
  517. def xpath_attrib_prefixmatch(self, xpath, name, value):
  518. if value:
  519. xpath.add_condition('%s and starts-with(%s, %s)' % (
  520. name, name, self.xpath_literal(value)))
  521. else:
  522. xpath.add_condition('0')
  523. return xpath
  524. def xpath_attrib_suffixmatch(self, xpath, name, value):
  525. if value:
  526. # Oddly there is a starts-with in XPath 1.0, but not ends-with
  527. xpath.add_condition(
  528. '%s and substring(%s, string-length(%s)-%s) = %s'
  529. % (name, name, name, len(value)-1, self.xpath_literal(value)))
  530. else:
  531. xpath.add_condition('0')
  532. return xpath
  533. def xpath_attrib_substringmatch(self, xpath, name, value):
  534. if value:
  535. # Attribute selectors are case sensitive
  536. xpath.add_condition('%s and contains(%s, %s)' % (
  537. name, name, self.xpath_literal(value)))
  538. else:
  539. xpath.add_condition('0')
  540. return xpath
  541. class HTMLTranslator(GenericTranslator):
  542. """
  543. Translator for (X)HTML documents.
  544. Has a more useful implementation of some pseudo-classes based on
  545. HTML-specific element names and attribute names, as described in
  546. the `HTML5 specification`_. It assumes no-quirks mode.
  547. The API is the same as :class:`GenericTranslator`.
  548. .. _HTML5 specification: http://www.w3.org/TR/html5/links.html#selectors
  549. :param xhtml:
  550. If false (the default), element names and attribute names
  551. are case-insensitive.
  552. """
  553. lang_attribute = 'lang'
  554. def __init__(self, xhtml=False):
  555. self.xhtml = xhtml # Might be useful for sub-classes?
  556. if not xhtml:
  557. # See their definition in GenericTranslator.
  558. self.lower_case_element_names = True
  559. self.lower_case_attribute_names = True
  560. def xpath_checked_pseudo(self, xpath):
  561. # FIXME: is this really all the elements?
  562. return xpath.add_condition(
  563. "(@selected and name(.) = 'option') or "
  564. "(@checked "
  565. "and (name(.) = 'input' or name(.) = 'command')"
  566. "and (@type = 'checkbox' or @type = 'radio'))")
  567. def xpath_lang_function(self, xpath, function):
  568. if function.argument_types() not in (['STRING'], ['IDENT']):
  569. raise ExpressionError(
  570. "Expected a single string or ident for :lang(), got %r"
  571. % function.arguments)
  572. value = function.arguments[0].value
  573. return xpath.add_condition(
  574. "ancestor-or-self::*[@lang][1][starts-with(concat("
  575. # XPath 1.0 has no lower-case function...
  576. "translate(@%s, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
  577. "'abcdefghijklmnopqrstuvwxyz'), "
  578. "'-'), %s)]"
  579. % (self.lang_attribute, self.xpath_literal(value.lower() + '-')))
  580. def xpath_link_pseudo(self, xpath):
  581. return xpath.add_condition("@href and "
  582. "(name(.) = 'a' or name(.) = 'link' or name(.) = 'area')")
  583. # Links are never visited, the implementation for :visited is the same
  584. # as in GenericTranslator
  585. def xpath_disabled_pseudo(self, xpath):
  586. # http://www.w3.org/TR/html5/section-index.html#attributes-1
  587. return xpath.add_condition('''
  588. (
  589. @disabled and
  590. (
  591. (name(.) = 'input' and @type != 'hidden') or
  592. name(.) = 'button' or
  593. name(.) = 'select' or
  594. name(.) = 'textarea' or
  595. name(.) = 'command' or
  596. name(.) = 'fieldset' or
  597. name(.) = 'optgroup' or
  598. name(.) = 'option'
  599. )
  600. ) or (
  601. (
  602. (name(.) = 'input' and @type != 'hidden') or
  603. name(.) = 'button' or
  604. name(.) = 'select' or
  605. name(.) = 'textarea'
  606. )
  607. and ancestor::fieldset[@disabled]
  608. )
  609. ''')
  610. # FIXME: in the second half, add "and is not a descendant of that
  611. # fieldset element's first legend element child, if any."
  612. def xpath_enabled_pseudo(self, xpath):
  613. # http://www.w3.org/TR/html5/section-index.html#attributes-1
  614. return xpath.add_condition('''
  615. (
  616. @href and (
  617. name(.) = 'a' or
  618. name(.) = 'link' or
  619. name(.) = 'area'
  620. )
  621. ) or (
  622. (
  623. name(.) = 'command' or
  624. name(.) = 'fieldset' or
  625. name(.) = 'optgroup'
  626. )
  627. and not(@disabled)
  628. ) or (
  629. (
  630. (name(.) = 'input' and @type != 'hidden') or
  631. name(.) = 'button' or
  632. name(.) = 'select' or
  633. name(.) = 'textarea' or
  634. name(.) = 'keygen'
  635. )
  636. and not (@disabled or ancestor::fieldset[@disabled])
  637. ) or (
  638. name(.) = 'option' and not(
  639. @disabled or ancestor::optgroup[@disabled]
  640. )
  641. )
  642. ''')
  643. # FIXME: ... or "li elements that are children of menu elements,
  644. # and that have a child element that defines a command, if the first
  645. # such element's Disabled State facet is false (not disabled)".
  646. # FIXME: after ancestor::fieldset[@disabled], add "and is not a
  647. # descendant of that fieldset element's first legend element child,
  648. # if any."