You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

589 lines
25 KiB

4 years ago
  1. # The following YAML grammar is LL(1) and is parsed by a recursive descent
  2. # parser.
  3. #
  4. # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
  5. # implicit_document ::= block_node DOCUMENT-END*
  6. # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
  7. # block_node_or_indentless_sequence ::=
  8. # ALIAS
  9. # | properties (block_content | indentless_sequence)?
  10. # | block_content
  11. # | indentless_sequence
  12. # block_node ::= ALIAS
  13. # | properties block_content?
  14. # | block_content
  15. # flow_node ::= ALIAS
  16. # | properties flow_content?
  17. # | flow_content
  18. # properties ::= TAG ANCHOR? | ANCHOR TAG?
  19. # block_content ::= block_collection | flow_collection | SCALAR
  20. # flow_content ::= flow_collection | SCALAR
  21. # block_collection ::= block_sequence | block_mapping
  22. # flow_collection ::= flow_sequence | flow_mapping
  23. # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
  24. # indentless_sequence ::= (BLOCK-ENTRY block_node?)+
  25. # block_mapping ::= BLOCK-MAPPING-START
  26. # ((KEY block_node_or_indentless_sequence?)?
  27. # (VALUE block_node_or_indentless_sequence?)?)*
  28. # BLOCK-END
  29. # flow_sequence ::= FLOW-SEQUENCE-START
  30. # (flow_sequence_entry FLOW-ENTRY)*
  31. # flow_sequence_entry?
  32. # FLOW-SEQUENCE-END
  33. # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
  34. # flow_mapping ::= FLOW-MAPPING-START
  35. # (flow_mapping_entry FLOW-ENTRY)*
  36. # flow_mapping_entry?
  37. # FLOW-MAPPING-END
  38. # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
  39. #
  40. # FIRST sets:
  41. #
  42. # stream: { STREAM-START }
  43. # explicit_document: { DIRECTIVE DOCUMENT-START }
  44. # implicit_document: FIRST(block_node)
  45. # block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
  46. # flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
  47. # block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
  48. # flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
  49. # block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
  50. # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
  51. # block_sequence: { BLOCK-SEQUENCE-START }
  52. # block_mapping: { BLOCK-MAPPING-START }
  53. # block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY }
  54. # indentless_sequence: { BLOCK-ENTRY }
  55. # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
  56. # flow_sequence: { FLOW-SEQUENCE-START }
  57. # flow_mapping: { FLOW-MAPPING-START }
  58. # flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
  59. # flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
  60. __all__ = ['Parser', 'ParserError']
  61. from .error import MarkedYAMLError
  62. from .tokens import *
  63. from .events import *
  64. from .scanner import *
  65. class ParserError(MarkedYAMLError):
  66. pass
  67. class Parser:
  68. # Since writing a recursive-descent parser is a straightforward task, we
  69. # do not give many comments here.
  70. DEFAULT_TAGS = {
  71. '!': '!',
  72. '!!': 'tag:yaml.org,2002:',
  73. }
  74. def __init__(self):
  75. self.current_event = None
  76. self.yaml_version = None
  77. self.tag_handles = {}
  78. self.states = []
  79. self.marks = []
  80. self.state = self.parse_stream_start
  81. def dispose(self):
  82. # Reset the state attributes (to clear self-references)
  83. self.states = []
  84. self.state = None
  85. def check_event(self, *choices):
  86. # Check the type of the next event.
  87. if self.current_event is None:
  88. if self.state:
  89. self.current_event = self.state()
  90. if self.current_event is not None:
  91. if not choices:
  92. return True
  93. for choice in choices:
  94. if isinstance(self.current_event, choice):
  95. return True
  96. return False
  97. def peek_event(self):
  98. # Get the next event.
  99. if self.current_event is None:
  100. if self.state:
  101. self.current_event = self.state()
  102. return self.current_event
  103. def get_event(self):
  104. # Get the next event and proceed further.
  105. if self.current_event is None:
  106. if self.state:
  107. self.current_event = self.state()
  108. value = self.current_event
  109. self.current_event = None
  110. return value
  111. # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
  112. # implicit_document ::= block_node DOCUMENT-END*
  113. # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
  114. def parse_stream_start(self):
  115. # Parse the stream start.
  116. token = self.get_token()
  117. event = StreamStartEvent(token.start_mark, token.end_mark,
  118. encoding=token.encoding)
  119. # Prepare the next state.
  120. self.state = self.parse_implicit_document_start
  121. return event
  122. def parse_implicit_document_start(self):
  123. # Parse an implicit document.
  124. if not self.check_token(DirectiveToken, DocumentStartToken,
  125. StreamEndToken):
  126. self.tag_handles = self.DEFAULT_TAGS
  127. token = self.peek_token()
  128. start_mark = end_mark = token.start_mark
  129. event = DocumentStartEvent(start_mark, end_mark,
  130. explicit=False)
  131. # Prepare the next state.
  132. self.states.append(self.parse_document_end)
  133. self.state = self.parse_block_node
  134. return event
  135. else:
  136. return self.parse_document_start()
  137. def parse_document_start(self):
  138. # Parse any extra document end indicators.
  139. while self.check_token(DocumentEndToken):
  140. self.get_token()
  141. # Parse an explicit document.
  142. if not self.check_token(StreamEndToken):
  143. token = self.peek_token()
  144. start_mark = token.start_mark
  145. version, tags = self.process_directives()
  146. if not self.check_token(DocumentStartToken):
  147. raise ParserError(None, None,
  148. "expected '<document start>', but found %r"
  149. % self.peek_token().id,
  150. self.peek_token().start_mark)
  151. token = self.get_token()
  152. end_mark = token.end_mark
  153. event = DocumentStartEvent(start_mark, end_mark,
  154. explicit=True, version=version, tags=tags)
  155. self.states.append(self.parse_document_end)
  156. self.state = self.parse_document_content
  157. else:
  158. # Parse the end of the stream.
  159. token = self.get_token()
  160. event = StreamEndEvent(token.start_mark, token.end_mark)
  161. assert not self.states
  162. assert not self.marks
  163. self.state = None
  164. return event
  165. def parse_document_end(self):
  166. # Parse the document end.
  167. token = self.peek_token()
  168. start_mark = end_mark = token.start_mark
  169. explicit = False
  170. if self.check_token(DocumentEndToken):
  171. token = self.get_token()
  172. end_mark = token.end_mark
  173. explicit = True
  174. event = DocumentEndEvent(start_mark, end_mark,
  175. explicit=explicit)
  176. # Prepare the next state.
  177. self.state = self.parse_document_start
  178. return event
  179. def parse_document_content(self):
  180. if self.check_token(DirectiveToken,
  181. DocumentStartToken, DocumentEndToken, StreamEndToken):
  182. event = self.process_empty_scalar(self.peek_token().start_mark)
  183. self.state = self.states.pop()
  184. return event
  185. else:
  186. return self.parse_block_node()
  187. def process_directives(self):
  188. self.yaml_version = None
  189. self.tag_handles = {}
  190. while self.check_token(DirectiveToken):
  191. token = self.get_token()
  192. if token.name == 'YAML':
  193. if self.yaml_version is not None:
  194. raise ParserError(None, None,
  195. "found duplicate YAML directive", token.start_mark)
  196. major, minor = token.value
  197. if major != 1:
  198. raise ParserError(None, None,
  199. "found incompatible YAML document (version 1.* is required)",
  200. token.start_mark)
  201. self.yaml_version = token.value
  202. elif token.name == 'TAG':
  203. handle, prefix = token.value
  204. if handle in self.tag_handles:
  205. raise ParserError(None, None,
  206. "duplicate tag handle %r" % handle,
  207. token.start_mark)
  208. self.tag_handles[handle] = prefix
  209. if self.tag_handles:
  210. value = self.yaml_version, self.tag_handles.copy()
  211. else:
  212. value = self.yaml_version, None
  213. for key in self.DEFAULT_TAGS:
  214. if key not in self.tag_handles:
  215. self.tag_handles[key] = self.DEFAULT_TAGS[key]
  216. return value
  217. # block_node_or_indentless_sequence ::= ALIAS
  218. # | properties (block_content | indentless_sequence)?
  219. # | block_content
  220. # | indentless_sequence
  221. # block_node ::= ALIAS
  222. # | properties block_content?
  223. # | block_content
  224. # flow_node ::= ALIAS
  225. # | properties flow_content?
  226. # | flow_content
  227. # properties ::= TAG ANCHOR? | ANCHOR TAG?
  228. # block_content ::= block_collection | flow_collection | SCALAR
  229. # flow_content ::= flow_collection | SCALAR
  230. # block_collection ::= block_sequence | block_mapping
  231. # flow_collection ::= flow_sequence | flow_mapping
  232. def parse_block_node(self):
  233. return self.parse_node(block=True)
  234. def parse_flow_node(self):
  235. return self.parse_node()
  236. def parse_block_node_or_indentless_sequence(self):
  237. return self.parse_node(block=True, indentless_sequence=True)
    def parse_node(self, block=False, indentless_sequence=False):
        """Parse one node and return the event that starts (or fully is) it.

        Implements the block_node, flow_node and
        block_node_or_indentless_sequence productions.

        block -- when true, BLOCK-SEQUENCE-START and BLOCK-MAPPING-START
            tokens are also accepted as node content.
        indentless_sequence -- when true, a BLOCK-ENTRY token at the node
            position starts a sequence.

        Returns an AliasEvent, ScalarEvent, SequenceStartEvent or
        MappingStartEvent.  Raises ParserError for a tag whose handle is
        not in self.tag_handles, and when there is neither content nor an
        anchor or tag.
        """
        if self.check_token(AliasToken):
            # An alias is a complete node on its own.
            token = self.get_token()
            event = AliasEvent(token.value, token.start_mark, token.end_mark)
            self.state = self.states.pop()
        else:
            anchor = None
            tag = None
            start_mark = end_mark = tag_mark = None
            # properties ::= TAG ANCHOR? | ANCHOR TAG?
            # start_mark is taken from the first property, end_mark from the
            # last one seen so far, tag_mark from the tag (for error reports).
            if self.check_token(AnchorToken):
                token = self.get_token()
                start_mark = token.start_mark
                end_mark = token.end_mark
                anchor = token.value
                if self.check_token(TagToken):
                    token = self.get_token()
                    tag_mark = token.start_mark
                    end_mark = token.end_mark
                    tag = token.value
            elif self.check_token(TagToken):
                token = self.get_token()
                start_mark = tag_mark = token.start_mark
                end_mark = token.end_mark
                tag = token.value
                if self.check_token(AnchorToken):
                    token = self.get_token()
                    end_mark = token.end_mark
                    anchor = token.value
            if tag is not None:
                # The tag token value is a (handle, suffix) pair; a handle is
                # expanded through self.tag_handles, otherwise the suffix is
                # used as the tag as is.
                handle, suffix = tag
                if handle is not None:
                    if handle not in self.tag_handles:
                        raise ParserError("while parsing a node", start_mark,
                                "found undefined tag handle %r" % handle,
                                tag_mark)
                    tag = self.tag_handles[handle]+suffix
                else:
                    tag = suffix
            #if tag == '!':
            #    raise ParserError("while parsing a node", start_mark,
            #            "found non-specific tag '!'", tag_mark,
            #            "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.")
            if start_mark is None:
                # No properties: the node starts at the next token.
                start_mark = end_mark = self.peek_token().start_mark
            event = None
            # True when no tag was given or the tag is the bare '!'.
            implicit = (tag is None or tag == '!')
            if indentless_sequence and self.check_token(BlockEntryToken):
                # The BLOCK-ENTRY token is only peeked here; it is consumed
                # by parse_indentless_sequence_entry.
                end_mark = self.peek_token().end_mark
                event = SequenceStartEvent(anchor, tag, implicit,
                        start_mark, end_mark)
                self.state = self.parse_indentless_sequence_entry
            else:
                if self.check_token(ScalarToken):
                    token = self.get_token()
                    end_mark = token.end_mark
                    # For scalars `implicit` becomes a pair of flags.
                    # NOTE(review): presumably (tag may be omitted for a
                    # plain scalar, tag may be omitted for a non-plain
                    # scalar) -- confirm against ScalarEvent's consumers.
                    if (token.plain and tag is None) or tag == '!':
                        implicit = (True, False)
                    elif tag is None:
                        implicit = (False, True)
                    else:
                        implicit = (False, False)
                    event = ScalarEvent(anchor, tag, implicit, token.value,
                            start_mark, end_mark, style=token.style)
                    self.state = self.states.pop()
                elif self.check_token(FlowSequenceStartToken):
                    # Collection start tokens are only peeked; the
                    # *_first_entry / *_first_key handlers consume them.
                    end_mark = self.peek_token().end_mark
                    event = SequenceStartEvent(anchor, tag, implicit,
                            start_mark, end_mark, flow_style=True)
                    self.state = self.parse_flow_sequence_first_entry
                elif self.check_token(FlowMappingStartToken):
                    end_mark = self.peek_token().end_mark
                    event = MappingStartEvent(anchor, tag, implicit,
                            start_mark, end_mark, flow_style=True)
                    self.state = self.parse_flow_mapping_first_key
                elif block and self.check_token(BlockSequenceStartToken):
                    # Block collections end their start event at the START
                    # of the token, unlike the flow collections above.
                    end_mark = self.peek_token().start_mark
                    event = SequenceStartEvent(anchor, tag, implicit,
                            start_mark, end_mark, flow_style=False)
                    self.state = self.parse_block_sequence_first_entry
                elif block and self.check_token(BlockMappingStartToken):
                    end_mark = self.peek_token().start_mark
                    event = MappingStartEvent(anchor, tag, implicit,
                            start_mark, end_mark, flow_style=False)
                    self.state = self.parse_block_mapping_first_key
                elif anchor is not None or tag is not None:
                    # Empty scalars are allowed even if a tag or an anchor is
                    # specified.
                    event = ScalarEvent(anchor, tag, (implicit, False), '',
                            start_mark, end_mark)
                    self.state = self.states.pop()
                else:
                    # No properties and no content at all: report an error.
                    if block:
                        node = 'block'
                    else:
                        node = 'flow'
                    token = self.peek_token()
                    raise ParserError("while parsing a %s node" % node, start_mark,
                            "expected the node content, but found %r" % token.id,
                            token.start_mark)
        return event
  338. # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
  339. def parse_block_sequence_first_entry(self):
  340. token = self.get_token()
  341. self.marks.append(token.start_mark)
  342. return self.parse_block_sequence_entry()
  343. def parse_block_sequence_entry(self):
  344. if self.check_token(BlockEntryToken):
  345. token = self.get_token()
  346. if not self.check_token(BlockEntryToken, BlockEndToken):
  347. self.states.append(self.parse_block_sequence_entry)
  348. return self.parse_block_node()
  349. else:
  350. self.state = self.parse_block_sequence_entry
  351. return self.process_empty_scalar(token.end_mark)
  352. if not self.check_token(BlockEndToken):
  353. token = self.peek_token()
  354. raise ParserError("while parsing a block collection", self.marks[-1],
  355. "expected <block end>, but found %r" % token.id, token.start_mark)
  356. token = self.get_token()
  357. event = SequenceEndEvent(token.start_mark, token.end_mark)
  358. self.state = self.states.pop()
  359. self.marks.pop()
  360. return event
  361. # indentless_sequence ::= (BLOCK-ENTRY block_node?)+
  362. def parse_indentless_sequence_entry(self):
  363. if self.check_token(BlockEntryToken):
  364. token = self.get_token()
  365. if not self.check_token(BlockEntryToken,
  366. KeyToken, ValueToken, BlockEndToken):
  367. self.states.append(self.parse_indentless_sequence_entry)
  368. return self.parse_block_node()
  369. else:
  370. self.state = self.parse_indentless_sequence_entry
  371. return self.process_empty_scalar(token.end_mark)
  372. token = self.peek_token()
  373. event = SequenceEndEvent(token.start_mark, token.start_mark)
  374. self.state = self.states.pop()
  375. return event
  376. # block_mapping ::= BLOCK-MAPPING-START
  377. # ((KEY block_node_or_indentless_sequence?)?
  378. # (VALUE block_node_or_indentless_sequence?)?)*
  379. # BLOCK-END
  380. def parse_block_mapping_first_key(self):
  381. token = self.get_token()
  382. self.marks.append(token.start_mark)
  383. return self.parse_block_mapping_key()
  384. def parse_block_mapping_key(self):
  385. if self.check_token(KeyToken):
  386. token = self.get_token()
  387. if not self.check_token(KeyToken, ValueToken, BlockEndToken):
  388. self.states.append(self.parse_block_mapping_value)
  389. return self.parse_block_node_or_indentless_sequence()
  390. else:
  391. self.state = self.parse_block_mapping_value
  392. return self.process_empty_scalar(token.end_mark)
  393. if not self.check_token(BlockEndToken):
  394. token = self.peek_token()
  395. raise ParserError("while parsing a block mapping", self.marks[-1],
  396. "expected <block end>, but found %r" % token.id, token.start_mark)
  397. token = self.get_token()
  398. event = MappingEndEvent(token.start_mark, token.end_mark)
  399. self.state = self.states.pop()
  400. self.marks.pop()
  401. return event
  402. def parse_block_mapping_value(self):
  403. if self.check_token(ValueToken):
  404. token = self.get_token()
  405. if not self.check_token(KeyToken, ValueToken, BlockEndToken):
  406. self.states.append(self.parse_block_mapping_key)
  407. return self.parse_block_node_or_indentless_sequence()
  408. else:
  409. self.state = self.parse_block_mapping_key
  410. return self.process_empty_scalar(token.end_mark)
  411. else:
  412. self.state = self.parse_block_mapping_key
  413. token = self.peek_token()
  414. return self.process_empty_scalar(token.start_mark)
  415. # flow_sequence ::= FLOW-SEQUENCE-START
  416. # (flow_sequence_entry FLOW-ENTRY)*
  417. # flow_sequence_entry?
  418. # FLOW-SEQUENCE-END
  419. # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
  420. #
  421. # Note that while the production rules for flow_sequence_entry and
  422. # flow_mapping_entry are identical, their interpretations differ.
  423. # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
  424. # generates an inline mapping (set syntax).
  425. def parse_flow_sequence_first_entry(self):
  426. token = self.get_token()
  427. self.marks.append(token.start_mark)
  428. return self.parse_flow_sequence_entry(first=True)
  429. def parse_flow_sequence_entry(self, first=False):
  430. if not self.check_token(FlowSequenceEndToken):
  431. if not first:
  432. if self.check_token(FlowEntryToken):
  433. self.get_token()
  434. else:
  435. token = self.peek_token()
  436. raise ParserError("while parsing a flow sequence", self.marks[-1],
  437. "expected ',' or ']', but got %r" % token.id, token.start_mark)
  438. if self.check_token(KeyToken):
  439. token = self.peek_token()
  440. event = MappingStartEvent(None, None, True,
  441. token.start_mark, token.end_mark,
  442. flow_style=True)
  443. self.state = self.parse_flow_sequence_entry_mapping_key
  444. return event
  445. elif not self.check_token(FlowSequenceEndToken):
  446. self.states.append(self.parse_flow_sequence_entry)
  447. return self.parse_flow_node()
  448. token = self.get_token()
  449. event = SequenceEndEvent(token.start_mark, token.end_mark)
  450. self.state = self.states.pop()
  451. self.marks.pop()
  452. return event
  453. def parse_flow_sequence_entry_mapping_key(self):
  454. token = self.get_token()
  455. if not self.check_token(ValueToken,
  456. FlowEntryToken, FlowSequenceEndToken):
  457. self.states.append(self.parse_flow_sequence_entry_mapping_value)
  458. return self.parse_flow_node()
  459. else:
  460. self.state = self.parse_flow_sequence_entry_mapping_value
  461. return self.process_empty_scalar(token.end_mark)
  462. def parse_flow_sequence_entry_mapping_value(self):
  463. if self.check_token(ValueToken):
  464. token = self.get_token()
  465. if not self.check_token(FlowEntryToken, FlowSequenceEndToken):
  466. self.states.append(self.parse_flow_sequence_entry_mapping_end)
  467. return self.parse_flow_node()
  468. else:
  469. self.state = self.parse_flow_sequence_entry_mapping_end
  470. return self.process_empty_scalar(token.end_mark)
  471. else:
  472. self.state = self.parse_flow_sequence_entry_mapping_end
  473. token = self.peek_token()
  474. return self.process_empty_scalar(token.start_mark)
  475. def parse_flow_sequence_entry_mapping_end(self):
  476. self.state = self.parse_flow_sequence_entry
  477. token = self.peek_token()
  478. return MappingEndEvent(token.start_mark, token.start_mark)
  479. # flow_mapping ::= FLOW-MAPPING-START
  480. # (flow_mapping_entry FLOW-ENTRY)*
  481. # flow_mapping_entry?
  482. # FLOW-MAPPING-END
  483. # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
  484. def parse_flow_mapping_first_key(self):
  485. token = self.get_token()
  486. self.marks.append(token.start_mark)
  487. return self.parse_flow_mapping_key(first=True)
  488. def parse_flow_mapping_key(self, first=False):
  489. if not self.check_token(FlowMappingEndToken):
  490. if not first:
  491. if self.check_token(FlowEntryToken):
  492. self.get_token()
  493. else:
  494. token = self.peek_token()
  495. raise ParserError("while parsing a flow mapping", self.marks[-1],
  496. "expected ',' or '}', but got %r" % token.id, token.start_mark)
  497. if self.check_token(KeyToken):
  498. token = self.get_token()
  499. if not self.check_token(ValueToken,
  500. FlowEntryToken, FlowMappingEndToken):
  501. self.states.append(self.parse_flow_mapping_value)
  502. return self.parse_flow_node()
  503. else:
  504. self.state = self.parse_flow_mapping_value
  505. return self.process_empty_scalar(token.end_mark)
  506. elif not self.check_token(FlowMappingEndToken):
  507. self.states.append(self.parse_flow_mapping_empty_value)
  508. return self.parse_flow_node()
  509. token = self.get_token()
  510. event = MappingEndEvent(token.start_mark, token.end_mark)
  511. self.state = self.states.pop()
  512. self.marks.pop()
  513. return event
  514. def parse_flow_mapping_value(self):
  515. if self.check_token(ValueToken):
  516. token = self.get_token()
  517. if not self.check_token(FlowEntryToken, FlowMappingEndToken):
  518. self.states.append(self.parse_flow_mapping_key)
  519. return self.parse_flow_node()
  520. else:
  521. self.state = self.parse_flow_mapping_key
  522. return self.process_empty_scalar(token.end_mark)
  523. else:
  524. self.state = self.parse_flow_mapping_key
  525. token = self.peek_token()
  526. return self.process_empty_scalar(token.start_mark)
  527. def parse_flow_mapping_empty_value(self):
  528. self.state = self.parse_flow_mapping_key
  529. return self.process_empty_scalar(self.peek_token().start_mark)
  530. def process_empty_scalar(self, mark):
  531. return ScalarEvent(None, None, (True, False), '', mark, mark)