You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

208 lines
6.9 KiB

4 years ago
  1. # Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
  2. # Licensed to PSF under a Contributor Agreement.
  3. # Modifications:
  4. # Copyright David Halter and Contributors
  5. # Modifications are dual-licensed: MIT and PSF.
  6. # 99% of the code is different from pgen2, now.
  7. """
  8. The ``Parser`` tries to convert the available Python code in an easy to read
  9. format, something like an abstract syntax tree. The classes that represent this
  10. tree live in the :mod:`parso.tree` module.
  11. The Python module ``tokenize`` is a very important part in the ``Parser``,
  12. because it splits the code into different words (tokens). Sometimes it looks a
  13. bit messy. Sorry for that! You might ask now: "Why didn't you use the ``ast``
  14. module for this?" Well, ``ast`` does a very good job understanding proper Python
  15. code, but fails to work as soon as there's a single line of broken code.
  16. There's one important optimization that needs to be known: Statements are not
  17. being parsed completely. ``Statement`` is just a representation of the tokens
  18. within the statement. This lowers memory usage and cpu time and reduces the
  19. complexity of the ``Parser`` (there's another parser sitting inside
  20. ``Statement``, which produces ``Array`` and ``Call``).
  21. """
  22. from parso import tree
  23. from parso.pgen2.generator import ReservedString
  24. class ParserSyntaxError(Exception):
  25. """
  26. Contains error information about the parser tree.
  27. May be raised as an exception.
  28. """
  29. def __init__(self, message, error_leaf):
  30. self.message = message
  31. self.error_leaf = error_leaf
  32. class InternalParseError(Exception):
  33. """
  34. Exception to signal the parser is stuck and error recovery didn't help.
  35. Basically this shouldn't happen. It's a sign that something is really
  36. wrong.
  37. """
  38. def __init__(self, msg, type_, value, start_pos):
  39. Exception.__init__(self, "%s: type=%r, value=%r, start_pos=%r" %
  40. (msg, type_.name, value, start_pos))
  41. self.msg = msg
  42. self.type = type
  43. self.value = value
  44. self.start_pos = start_pos
  45. class Stack(list):
  46. def _allowed_transition_names_and_token_types(self):
  47. def iterate():
  48. # An API just for Jedi.
  49. for stack_node in reversed(self):
  50. for transition in stack_node.dfa.transitions:
  51. if isinstance(transition, ReservedString):
  52. yield transition.value
  53. else:
  54. yield transition # A token type
  55. if not stack_node.dfa.is_final:
  56. break
  57. return list(iterate())
  58. class StackNode(object):
  59. def __init__(self, dfa):
  60. self.dfa = dfa
  61. self.nodes = []
  62. @property
  63. def nonterminal(self):
  64. return self.dfa.from_rule
  65. def __repr__(self):
  66. return '%s(%s, %s)' % (self.__class__.__name__, self.dfa, self.nodes)
  67. def _token_to_transition(grammar, type_, value):
  68. # Map from token to label
  69. if type_.contains_syntax:
  70. # Check for reserved words (keywords)
  71. try:
  72. return grammar.reserved_syntax_strings[value]
  73. except KeyError:
  74. pass
  75. return type_
  76. class BaseParser(object):
  77. """Parser engine.
  78. A Parser instance contains state pertaining to the current token
  79. sequence, and should not be used concurrently by different threads
  80. to parse separate token sequences.
  81. See python/tokenize.py for how to get input tokens by a string.
  82. When a syntax error occurs, error_recovery() is called.
  83. """
  84. node_map = {}
  85. default_node = tree.Node
  86. leaf_map = {
  87. }
  88. default_leaf = tree.Leaf
  89. def __init__(self, pgen_grammar, start_nonterminal='file_input', error_recovery=False):
  90. self._pgen_grammar = pgen_grammar
  91. self._start_nonterminal = start_nonterminal
  92. self._error_recovery = error_recovery
  93. def parse(self, tokens):
  94. first_dfa = self._pgen_grammar.nonterminal_to_dfas[self._start_nonterminal][0]
  95. self.stack = Stack([StackNode(first_dfa)])
  96. for token in tokens:
  97. self._add_token(token)
  98. while True:
  99. tos = self.stack[-1]
  100. if not tos.dfa.is_final:
  101. # We never broke out -- EOF is too soon -- Unfinished statement.
  102. # However, the error recovery might have added the token again, if
  103. # the stack is empty, we're fine.
  104. raise InternalParseError(
  105. "incomplete input", token.type, token.value, token.start_pos
  106. )
  107. if len(self.stack) > 1:
  108. self._pop()
  109. else:
  110. return self.convert_node(tos.nonterminal, tos.nodes)
  111. def error_recovery(self, token):
  112. if self._error_recovery:
  113. raise NotImplementedError("Error Recovery is not implemented")
  114. else:
  115. type_, value, start_pos, prefix = token
  116. error_leaf = tree.ErrorLeaf(type_, value, start_pos, prefix)
  117. raise ParserSyntaxError('SyntaxError: invalid syntax', error_leaf)
  118. def convert_node(self, nonterminal, children):
  119. try:
  120. return self.node_map[nonterminal](children)
  121. except KeyError:
  122. return self.default_node(nonterminal, children)
  123. def convert_leaf(self, type_, value, prefix, start_pos):
  124. try:
  125. return self.leaf_map[type_](value, start_pos, prefix)
  126. except KeyError:
  127. return self.default_leaf(value, start_pos, prefix)
  128. def _add_token(self, token):
  129. """
  130. This is the only core function for parsing. Here happens basically
  131. everything. Everything is well prepared by the parser generator and we
  132. only apply the necessary steps here.
  133. """
  134. grammar = self._pgen_grammar
  135. stack = self.stack
  136. type_, value, start_pos, prefix = token
  137. transition = _token_to_transition(grammar, type_, value)
  138. while True:
  139. try:
  140. plan = stack[-1].dfa.transitions[transition]
  141. break
  142. except KeyError:
  143. if stack[-1].dfa.is_final:
  144. self._pop()
  145. else:
  146. self.error_recovery(token)
  147. return
  148. except IndexError:
  149. raise InternalParseError("too much input", type_, value, start_pos)
  150. stack[-1].dfa = plan.next_dfa
  151. for push in plan.dfa_pushes:
  152. stack.append(StackNode(push))
  153. leaf = self.convert_leaf(type_, value, prefix, start_pos)
  154. stack[-1].nodes.append(leaf)
  155. def _pop(self):
  156. tos = self.stack.pop()
  157. # If there's exactly one child, return that child instead of
  158. # creating a new node. We still create expr_stmt and
  159. # file_input though, because a lot of Jedi depends on its
  160. # logic.
  161. if len(tos.nodes) == 1:
  162. new_node = tos.nodes[0]
  163. else:
  164. new_node = self.convert_node(tos.dfa.from_rule, tos.nodes)
  165. self.stack[-1].nodes.append(new_node)