You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

250 lines
9.7 KiB

4 years ago
  1. import hashlib
  2. import os
  3. from parso._compatibility import FileNotFoundError, is_pypy
  4. from parso.pgen2 import generate_grammar
  5. from parso.utils import split_lines, python_bytes_to_unicode, parse_version_string
  6. from parso.python.diff import DiffParser
  7. from parso.python.tokenize import tokenize_lines, tokenize
  8. from parso.python.token import PythonTokenTypes
  9. from parso.cache import parser_cache, load_module, save_module
  10. from parso.parser import BaseParser
  11. from parso.python.parser import Parser as PythonParser
  12. from parso.python.errors import ErrorFinderConfig
  13. from parso.python import pep8
# Module-level cache of already-built grammar objects, keyed by the grammar
# file path; load_grammar() reads from and inserts into it.
_loaded_grammars = {}
  15. class Grammar(object):
  16. """
  17. :py:func:`parso.load_grammar` returns instances of this class.
  18. Creating custom none-python grammars by calling this is not supported, yet.
  19. """
  20. #:param text: A BNF representation of your grammar.
  21. _error_normalizer_config = None
  22. _token_namespace = None
  23. _default_normalizer_config = pep8.PEP8NormalizerConfig()
  24. def __init__(self, text, tokenizer, parser=BaseParser, diff_parser=None):
  25. self._pgen_grammar = generate_grammar(
  26. text,
  27. token_namespace=self._get_token_namespace()
  28. )
  29. self._parser = parser
  30. self._tokenizer = tokenizer
  31. self._diff_parser = diff_parser
  32. self._hashed = hashlib.sha256(text.encode("utf-8")).hexdigest()
  33. def parse(self, code=None, **kwargs):
  34. """
  35. If you want to parse a Python file you want to start here, most likely.
  36. If you need finer grained control over the parsed instance, there will be
  37. other ways to access it.
  38. :param str code: A unicode or bytes string. When it's not possible to
  39. decode bytes to a string, returns a
  40. :py:class:`UnicodeDecodeError`.
  41. :param bool error_recovery: If enabled, any code will be returned. If
  42. it is invalid, it will be returned as an error node. If disabled,
  43. you will get a ParseError when encountering syntax errors in your
  44. code.
  45. :param str start_symbol: The grammar rule (nonterminal) that you want
  46. to parse. Only allowed to be used when error_recovery is False.
  47. :param str path: The path to the file you want to open. Only needed for caching.
  48. :param bool cache: Keeps a copy of the parser tree in RAM and on disk
  49. if a path is given. Returns the cached trees if the corresponding
  50. files on disk have not changed.
  51. :param bool diff_cache: Diffs the cached python module against the new
  52. code and tries to parse only the parts that have changed. Returns
  53. the same (changed) module that is found in cache. Using this option
  54. requires you to not do anything anymore with the cached modules
  55. under that path, because the contents of it might change. This
  56. option is still somewhat experimental. If you want stability,
  57. please don't use it.
  58. :param bool cache_path: If given saves the parso cache in this
  59. directory. If not given, defaults to the default cache places on
  60. each platform.
  61. :return: A subclass of :py:class:`parso.tree.NodeOrLeaf`. Typically a
  62. :py:class:`parso.python.tree.Module`.
  63. """
  64. if 'start_pos' in kwargs:
  65. raise TypeError("parse() got an unexpected keyword argument.")
  66. return self._parse(code=code, **kwargs)
  67. def _parse(self, code=None, error_recovery=True, path=None,
  68. start_symbol=None, cache=False, diff_cache=False,
  69. cache_path=None, start_pos=(1, 0)):
  70. """
  71. Wanted python3.5 * operator and keyword only arguments. Therefore just
  72. wrap it all.
  73. start_pos here is just a parameter internally used. Might be public
  74. sometime in the future.
  75. """
  76. if code is None and path is None:
  77. raise TypeError("Please provide either code or a path.")
  78. if start_symbol is None:
  79. start_symbol = self._start_nonterminal
  80. if error_recovery and start_symbol != 'file_input':
  81. raise NotImplementedError("This is currently not implemented.")
  82. if cache and path is not None:
  83. module_node = load_module(self._hashed, path, cache_path=cache_path)
  84. if module_node is not None:
  85. return module_node
  86. if code is None:
  87. with open(path, 'rb') as f:
  88. code = f.read()
  89. code = python_bytes_to_unicode(code)
  90. lines = split_lines(code, keepends=True)
  91. if diff_cache:
  92. if self._diff_parser is None:
  93. raise TypeError("You have to define a diff parser to be able "
  94. "to use this option.")
  95. try:
  96. module_cache_item = parser_cache[self._hashed][path]
  97. except KeyError:
  98. pass
  99. else:
  100. module_node = module_cache_item.node
  101. old_lines = module_cache_item.lines
  102. if old_lines == lines:
  103. return module_node
  104. new_node = self._diff_parser(
  105. self._pgen_grammar, self._tokenizer, module_node
  106. ).update(
  107. old_lines=old_lines,
  108. new_lines=lines
  109. )
  110. save_module(self._hashed, path, new_node, lines,
  111. # Never pickle in pypy, it's slow as hell.
  112. pickling=cache and not is_pypy,
  113. cache_path=cache_path)
  114. return new_node
  115. tokens = self._tokenizer(lines, start_pos)
  116. p = self._parser(
  117. self._pgen_grammar,
  118. error_recovery=error_recovery,
  119. start_nonterminal=start_symbol
  120. )
  121. root_node = p.parse(tokens=tokens)
  122. if cache or diff_cache:
  123. save_module(self._hashed, path, root_node, lines,
  124. # Never pickle in pypy, it's slow as hell.
  125. pickling=cache and not is_pypy,
  126. cache_path=cache_path)
  127. return root_node
  128. def _get_token_namespace(self):
  129. ns = self._token_namespace
  130. if ns is None:
  131. raise ValueError("The token namespace should be set.")
  132. return ns
  133. def iter_errors(self, node):
  134. """
  135. Given a :py:class:`parso.tree.NodeOrLeaf` returns a generator of
  136. :py:class:`parso.normalizer.Issue` objects. For Python this is
  137. a list of syntax/indentation errors.
  138. """
  139. if self._error_normalizer_config is None:
  140. raise ValueError("No error normalizer specified for this grammar.")
  141. return self._get_normalizer_issues(node, self._error_normalizer_config)
  142. def _get_normalizer(self, normalizer_config):
  143. if normalizer_config is None:
  144. normalizer_config = self._default_normalizer_config
  145. if normalizer_config is None:
  146. raise ValueError("You need to specify a normalizer, because "
  147. "there's no default normalizer for this tree.")
  148. return normalizer_config.create_normalizer(self)
  149. def _normalize(self, node, normalizer_config=None):
  150. """
  151. TODO this is not public, yet.
  152. The returned code will be normalized, e.g. PEP8 for Python.
  153. """
  154. normalizer = self._get_normalizer(normalizer_config)
  155. return normalizer.walk(node)
  156. def _get_normalizer_issues(self, node, normalizer_config=None):
  157. normalizer = self._get_normalizer(normalizer_config)
  158. normalizer.walk(node)
  159. return normalizer.issues
  160. def __repr__(self):
  161. nonterminals = self._pgen_grammar._nonterminal_to_dfas.keys()
  162. txt = ' '.join(list(nonterminals)[:3]) + ' ...'
  163. return '<%s:%s>' % (self.__class__.__name__, txt)
  164. class PythonGrammar(Grammar):
  165. _error_normalizer_config = ErrorFinderConfig()
  166. _token_namespace = PythonTokenTypes
  167. _start_nonterminal = 'file_input'
  168. def __init__(self, version_info, bnf_text):
  169. super(PythonGrammar, self).__init__(
  170. bnf_text,
  171. tokenizer=self._tokenize_lines,
  172. parser=PythonParser,
  173. diff_parser=DiffParser
  174. )
  175. self.version_info = version_info
  176. def _tokenize_lines(self, lines, start_pos):
  177. return tokenize_lines(lines, self.version_info, start_pos=start_pos)
  178. def _tokenize(self, code):
  179. # Used by Jedi.
  180. return tokenize(code, self.version_info)
  181. def load_grammar(**kwargs):
  182. """
  183. Loads a :py:class:`parso.Grammar`. The default version is the current Python
  184. version.
  185. :param str version: A python version string, e.g. ``version='3.3'``.
  186. :param str path: A path to a grammar file
  187. """
  188. def load_grammar(language='python', version=None, path=None):
  189. if language == 'python':
  190. version_info = parse_version_string(version)
  191. file = path or os.path.join(
  192. 'python',
  193. 'grammar%s%s.txt' % (version_info.major, version_info.minor)
  194. )
  195. global _loaded_grammars
  196. path = os.path.join(os.path.dirname(__file__), file)
  197. try:
  198. return _loaded_grammars[path]
  199. except KeyError:
  200. try:
  201. with open(path) as f:
  202. bnf_text = f.read()
  203. grammar = PythonGrammar(version_info, bnf_text)
  204. return _loaded_grammars.setdefault(path, grammar)
  205. except FileNotFoundError:
  206. message = "Python version %s is currently not supported." % version
  207. raise NotImplementedError(message)
  208. else:
  209. raise NotImplementedError("No support for language %s." % language)
  210. return load_grammar(**kwargs)