|
|
- # -*- coding: utf-8 -*-
- """
- pygments.scanner
- ~~~~~~~~~~~~~~~~
-
- This library implements a regex based scanner. Some languages
- like Pascal are easy to parse but have some keywords that
- depend on the context. Because of this it's impossible to lex
- that just by using a regular expression lexer like the
- `RegexLexer`.
-
- Have a look at the `DelphiLexer` to get an idea of how to use
- this scanner.
-
- :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
- """
- import re
-
-
- class EndOfText(RuntimeError):
- """
- Raise if end of text is reached and the user
- tried to call a match function.
- """
-
-
- class Scanner:
- """
- Simple scanner
-
- All method patterns are regular expression strings (not
- compiled expressions!)
- """
-
- def __init__(self, text, flags=0):
- """
- :param text: The text which should be scanned
- :param flags: default regular expression flags
- """
- self.data = text
- self.data_length = len(text)
- self.start_pos = 0
- self.pos = 0
- self.flags = flags
- self.last = None
- self.match = None
- self._re_cache = {}
-
- def eos(self):
- """`True` if the scanner reached the end of text."""
- return self.pos >= self.data_length
- eos = property(eos, eos.__doc__)
-
- def check(self, pattern):
- """
- Apply `pattern` on the current position and return
- the match object. (Doesn't touch pos). Use this for
- lookahead.
- """
- if self.eos:
- raise EndOfText()
- if pattern not in self._re_cache:
- self._re_cache[pattern] = re.compile(pattern, self.flags)
- return self._re_cache[pattern].match(self.data, self.pos)
-
- def test(self, pattern):
- """Apply a pattern on the current position and check
- if it patches. Doesn't touch pos.
- """
- return self.check(pattern) is not None
-
- def scan(self, pattern):
- """
- Scan the text for the given pattern and update pos/match
- and related fields. The return value is a boolen that
- indicates if the pattern matched. The matched value is
- stored on the instance as ``match``, the last value is
- stored as ``last``. ``start_pos`` is the position of the
- pointer before the pattern was matched, ``pos`` is the
- end position.
- """
- if self.eos:
- raise EndOfText()
- if pattern not in self._re_cache:
- self._re_cache[pattern] = re.compile(pattern, self.flags)
- self.last = self.match
- m = self._re_cache[pattern].match(self.data, self.pos)
- if m is None:
- return False
- self.start_pos = m.start()
- self.pos = m.end()
- self.match = m.group()
- return True
-
- def get_char(self):
- """Scan exactly one char."""
- self.scan('.')
-
- def __repr__(self):
- return '<%s %d/%d>' % (
- self.__class__.__name__,
- self.pos,
- self.data_length
- )
|