You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

6004 lines
236 KiB

4 years ago
  1. #-*- coding: utf-8 -*-
  2. # module pyparsing.py
  3. #
  4. # Copyright (c) 2003-2018 Paul T. McGuire
  5. #
  6. # Permission is hereby granted, free of charge, to any person obtaining
  7. # a copy of this software and associated documentation files (the
  8. # "Software"), to deal in the Software without restriction, including
  9. # without limitation the rights to use, copy, modify, merge, publish,
  10. # distribute, sublicense, and/or sell copies of the Software, and to
  11. # permit persons to whom the Software is furnished to do so, subject to
  12. # the following conditions:
  13. #
  14. # The above copyright notice and this permission notice shall be
  15. # included in all copies or substantial portions of the Software.
  16. #
  17. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  18. # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19. # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  20. # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  21. # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  22. # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  23. # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  24. #
  25. __doc__ = \
  26. """
  27. pyparsing module - Classes and methods to define and execute parsing grammars
  28. =============================================================================
  29. The pyparsing module is an alternative approach to creating and executing simple grammars,
  30. vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
  31. don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
  32. provides a library of classes that you use to construct the grammar directly in Python.
  33. Here is a program to parse "Hello, World!" (or any greeting of the form
  34. C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements
  35. (L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
  36. L{Literal} expressions)::
  37. from pyparsing import Word, alphas
  38. # define grammar of a greeting
  39. greet = Word(alphas) + "," + Word(alphas) + "!"
  40. hello = "Hello, World!"
  41. print (hello, "->", greet.parseString(hello))
  42. The program outputs the following::
  43. Hello, World! -> ['Hello', ',', 'World', '!']
  44. The Python representation of the grammar is quite readable, owing to the self-explanatory
  45. class names, and the use of '+', '|' and '^' operators.
  46. The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
  47. object with named attributes.
  48. The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
  49. - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
  50. - quoted strings
  51. - embedded comments
  52. Getting Started -
  53. -----------------
  54. Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing
  55. classes inherit from. Use the docstrings for examples of how to:
  56. - construct literal match expressions from L{Literal} and L{CaselessLiteral} classes
  57. - construct character word-group expressions using the L{Word} class
  58. - see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes
  59. - use L{'+'<And>}, L{'|'<MatchFirst>}, L{'^'<Or>}, and L{'&'<Each>} operators to combine simple expressions into more complex ones
  60. - associate names with your parsed results using L{ParserElement.setResultsName}
  61. - find some helpful expression short-cuts like L{delimitedList} and L{oneOf}
  62. - find more useful common expressions in the L{pyparsing_common} namespace class
  63. """
# Module metadata: release number, build timestamp and author contact.
__version__ = "2.3.0"
__versionTime__ = "28 Oct 2018 01:57 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
  67. import string
  68. from weakref import ref as wkref
  69. import copy
  70. import sys
  71. import warnings
  72. import re
  73. import sre_constants
  74. import collections
  75. import pprint
  76. import traceback
  77. import types
  78. from datetime import datetime
  79. try:
  80. # Python 3
  81. from itertools import filterfalse
  82. except ImportError:
  83. from itertools import ifilterfalse as filterfalse
  84. try:
  85. from _thread import RLock
  86. except ImportError:
  87. from threading import RLock
  88. try:
  89. # Python 3
  90. from collections.abc import Iterable
  91. from collections.abc import MutableMapping
  92. except ImportError:
  93. # Python 2.7
  94. from collections import Iterable
  95. from collections import MutableMapping
  96. try:
  97. from collections import OrderedDict as _OrderedDict
  98. except ImportError:
  99. try:
  100. from ordereddict import OrderedDict as _OrderedDict
  101. except ImportError:
  102. _OrderedDict = None
  103. try:
  104. from types import SimpleNamespace
  105. except ImportError:
  106. class SimpleNamespace: pass
  107. #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
# Names exported by ``from pyparsing import *``: the expression and exception
# classes first, then the lower-case helper functions and constants.
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'PrecededBy', 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 'Char',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
'CloseMatch', 'tokenMap', 'pyparsing_common', 'pyparsing_unicode', 'unicode_set',
]
# Interpreter detection.  The two branches below install the same set of
# names (_MAX_INT, _ustr, singleArgBuiltins, ...) so the rest of the module
# can use a single spelling on both Python 2 and Python 3.
system_version = tuple(sys.version_info)[:3]
PY_3 = system_version[0] == 3
if PY_3:
    _MAX_INT = sys.maxsize
    # Python 2 spellings aliased to their Python 3 counterparts
    basestring = str
    unichr = chr
    unicode = str
    _ustr = str

    # build list of single arg builtins, that can be used as parse actions
    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]

else:
    _MAX_INT = sys.maxint
    # use the lazy range on Python 2 as well
    range = xrange

    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly.

        A unicode object is returned unchanged.  Otherwise str(obj) is tried
        first; if that raises UnicodeEncodeError, unicode(obj) is encoded with
        the default encoding using 'xmlcharrefreplace', and each resulting
        C{&#NNN;} reference is rewritten as a C{\\u} escape followed by the
        hexadecimal code point.
        """
        if isinstance(obj,unicode):
            return obj

        try:
            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
            # it won't break any existing code.
            return str(obj)

        except UnicodeEncodeError:
            # Else encode it
            ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            # Regex is defined further down in this module; it is only looked
            # up when this fallback path actually runs
            xmlcharref = Regex(r'&#\d+;')
            # t[0][2:-1] strips the leading '&#' and trailing ';' to get the decimal code point
            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return xmlcharref.transformString(ret)

    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
    singleArgBuiltins = []
    import __builtin__
    for fname in "sum len sorted reversed list tuple set any all min max".split():
        try:
            singleArgBuiltins.append(getattr(__builtin__,fname))
        except AttributeError:
            # this builtin does not exist on the running interpreter; skip it
            continue

# the type of a generator expression, used to recognise generator arguments
_generatorType = type((y for y in range(1)))
  167. def _xml_escape(data):
  168. """Escape &, <, >, ", ', etc. in a string of data."""
  169. # ampersand must be replaced first
  170. from_symbols = '&><"\''
  171. to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
  172. for from_,to_ in zip(from_symbols, to_symbols):
  173. data = data.replace(from_, to_)
  174. return data
  175. alphas = string.ascii_uppercase + string.ascii_lowercase
  176. nums = "0123456789"
  177. hexnums = nums + "ABCDEFabcdef"
  178. alphanums = alphas + nums
  179. _bslash = chr(92)
  180. printables = "".join(c for c in string.printable if c not in string.whitespace)
  181. class ParseBaseException(Exception):
  182. """base exception class for all parsing runtime exceptions"""
  183. # Performance tuning: we construct a *lot* of these, so keep this
  184. # constructor as small and fast as possible
  185. def __init__( self, pstr, loc=0, msg=None, elem=None ):
  186. self.loc = loc
  187. if msg is None:
  188. self.msg = pstr
  189. self.pstr = ""
  190. else:
  191. self.msg = msg
  192. self.pstr = pstr
  193. self.parserElement = elem
  194. self.args = (pstr, loc, msg)
  195. @classmethod
  196. def _from_exception(cls, pe):
  197. """
  198. internal factory method to simplify creating one type of ParseException
  199. from another - avoids having __init__ signature conflicts among subclasses
  200. """
  201. return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
  202. def __getattr__( self, aname ):
  203. """supported attributes by name are:
  204. - lineno - returns the line number of the exception text
  205. - col - returns the column number of the exception text
  206. - line - returns the line containing the exception text
  207. """
  208. if( aname == "lineno" ):
  209. return lineno( self.loc, self.pstr )
  210. elif( aname in ("col", "column") ):
  211. return col( self.loc, self.pstr )
  212. elif( aname == "line" ):
  213. return line( self.loc, self.pstr )
  214. else:
  215. raise AttributeError(aname)
  216. def __str__( self ):
  217. return "%s (at char %d), (line:%d, col:%d)" % \
  218. ( self.msg, self.loc, self.lineno, self.column )
  219. def __repr__( self ):
  220. return _ustr(self)
  221. def markInputline( self, markerString = ">!<" ):
  222. """Extracts the exception line from the input string, and marks
  223. the location of the exception with a special symbol.
  224. """
  225. line_str = self.line
  226. line_column = self.column - 1
  227. if markerString:
  228. line_str = "".join((line_str[:line_column],
  229. markerString, line_str[line_column:]))
  230. return line_str.strip()
  231. def __dir__(self):
  232. return "lineno col line".split() + dir(type(self))
  233. class ParseException(ParseBaseException):
  234. """
  235. Exception thrown when parse expressions don't match class;
  236. supported attributes by name are:
  237. - lineno - returns the line number of the exception text
  238. - col - returns the column number of the exception text
  239. - line - returns the line containing the exception text
  240. Example::
  241. try:
  242. Word(nums).setName("integer").parseString("ABC")
  243. except ParseException as pe:
  244. print(pe)
  245. print("column: {}".format(pe.col))
  246. prints::
  247. Expected integer (at char 0), (line:1, col:1)
  248. column: 1
  249. """
  250. pass
  251. class ParseFatalException(ParseBaseException):
  252. """user-throwable exception thrown when inconsistent parse content
  253. is found; stops all parsing immediately"""
  254. pass
  255. class ParseSyntaxException(ParseFatalException):
  256. """just like L{ParseFatalException}, but thrown internally when an
  257. L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop
  258. immediately because an unbacktrackable syntax error has been found"""
  259. pass
  260. #~ class ReparseException(ParseBaseException):
  261. #~ """Experimental class - parse actions can raise this exception to cause
  262. #~ pyparsing to reparse the input string:
  263. #~ - with a modified input string, and/or
  264. #~ - with a modified start location
  265. #~ Set the values of the ReparseException in the constructor, and raise the
  266. #~ exception in a parse action to cause pyparsing to use the new string/location.
  267. #~ Setting the values as None causes no change to be made.
  268. #~ """
  269. #~ def __init_( self, newstring, restartLoc ):
  270. #~ self.newParseText = newstring
  271. #~ self.reparseLoc = restartLoc
  272. class RecursiveGrammarException(Exception):
  273. """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
  274. def __init__( self, parseElementList ):
  275. self.parseElementTrace = parseElementList
  276. def __str__( self ):
  277. return "RecursiveGrammarException: %s" % self.parseElementTrace
  278. class _ParseResultsWithOffset(object):
  279. def __init__(self,p1,p2):
  280. self.tup = (p1,p2)
  281. def __getitem__(self,i):
  282. return self.tup[i]
  283. def __repr__(self):
  284. return repr(self.tup[0])
  285. def setOffset(self,i):
  286. self.tup = (self.tup[0],i)
  287. class ParseResults(object):
  288. """
  289. Structured parse results, to provide multiple means of access to the parsed data:
  290. - as a list (C{len(results)})
  291. - by list index (C{results[0], results[1]}, etc.)
  292. - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
  293. Example::
  294. integer = Word(nums)
  295. date_str = (integer.setResultsName("year") + '/'
  296. + integer.setResultsName("month") + '/'
  297. + integer.setResultsName("day"))
  298. # equivalent form:
  299. # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  300. # parseString returns a ParseResults object
  301. result = date_str.parseString("1999/12/31")
  302. def test(s, fn=repr):
  303. print("%s -> %s" % (s, fn(eval(s))))
  304. test("list(result)")
  305. test("result[0]")
  306. test("result['month']")
  307. test("result.day")
  308. test("'month' in result")
  309. test("'minutes' in result")
  310. test("result.dump()", str)
  311. prints::
  312. list(result) -> ['1999', '/', '12', '/', '31']
  313. result[0] -> '1999'
  314. result['month'] -> '12'
  315. result.day -> '31'
  316. 'month' in result -> True
  317. 'minutes' in result -> False
  318. result.dump() -> ['1999', '/', '12', '/', '31']
  319. - day: 31
  320. - month: 12
  321. - year: 1999
  322. """
  323. def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
  324. if isinstance(toklist, cls):
  325. return toklist
  326. retobj = object.__new__(cls)
  327. retobj.__doinit = True
  328. return retobj
  329. # Performance tuning: we construct a *lot* of these, so keep this
  330. # constructor as small and fast as possible
    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
        """Initialise the token list and, when C{name} is given, the named entry.

        Parameters:
         - toklist - a list (copied), a generator (materialised), or any other
           value (wrapped in a one-element list); C{None} means no tokens
         - name - results name under which the tokens are also stored; an int
           name is converted with C{_ustr}
         - asList - if true, the named entry is stored as a nested
           C{ParseResults}; otherwise the first token is stored (falling back
           to C{toklist} itself if it cannot be indexed)
         - modal - if false, C{name} is registered in the accumulating-names
           table
         - isinstance - the builtin, bound as a default argument
        """
        # __new__ sets __doinit on freshly created objects only, so an
        # instance passed back through the constructor is not reset here
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            # skip the named entry when the token value is an empty None, '' or []
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist.__toklist), 0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist
  368. def __getitem__( self, i ):
  369. if isinstance( i, (int,slice) ):
  370. return self.__toklist[i]
  371. else:
  372. if i not in self.__accumNames:
  373. return self.__tokdict[i][-1][0]
  374. else:
  375. return ParseResults([ v[0] for v in self.__tokdict[i] ])
  376. def __setitem__( self, k, v, isinstance=isinstance ):
  377. if isinstance(v,_ParseResultsWithOffset):
  378. self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
  379. sub = v[0]
  380. elif isinstance(k,(int,slice)):
  381. self.__toklist[k] = v
  382. sub = v
  383. else:
  384. self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
  385. sub = v
  386. if isinstance(sub,ParseResults):
  387. sub.__parent = wkref(self)
    def __delitem__( self, i ):
        """Delete tokens by int/slice index, or a named result by key.

        Deleting by index also adjusts the offsets stored with every named
        result; deleting by name leaves the token list untouched.
        """
        if isinstance(i,(int,slice)):
            # length is captured before the deletion so the removed indices
            # can be computed against the original layout
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name,occurrences in self.__tokdict.items():
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        # offsets past the removed index move down by one
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]
  407. def __contains__( self, k ):
  408. return k in self.__tokdict
  409. def __len__( self ): return len( self.__toklist )
  410. def __bool__(self): return ( not not self.__toklist )
  411. __nonzero__ = __bool__
  412. def __iter__( self ): return iter( self.__toklist )
  413. def __reversed__( self ): return iter( self.__toklist[::-1] )
  414. def _iterkeys( self ):
  415. if hasattr(self.__tokdict, "iterkeys"):
  416. return self.__tokdict.iterkeys()
  417. else:
  418. return iter(self.__tokdict)
  419. def _itervalues( self ):
  420. return (self[k] for k in self._iterkeys())
  421. def _iteritems( self ):
  422. return ((k, self[k]) for k in self._iterkeys())
    # Publish the dict-style accessors under the names that are conventional
    # for the running interpreter: iterator-returning keys/values/items on
    # Python 3; iterkeys/itervalues/iteritems plus list-returning
    # keys/values/items on Python 2.
    if PY_3:
        keys = _iterkeys
        """Returns an iterator of all named result keys (Python 3.x only)."""

        values = _itervalues
        """Returns an iterator of all named result values (Python 3.x only)."""

        items = _iteritems
        """Returns an iterator of all named result key-value tuples (Python 3.x only)."""

    else:
        iterkeys = _iterkeys
        """Returns an iterator of all named result keys (Python 2.x only)."""

        itervalues = _itervalues
        """Returns an iterator of all named result values (Python 2.x only)."""

        iteritems = _iteritems
        """Returns an iterator of all named result key-value tuples (Python 2.x only)."""

        def keys( self ):
            """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iterkeys())

        def values( self ):
            """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.itervalues())

        def items( self ):
            """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iteritems())
  446. def haskeys( self ):
  447. """Since keys() returns an iterator, this method is helpful in bypassing
  448. code that looks for the existence of any defined results names."""
  449. return bool(self.__tokdict)
  450. def pop( self, *args, **kwargs):
  451. """
  452. Removes and returns item at specified index (default=C{last}).
  453. Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
  454. argument or an integer argument, it will use C{list} semantics
  455. and pop tokens from the list of parsed tokens. If passed a
  456. non-integer argument (most likely a string), it will use C{dict}
  457. semantics and pop the corresponding value from any defined
  458. results names. A second default return value argument is
  459. supported, just as in C{dict.pop()}.
  460. Example::
  461. def remove_first(tokens):
  462. tokens.pop(0)
  463. print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
  464. print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
  465. label = Word(alphas)
  466. patt = label("LABEL") + OneOrMore(Word(nums))
  467. print(patt.parseString("AAB 123 321").dump())
  468. # Use pop() in a parse action to remove named result (note that corresponding value is not
  469. # removed from list form of results)
  470. def remove_LABEL(tokens):
  471. tokens.pop("LABEL")
  472. return tokens
  473. patt.addParseAction(remove_LABEL)
  474. print(patt.parseString("AAB 123 321").dump())
  475. prints::
  476. ['AAB', '123', '321']
  477. - LABEL: AAB
  478. ['AAB', '123', '321']
  479. """
  480. if not args:
  481. args = [-1]
  482. for k,v in kwargs.items():
  483. if k == 'default':
  484. args = (args[0], v)
  485. else:
  486. raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
  487. if (isinstance(args[0], int) or
  488. len(args) == 1 or
  489. args[0] in self):
  490. index = args[0]
  491. ret = self[index]
  492. del self[index]
  493. return ret
  494. else:
  495. defaultvalue = args[1]
  496. return defaultvalue
  497. def get(self, key, defaultValue=None):
  498. """
  499. Returns named result matching the given key, or if there is no
  500. such name, then returns the given C{defaultValue} or C{None} if no
  501. C{defaultValue} is specified.
  502. Similar to C{dict.get()}.
  503. Example::
  504. integer = Word(nums)
  505. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  506. result = date_str.parseString("1999/12/31")
  507. print(result.get("year")) # -> '1999'
  508. print(result.get("hour", "not specified")) # -> 'not specified'
  509. print(result.get("hour")) # -> None
  510. """
  511. if key in self:
  512. return self[key]
  513. else:
  514. return defaultValue
  515. def insert( self, index, insStr ):
  516. """
  517. Inserts new element at location index in the list of parsed tokens.
  518. Similar to C{list.insert()}.
  519. Example::
  520. print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
  521. # use a parse action to insert the parse location in the front of the parsed results
  522. def insert_locn(locn, tokens):
  523. tokens.insert(0, locn)
  524. print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
  525. """
  526. self.__toklist.insert(index, insStr)
  527. # fixup indices in token dictionary
  528. for name,occurrences in self.__tokdict.items():
  529. for k, (value, position) in enumerate(occurrences):
  530. occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
  531. def append( self, item ):
  532. """
  533. Add single element to end of ParseResults list of elements.
  534. Example::
  535. print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
  536. # use a parse action to compute the sum of the parsed integers, and add it to the end
  537. def append_sum(tokens):
  538. tokens.append(sum(map(int, tokens)))
  539. print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
  540. """
  541. self.__toklist.append(item)
  542. def extend( self, itemseq ):
  543. """
  544. Add sequence of elements to end of ParseResults list of elements.
  545. Example::
  546. patt = OneOrMore(Word(alphas))
  547. # use a parse action to append the reverse of the matched strings, to make a palindrome
  548. def make_palindrome(tokens):
  549. tokens.extend(reversed([t[::-1] for t in tokens]))
  550. return ''.join(tokens)
  551. print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
  552. """
  553. if isinstance(itemseq, ParseResults):
  554. self += itemseq
  555. else:
  556. self.__toklist.extend(itemseq)
  557. def clear( self ):
  558. """
  559. Clear all elements and results names.
  560. """
  561. del self.__toklist[:]
  562. self.__tokdict.clear()
  563. def __getattr__( self, name ):
  564. try:
  565. return self[name]
  566. except KeyError:
  567. return ""
  568. if name in self.__tokdict:
  569. if name not in self.__accumNames:
  570. return self.__tokdict[name][-1][0]
  571. else:
  572. return ParseResults([ v[0] for v in self.__tokdict[name] ])
  573. else:
  574. return ""
  575. def __add__( self, other ):
  576. ret = self.copy()
  577. ret += other
  578. return ret
    def __iadd__( self, other ):
        """Merge C{other} into this object in place and return C{self}.

        The named results of C{other} are added with their offsets shifted by
        the current token count, then its tokens and accumulating names are
        appended.
        """
        if other.__tokdict:
            # tokens of other will land after the tokens already present
            offset = len(self.__toklist)
            # a negative stored offset is replaced by the bare shift
            addoffset = lambda a: offset if a<0 else a+offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self
  593. def __radd__(self, other):
  594. if isinstance(other,int) and other == 0:
  595. # useful for merging many ParseResults using sum() builtin
  596. return self.copy()
  597. else:
  598. # this may raise a TypeError - so be it
  599. return other + self
  600. def __repr__( self ):
  601. return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
  602. def __str__( self ):
  603. return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
  604. def _asStringList( self, sep='' ):
  605. out = []
  606. for item in self.__toklist:
  607. if out and sep:
  608. out.append(sep)
  609. if isinstance( item, ParseResults ):
  610. out += item._asStringList()
  611. else:
  612. out.append( _ustr(item) )
  613. return out
  614. def asList( self ):
  615. """
  616. Returns the parse results as a nested list of matching tokens, all converted to strings.
  617. Example::
  618. patt = OneOrMore(Word(alphas))
  619. result = patt.parseString("sldkj lsdkj sldkj")
  620. # even though the result prints in string-like form, it is actually a pyparsing ParseResults
  621. print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
  622. # Use asList() to create an actual list
  623. result_list = result.asList()
  624. print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
  625. """
  626. return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
  627. def asDict( self ):
  628. """
  629. Returns the named parse results as a nested dictionary.
  630. Example::
  631. integer = Word(nums)
  632. date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
  633. result = date_str.parseString('12/31/1999')
  634. print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
  635. result_dict = result.asDict()
  636. print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
  637. # even though a ParseResults supports dict-like access, sometime you just need to have a dict
  638. import json
  639. print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
  640. print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
  641. """
  642. if PY_3:
  643. item_fn = self.items
  644. else:
  645. item_fn = self.iteritems
  646. def toItem(obj):
  647. if isinstance(obj, ParseResults):
  648. if obj.haskeys():
  649. return obj.asDict()
  650. else:
  651. return [toItem(v) for v in obj]
  652. else:
  653. return obj
  654. return dict((k,toItem(v)) for k,v in item_fn())
  655. def copy( self ):
  656. """
  657. Returns a new copy of a C{ParseResults} object.
  658. """
  659. ret = ParseResults( self.__toklist )
  660. ret.__tokdict = dict(self.__tokdict.items())
  661. ret.__parent = self.__parent
  662. ret.__accumNames.update( self.__accumNames )
  663. ret.__name = self.__name
  664. return ret
    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """
        (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

        Parameters:
         - doctag - tag for the enclosing element; when C{None}, this
           object's own name is used, falling back to C{"ITEM"}
         - namedItemsOnly - if true, tokens without a results name are left
           out (and an unnamed object with no C{doctag} yields C{""})
         - indent - string prefixed to this element's opening and closing tags
         - formatted - if false, newlines and indentation are omitted
        """
        nl = "\n"
        out = []
        # map each stored offset to its results name
        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist)
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        for i,res in enumerate(self.__toklist):
            if isinstance(res,ParseResults):
                # nested results render themselves, one indent level deeper
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                # token text is escaped before being placed in the element body
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)
  719. def __lookup(self,sub):
  720. for k,vlist in self.__tokdict.items():
  721. for v,loc in vlist:
  722. if sub is v:
  723. return k
  724. return None
  def getName(self):
      r"""
      Return the results name attached to this token expression, if any.

      Handy when several alternative expressions could have matched at a
      given location and the caller needs to know which one did.

      Example::
          integer = Word(nums)
          ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
          house_number_expr = Suppress('#') + Word(nums, alphanums)
          user_data = (Group(house_number_expr)("house_number")
                      | Group(ssn_expr)("ssn")
                      | Group(integer)("age"))
          user_info = OneOrMore(user_data)

          result = user_info.parseString("22 111-22-3333 #221B")
          for item in result:
              print(item.getName(), ':', item[0])
      prints::
          age : 22
          ssn : 111-22-3333
          house_number : 221B
      """
      # an explicit name on this object always wins
      if self.__name:
          return self.__name

      # otherwise ask the (weakly referenced) parent which name it filed us under
      if self.__parent:
          par = self.__parent()
          return par.__lookup(self) if par else None

      # last resort: a single token with a single named entry at offset 0 or -1
      if (len(self) == 1
              and len(self.__tokdict) == 1
              and next(iter(self.__tokdict.values()))[0][1] in (0, -1)):
          return next(iter(self.__tokdict))
      return None
  def dump(self, indent='', depth=0, full=True):
      """
      Build a diagnostic listing of the contents of a C{ParseResults}.

      C{indent} is prepended to every emitted line so the text can be
      embedded in a nested display; C{depth} controls the extra padding of
      nested entries. When C{full} is false only the token list is shown.

      Example::
          integer = Word(nums)
          date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

          result = date_str.parseString('12/31/1999')
          print(result.dump())
      prints::
          ['12', '/', '31', '/', '1999']
          - day: 1999
          - month: 31
          - year: 12
      """
      pieces = [indent + _ustr(self.asList())]
      if not full:
          return "".join(pieces)

      if self.haskeys():
          # named results, listed in sorted key order
          for key, val in sorted((str(k), v) for k, v in self.items()):
              pieces.append('\n')
              pieces.append("%s%s- %s: " % (indent, (' ' * depth), key))
              if not isinstance(val, ParseResults):
                  pieces.append(repr(val))
              elif val:
                  pieces.append(val.dump(indent, depth + 1))
              else:
                  pieces.append(_ustr(val))
      elif any(isinstance(item, ParseResults) for item in self):
          # no names, but nested groups: list every element by position
          entry = "\n%s%s[%d]:\n%s%s%s"
          for idx, item in enumerate(self):
              if isinstance(item, ParseResults):
                  text = item.dump(indent, depth + 1)
              else:
                  text = _ustr(item)
              pieces.append(entry % (indent, (' ' * (depth)), idx,
                                     indent, (' ' * (depth + 1)), text))

      return "".join(pieces)
  def pprint(self, *args, **kwargs):
      """
      Pretty-print the parsed results as a nested list via the C{pprint} module.

      Any extra positional or keyword arguments are forwarded unchanged to
      C{pprint.pprint}. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})

      Example::
          ident = Word(alphas, alphanums)
          num = Word(nums)
          func = Forward()
          term = ident | num | Group('(' + func + ')')
          func <<= ident + Group(Optional(delimitedList(term)))
          result = func.parseString("fna a,b,(fnb c,d,200),100")
          result.pprint(width=40)
      prints::
          ['fna',
           ['a',
            'b',
            ['(', 'fnb', ['c', 'd', '200'], ')'],
            '100']]
      """
      as_list = self.asList()
      pprint.pprint(as_list, *args, **kwargs)
  821. # add support for pickle protocol
  def __getstate__(self):
      """Return the picklable state: the token list plus a tuple of
      (copy of the name dict, dereferenced parent or None, accumulated
      names, results name)."""
      parent = None
      if self.__parent is not None:
          # dereference the weak reference; a dead or falsy parent is stored as None
          parent = self.__parent() or None
      details = (self.__tokdict.copy(),
                 parent,
                 self.__accumNames,
                 self.__name)
      return (self.__toklist, details)
  def __setstate__(self, state):
      """Restore the object from the 2-tuple produced by C{__getstate__}."""
      self.__toklist = state[0]
      tokdict, par, inAccumNames, name = state[1]
      self.__tokdict = tokdict
      self.__name = name
      # always rebuild the accumulated-names mapping as a fresh dict
      self.__accumNames = dict(inAccumNames)
      # the parent is held only through a weak reference
      self.__parent = wkref(par) if par is not None else None
  def __getnewargs__(self):
      """Supply the positional arguments used to re-create this object when unpickling."""
      return (self.__toklist, self.__name, self.__asList, self.__modal)
  def __dir__(self):
      """List the class attributes followed by the results names held by this instance."""
      names = dir(type(self))
      names.extend(self.keys())
      return names
  844. MutableMapping.register(ParseResults)
  def col (loc,strg):
      """Return the column of position C{loc} within C{strg}; newlines separate lines.
      Columns are numbered starting at 1.

      Note: the default parsing behavior is to expand tabs in the input string
      before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
      on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
      consistent view of the parsed string, the parse location, and line and column
      positions within the parsed string.
      """
      # a position directly after a newline (and before end of string) is column 1
      if 0 < loc < len(strg) and strg[loc-1] == '\n':
          return 1
      # otherwise measure the distance back to the previous newline (rfind gives -1 if none)
      return loc - strg.rfind("\n", 0, loc)
  def lineno(loc,strg):
      """Return the line number of position C{loc} within C{strg}; newlines separate lines.
      Lines are numbered starting at 1.

      Note: the default parsing behavior is to expand tabs in the input string
      before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
      on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
      consistent view of the parsed string, the parse location, and line and column
      positions within the parsed string.
      """
      newlines_before = strg.count("\n", 0, loc)
      return newlines_before + 1
  def line( loc, strg ):
      """Return the text of the line that contains position C{loc} in C{strg}
      (without its newline), treating newlines as line separators.
      """
      # rfind yields -1 when there is no earlier newline, so +1 lands on index 0
      start = strg.rfind("\n", 0, loc) + 1
      end = strg.find("\n", loc)
      return strg[start:end] if end >= 0 else strg[start:]
  def _defaultStartDebugAction( instring, loc, expr ):
      """Default debug action run before a match attempt: print the expression,
      the location, and its (line,column) position."""
      position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
      print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + position)
  def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
      """Default debug action run after a successful match: print the
      expression and the matched tokens as a list."""
      matched = str(toks.asList())
      print ("Matched " + _ustr(expr) + " -> " + matched)
  def _defaultExceptionDebugAction( instring, loc, expr, exc ):
      """Default debug action run after a failed match: print the exception."""
      message = "Exception raised:" + _ustr(exc)
      print (message)
  def nullDebugAction(*args):
      """'Do-nothing' debug action, to suppress debugging output during parsing.

      Accepts any positional arguments and ignores them.
      """
      return None
  884. # Only works on Python 3.x - nonlocal is toxic to Python 2 installs
  885. #~ 'decorator to trim function calls to match the arity of the target'
  886. #~ def _trim_arity(func, maxargs=3):
  887. #~ if func in singleArgBuiltins:
  888. #~ return lambda s,l,t: func(t)
  889. #~ limit = 0
  890. #~ foundArity = False
  891. #~ def wrapper(*args):
  892. #~ nonlocal limit,foundArity
  893. #~ while 1:
  894. #~ try:
  895. #~ ret = func(*args[limit:])
  896. #~ foundArity = True
  897. #~ return ret
  898. #~ except TypeError:
  899. #~ if limit == maxargs or foundArity:
  900. #~ raise
  901. #~ limit += 1
  902. #~ continue
  903. #~ return wrapper
  904. # this version is Python 2.x-3.x cross-compatible
  905. 'decorator to trim function calls to match the arity of the target'
  def _trim_arity(func, maxargs=2):
      """Wrap C{func} so it can always be called as C{wrapper(s, loc, toks)}.

      Parse actions may be declared with fewer arguments (C{fn(loc,toks)},
      C{fn(toks)} or C{fn()}). The wrapper discovers the right arity on the
      first call: it tries the full argument list and, each time the call
      itself is rejected with a C{TypeError}, drops one more leading argument
      (at most C{maxargs}+1 of them) and retries. Once a call succeeds the
      arity is remembered and later C{TypeError}s propagate unchanged.

      Callables listed in C{singleArgBuiltins} are called with the tokens only.

      A C{TypeError} raised from inside C{func}'s own body is never treated
      as an arity mismatch; it is re-raised as-is.
      """
      if func in singleArgBuiltins:
          return lambda s,l,t: func(t)
      # one-element lists so the nested wrapper can rebind them (Python 2 has no nonlocal)
      limit = [0]
      foundArity = [False]

      def wrapper(*args):
          while 1:
              try:
                  ret = func(*args[limit[0]:])
                  foundArity[0] = True
                  return ret
              except TypeError:
                  # re-raise TypeErrors if they did not come from our arity testing
                  if foundArity[0]:
                      raise
                  tb = sys.exc_info()[-1]
                  try:
                      # The traceback starts at this frame.  If it continues into a
                      # deeper frame, the error was raised while func's body was
                      # running, so it is a genuine error and not an argument-count
                      # mismatch.  This check does not depend on source line offsets.
                      if tb.tb_next is not None:
                          raise
                  finally:
                      # drop the traceback reference promptly to avoid a reference cycle
                      del tb

                  if limit[0] <= maxargs:
                      limit[0] += 1
                      continue
                  raise

      # copy func name to wrapper for sensible debug output
      func_name = "<parse action>"
      try:
          func_name = getattr(func, '__name__',
                              getattr(func, '__class__').__name__)
      except Exception:
          func_name = str(func)
      wrapper.__name__ = func_name

      return wrapper
  962. class ParserElement(object):
  963. """Abstract base level parser element class."""
  964. DEFAULT_WHITE_CHARS = " \n\t\r"
  965. verbose_stacktrace = False
  @staticmethod
  def setDefaultWhitespaceChars( chars ):
      r"""
      Replace the class-wide default set of whitespace characters.

      The new value is stored in C{ParserElement.DEFAULT_WHITE_CHARS}.

      Example::
          # with the default whitespace chars, newlines are skipped like spaces
          OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

          # change to just treat newline as significant
          ParserElement.setDefaultWhitespaceChars(" \t")
          OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
      """
      ParserElement.DEFAULT_WHITE_CHARS = chars
  @staticmethod
  def inlineLiteralsUsing(cls):
      """
      Choose the class used to wrap plain string literals that appear inside
      a parser expression. The class is stored in
      C{ParserElement._literalStringClass}.

      Example::
          # default literal class used is Literal
          integer = Word(nums)
          date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

          date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

          # change to Suppress
          ParserElement.inlineLiteralsUsing(Suppress)
          date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

          date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
      """
      ParserElement._literalStringClass = cls
  def __init__( self, savelist=False ):
      """Initialise the state shared by every parser element.

      C{savelist} is stored as C{saveAsList}.
      """
      # identity / reporting
      #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
      self.strRepr = None
      self.errmsg = ""
      self.resultsName = None
      self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
      self.saveAsList = savelist

      # user callbacks
      self.parseAction = []
      self.failAction = None
      self.callDuringTry = False
      self.debug = False
      self.debugActions = ( None, None, None ) #custom debug actions

      # whitespace and ignorable handling
      self.skipWhitespace = True
      self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
      self.copyDefaultWhiteChars = True
      self.keepTabs = False
      self.ignoreExprs = []
      self.callPreparse = True # used to avoid redundant calls to preParse

      # parsing hints
      self.mayReturnEmpty = False # used when checking for left-recursion
      self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
      self.streamlined = False
      self.re = None
  def copy( self ):
      """
      Return a shallow copy of this C{ParserElement} with its own parse-action
      and ignore-expression lists. Useful for attaching different parse
      actions to the same parsing pattern.

      Example::
          integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
          integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
          integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")

          print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
      prints::
          [5120, 100, 655360, 268435456]
      Equivalent form of C{expr.copy()} is just C{expr()}::
          integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
      """
      clone = copy.copy( self )
      # give the clone independent lists so later additions do not leak back
      clone.parseAction = list(self.parseAction)
      clone.ignoreExprs = list(self.ignoreExprs)
      if self.copyDefaultWhiteChars:
          # pick up whatever the class-wide default is at copy time
          clone.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
      return clone
  def setName( self, name ):
      """
      Give this expression a name, which makes debugging and exception
      messages clearer. Returns C{self} so calls can be chained.

      Example::
          Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
          Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
      """
      self.name = name
      self.errmsg = "Expected " + self.name
      # keep an already-built exception object, if there is one, in sync
      if hasattr(self, "exception"):
          self.exception.msg = self.errmsg
      return self
  def setResultsName( self, name, listAllMatches=False ):
      """
      Attach a results name, under which the matching tokens can be
      referenced as a nested attribute of the returned parse results.

      NOTE: a *copy* of the original C{ParserElement} is returned, so a
      basic element (such as an integer) can be defined once and used in
      several places under different names.

      A trailing C{'*'} on C{name} is stripped and has the same effect as
      passing C{listAllMatches=True}.

      You can also set results names using the abbreviated syntax,
      C{expr("name")} in place of C{expr.setResultsName("name")} -
      see L{I{__call__}<__call__>}.

      Example::
          date_str = (integer.setResultsName("year") + '/'
                      + integer.setResultsName("month") + '/'
                      + integer.setResultsName("day"))

          # equivalent form:
          date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
      """
      named = self.copy()
      if name.endswith("*"):
          name = name[:-1]
          listAllMatches = True
      named.resultsName = name
      named.modalResults = not listAllMatches
      return named
  def setBreak(self,breakFlag = True):
      """Invoke the Python pdb debugger when this element is about to be
         parsed. Pass C{breakFlag} as True to enable, False to disable.
         Returns C{self}.
      """
      if not breakFlag:
          # restore the wrapped parse method, if a breaker is currently installed
          if hasattr(self._parse, "_originalParseMethod"):
              self._parse = self._parse._originalParseMethod
          return self

      _parseMethod = self._parse
      def breaker(instring, loc, doActions=True, callPreParse=True):
          import pdb
          pdb.set_trace()
          return _parseMethod( instring, loc, doActions, callPreParse )
      # remember the wrapped method so the breakpoint can be removed again
      breaker._originalParseMethod = _parseMethod
      self._parse = breaker
      return self
  def setParseAction( self, *fns, **kwargs ):
      """
      Replace this element's parse actions with the given callables, which run
      when the element matches successfully. Returns C{self}.

      Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
      C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
       - s   = the original string being parsed (see note below)
       - loc = the location of the matching substring
       - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
      If the functions in fns modify the tokens, they can return them as the return
      value from fn, and the modified list of tokens will replace the original.
      Otherwise, fn does not need to return any value.

      Optional keyword arguments:
       - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

      Note: the default parsing behavior is to expand tabs in the input string
      before starting the parsing process.  See L{I{parseString}<parseString>} for more information
      on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
      consistent view of the parsed string, the parse location, and line and column
      positions within the parsed string.

      Example::
          integer = Word(nums)
          date_str = integer + '/' + integer + '/' + integer

          date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

          # use parse action to convert to ints at parse time
          integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
          date_str = integer + '/' + integer + '/' + integer

          # note that integer fields are now ints, not strings
          date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
      """
      # every action is wrapped so it can be invoked uniformly as fn(s, loc, toks)
      self.parseAction = [_trim_arity(fn) for fn in fns]
      self.callDuringTry = kwargs.get("callDuringTry", False)
      return self
  def addParseAction( self, *fns, **kwargs ):
      """
      Append one or more parse actions to this expression's existing list of
      parse actions. See L{I{setParseAction}<setParseAction>}. Returns C{self}.

      See examples in L{I{copy}<copy>}.
      """
      self.parseAction += [_trim_arity(fn) for fn in fns]
      # once enabled, callDuringTry stays enabled
      run_during_try = kwargs.get("callDuringTry", False)
      self.callDuringTry = self.callDuringTry or run_during_try
      return self
  def addCondition(self, *fns, **kwargs):
      """Add a boolean predicate function to expression's list of parse actions. See
      L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
      functions passed to C{addCondition} need to return boolean success/fail of the condition.
      Returns C{self}.

      Optional keyword arguments:
       - message = define a custom message to be used in the raised exception
       - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
       - callDuringTry = if True, the conditions are also run during lookaheads and alternate testing

      Example::
          integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
          year_int = integer.copy()
          year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
          date_str = year_int + '/' + integer + '/' + integer

          result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
      """
      msg = kwargs.get("message", "failed user-defined condition")
      exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

      def make_condition(fn):
          # Bind this particular predicate now.  A closure over the loop
          # variable would make every wrapper evaluate the last predicate.
          # Wrapping once also avoids re-detecting the arity on every call.
          trimmed = _trim_arity(fn)
          def pa(s,l,t):
              if not bool(trimmed(s,l,t)):
                  raise exc_type(s,l,msg)
          return pa

      for fn in fns:
          self.parseAction.append(make_condition(fn))
      self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
      return self
  def setFailAction( self, fn ):
      """Register the action to perform if parsing fails at this expression.
         Returns C{self}.

         Fail action fn is a callable function that takes the arguments
         C{fn(s,loc,expr,err)} where:
          - s = string being parsed
          - loc = location where expression match was attempted and failed
          - expr = the parse expression that failed
          - err = the exception thrown
         The function returns no value.  It may throw C{L{ParseFatalException}}
         if it is desired to stop parsing immediately."""
      self.failAction = fn
      return self
  1162. def _skipIgnorables( self, instring, loc ):
  1163. exprsFound = True
  1164. while exprsFound:
  1165. exprsFound = False
  1166. for e in self.ignoreExprs:
  1167. try:
  1168. while 1:
  1169. loc,dummy = e._parse( instring, loc )
  1170. exprsFound = True
  1171. except ParseException:
  1172. pass
  1173. return loc
  def preParse( self, instring, loc ):
      """Return the location at which matching should actually start: C{loc}
      advanced past any ignorable expressions and then, if C{skipWhitespace}
      is set, past leading characters found in C{whiteChars}."""
      if self.ignoreExprs:
          loc = self._skipIgnorables( instring, loc )

      if not self.skipWhitespace:
          return loc

      white = self.whiteChars
      end = len(instring)
      while loc < end and instring[loc] in white:
          loc += 1
      return loc
  def parseImpl( self, instring, loc, doActions=True ):
      """Base implementation of the matching step: consume nothing and
      return C{loc} unchanged together with an empty token list."""
      no_tokens = []
      return loc, no_tokens
  def postParse( self, instring, loc, tokenlist ):
      """Base implementation of the post-match hook: return C{tokenlist}
      unchanged (the very same object)."""
      return tokenlist
  1187. #~ @profile
  def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
      # Core (uncached) parse routine for a single element.
      #
      # Steps: optionally skip leading ignorables/whitespace via preParse, match
      # with parseImpl, let postParse adjust the tokens, wrap them in a
      # ParseResults, then run the parse actions.
      #
      # Parameters:
      #  - instring     = the string being parsed
      #  - loc          = location at which to attempt the match
      #  - doActions    = run parse actions (actions also run when callDuringTry is set)
      #  - callPreParse = call preParse first (only if self.callPreparse is also set)
      #
      # Returns the tuple (location after the match, ParseResults).
      # An IndexError escaping parseImpl is converted to a ParseException located
      # at the end of the input; an IndexError escaping a parse action is wrapped
      # in a ParseException with the original attached as __cause__.
      debugging = ( self.debug ) #and doActions )

      # slow path: debug hooks and/or a fail action need to observe failures
      if debugging or self.failAction:
          #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
          if (self.debugActions[0] ):
              self.debugActions[0]( instring, loc, self )
          if callPreParse and self.callPreparse:
              preloc = self.preParse( instring, loc )
          else:
              preloc = loc
          # location where the tokens begin, reported to actions and hooks
          tokensStart = preloc
          try:
              try:
                  loc,tokens = self.parseImpl( instring, preloc, doActions )
              except IndexError:
                  raise ParseException( instring, len(instring), self.errmsg, self )
          except ParseBaseException as err:
              #~ print ("Exception raised:", err)
              if self.debugActions[2]:
                  self.debugActions[2]( instring, tokensStart, self, err )
              if self.failAction:
                  self.failAction( instring, tokensStart, self, err )
              raise
      else:
          # fast path: no debug hooks, no fail action
          if callPreParse and self.callPreparse:
              preloc = self.preParse( instring, loc )
          else:
              preloc = loc
          tokensStart = preloc
          # only pay for the IndexError guard when the element may index past
          # the end of the string, or when we are already at/after the end
          if self.mayIndexError or preloc >= len(instring):
              try:
                  loc,tokens = self.parseImpl( instring, preloc, doActions )
              except IndexError:
                  raise ParseException( instring, len(instring), self.errmsg, self )
          else:
              loc,tokens = self.parseImpl( instring, preloc, doActions )

      tokens = self.postParse( instring, loc, tokens )

      retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
      if self.parseAction and (doActions or self.callDuringTry):
          if debugging:
              # same loop as the non-debugging branch below, plus the
              # exception debug hook on failure
              try:
                  for fn in self.parseAction:
                      try:
                          tokens = fn( instring, tokensStart, retTokens )
                      except IndexError as parse_action_exc:
                          exc = ParseException("exception raised in parse action")
                          exc.__cause__ = parse_action_exc
                          raise exc

                      # an action that returns a new value replaces the results;
                      # None or the same object leaves them as they are
                      if tokens is not None and tokens is not retTokens:
                          retTokens = ParseResults( tokens,
                                                    self.resultsName,
                                                    asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                    modal=self.modalResults )
              except ParseBaseException as err:
                  #~ print "Exception raised in user parse action:", err
                  if (self.debugActions[2] ):
                      self.debugActions[2]( instring, tokensStart, self, err )
                  raise
          else:
              for fn in self.parseAction:
                  try:
                      tokens = fn( instring, tokensStart, retTokens )
                  except IndexError as parse_action_exc:
                      exc = ParseException("exception raised in parse action")
                      exc.__cause__ = parse_action_exc
                      raise exc

                  if tokens is not None and tokens is not retTokens:
                      retTokens = ParseResults( tokens,
                                                self.resultsName,
                                                asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                modal=self.modalResults )
      if debugging:
          #~ print ("Matched",self,"->",retTokens.asList())
          if (self.debugActions[1] ):
              self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

      return loc, retTokens
  def tryParse( self, instring, loc ):
      """Attempt a match at C{loc} with C{doActions=False} and return the
      location after the match.

      A C{ParseFatalException} is downgraded to an ordinary C{ParseException}.
      """
      try:
          result = self._parse( instring, loc, doActions=False )
      except ParseFatalException:
          raise ParseException( instring, loc, self.errmsg, self)
      return result[0]
  def canParseNext(self, instring, loc):
      """Return True if this element matches at C{loc}, False if the attempt
      raises C{ParseException} or C{IndexError}."""
      try:
          self.tryParse(instring, loc)
          return True
      except (ParseException, IndexError):
          return False
  1276. class _UnboundedCache(object):
  1277. def __init__(self):
  1278. cache = {}
  1279. self.not_in_cache = not_in_cache = object()
  1280. def get(self, key):
  1281. return cache.get(key, not_in_cache)
  1282. def set(self, key, value):
  1283. cache[key] = value
  1284. def clear(self):
  1285. cache.clear()
  1286. def cache_len(self):
  1287. return len(cache)
  1288. self.get = types.MethodType(get, self)
  1289. self.set = types.MethodType(set, self)
  1290. self.clear = types.MethodType(clear, self)
  1291. self.__len__ = types.MethodType(cache_len, self)
  1292. if _OrderedDict is not None:
  1293. class _FifoCache(object):
  1294. def __init__(self, size):
  1295. self.not_in_cache = not_in_cache = object()
  1296. cache = _OrderedDict()
  1297. def get(self, key):
  1298. return cache.get(key, not_in_cache)
  1299. def set(self, key, value):
  1300. cache[key] = value
  1301. while len(cache) > size:
  1302. try:
  1303. cache.popitem(False)
  1304. except KeyError:
  1305. pass
  1306. def clear(self):
  1307. cache.clear()
  1308. def cache_len(self):
  1309. return len(cache)
  1310. self.get = types.MethodType(get, self)
  1311. self.set = types.MethodType(set, self)
  1312. self.clear = types.MethodType(clear, self)
  1313. self.__len__ = types.MethodType(cache_len, self)
  1314. else:
  1315. class _FifoCache(object):
  1316. def __init__(self, size):
  1317. self.not_in_cache = not_in_cache = object()
  1318. cache = {}
  1319. key_fifo = collections.deque([], size)
  1320. def get(self, key):
  1321. return cache.get(key, not_in_cache)
  1322. def set(self, key, value):
  1323. cache[key] = value
  1324. while len(key_fifo) > size:
  1325. cache.pop(key_fifo.popleft(), None)
  1326. key_fifo.append(key)
  1327. def clear(self):
  1328. cache.clear()
  1329. key_fifo.clear()
  1330. def cache_len(self):
  1331. return len(cache)
  1332. self.get = types.MethodType(get, self)
  1333. self.set = types.MethodType(set, self)
  1334. self.clear = types.MethodType(clear, self)
  1335. self.__len__ = types.MethodType(cache_len, self)
  # argument cache for optimizing repeated calls when backtracking through recursive expressions
  packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
  # re-entrant lock held by _parseCache while it reads or fills the cache
  packrat_cache_lock = RLock()
  # [hits, misses] counters, indexed by HIT/MISS in _parseCache
  packrat_cache_stats = [0, 0]
  1340. # this method gets repeatedly called during backtracking with the same arguments -
  1341. # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
  def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
      """Memoizing front end for C{_parseNoCache}.

      Results are keyed on (element, string, location, callPreParse,
      doActions). Both successful results and C{ParseBaseException}s are
      cached. A cached exception is re-raised; a cached result is returned
      as a copy so callers cannot modify the stored tokens.
      """
      HIT, MISS = 0, 1
      lookup = (self, instring, loc, callPreParse, doActions)
      with ParserElement.packrat_cache_lock:
          cache = ParserElement.packrat_cache
          cached = cache.get(lookup)

          if cached is not cache.not_in_cache:
              ParserElement.packrat_cache_stats[HIT] += 1
              if isinstance(cached, Exception):
                  raise cached
              return (cached[0], cached[1].copy())

          ParserElement.packrat_cache_stats[MISS] += 1
          try:
              result = self._parseNoCache(instring, loc, doActions, callPreParse)
          except ParseBaseException as pe:
              # cache a copy of the exception, without the traceback
              cache.set(lookup, pe.__class__(*pe.args))
              raise
          # store a private copy of the tokens; hand the original to the caller
          cache.set(lookup, (result[0], result[1].copy()))
          return result
  1364. _parse = _parseNoCache
  @staticmethod
  def resetCache():
      """Empty the packrat cache and zero its statistics counters in place."""
      stats = ParserElement.packrat_cache_stats
      ParserElement.packrat_cache.clear()
      stats[:] = [0] * len(stats)
  1369. _packratEnabled = False
  @staticmethod
  def enablePackrat(cache_size_limit=128):
      """Turn on "packrat" parsing, which adds memoizing to the parsing logic.
         Repeated parse attempts at the same string location (which happens
         often in many complex grammars) can immediately return a cached value,
         instead of re-executing parsing/validating code.  Memoizing is done of
         both valid results and parsing exceptions.

         Parameters:
          - cache_size_limit - (default=C{128}) - if an integer value is provided
            will limit the size of the packrat cache; if None is passed, then
            the cache size will be unbounded; if 0 is passed, the cache will
            be effectively disabled.

         This speedup may break existing programs that use parse actions that
         have side-effects.  For this reason, packrat parsing is disabled when
         you first import pyparsing.  To activate the packrat feature, your
         program must call the class method C{ParserElement.enablePackrat()}.  If
         your program uses C{psyco} to "compile as you go", you must call
         C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
         Python will crash.  For best results, call C{enablePackrat()} immediately
         after importing pyparsing.

         Calling this method again after packrat parsing is enabled has no effect.

         Example::
             import pyparsing
             pyparsing.ParserElement.enablePackrat()
      """
      if ParserElement._packratEnabled:
          return

      ParserElement._packratEnabled = True
      if cache_size_limit is None:
          ParserElement.packrat_cache = ParserElement._UnboundedCache()
      else:
          ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
      # route all parsing through the memoizing front end from now on
      ParserElement._parse = ParserElement._parseCache
  def parseString( self, instring, parseAll=False ):
      """
      Run this parse expression against C{instring}, starting at its
      beginning, and return the resulting tokens.

      This is the main interface to the client code, once the complete
      expression has been built.

      If you want the grammar to require that the entire input string be
      successfully parsed, then set C{parseAll} to True (equivalent to ending
      the grammar with C{L{StringEnd()}}).

      Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
      in order to report proper column numbers in parse actions.
      If the input string contains tabs and
      the grammar uses parse actions that use the C{loc} argument to index into the
      string being parsed, you can ensure you have a consistent view of the input
      string by:
       - calling C{parseWithTabs} on your grammar before calling C{parseString}
         (see L{I{parseWithTabs}<parseWithTabs>})
       - define your parse action using the full C{(s,loc,toks)} signature, and
         reference the input string using the parse action's C{s} argument
       - explicitly expand the tabs in your input string before calling
         C{parseString}

      Example::
          Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
          Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
      """
      ParserElement.resetCache()
      if not self.streamlined:
          self.streamline()
          #~ self.saveAsList = True
      for ignorable in self.ignoreExprs:
          ignorable.streamline()
      if not self.keepTabs:
          instring = instring.expandtabs()
      try:
          loc, tokens = self._parse( instring, 0 )
          if parseAll:
              # after trailing whitespace/ignorables, only end-of-string may remain
              loc = self.preParse( instring, loc )
              end_of_input = Empty() + StringEnd()
              end_of_input._parse( instring, loc )
      except ParseBaseException as exc:
          if ParserElement.verbose_stacktrace:
              raise
          # catch and re-raise exception from here, clears out pyparsing internal stack trace
          raise exc
      return tokens
  def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
      """
      Scan the input string for expression matches.  Each match will return the
      matching tokens, start location, and end location.  May be called with optional
      C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
      C{overlap} is specified, then overlapping matches will be reported.

      This is a generator: each match is yielded as a
      C{(tokens, start, end)} tuple as it is found.

      Note that the start and end locations are reported relative to the string
      being parsed.  See L{I{parseString}<parseString>} for more information on parsing
      strings with embedded tabs.

      Example::
          source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
          print(source)
          for tokens,start,end in Word(alphas).scanString(source):
              print(' '*start + '^'*(end-start))
              print(' '*start + tokens[0])

      prints::

          sldjf123lsdjjkf345sldkjf879lkjsfd987
          ^^^^^
          sldjf
                  ^^^^^^^
                  lsdjjkf
                            ^^^^^^
                            sldkjf
                                     ^^^^^^
                                     lkjsfd
      """
      if not self.streamlined:
          self.streamline()
      for e in self.ignoreExprs:
          e.streamline()

      if not self.keepTabs:
          instring = _ustr(instring).expandtabs()
      instrlen = len(instring)
      loc = 0
      # bind the bound methods to locals once, outside the scanning loop
      preparseFn = self.preParse
      parseFn = self._parse
      ParserElement.resetCache()
      matches = 0
      try:
          while loc <= instrlen and matches < maxMatches:
              try:
                  # skip whitespace/ignorables ourselves, then parse without a second preParse
                  preloc = preparseFn( instring, loc )
                  nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
              except ParseException:
                  # no match here: retry one character further on
                  loc = preloc+1
              else:
                  if nextLoc > loc:
                      matches += 1
                      yield tokens, preloc, nextLoc
                      if overlap:
                          # NOTE(review): 'nextloc' (lower-case l) is a different
                          # variable from 'nextLoc'; it holds the pre-parsed position
                          # and is only used for the comparison below
                          nextloc = preparseFn( instring, loc )
                          if nextloc > loc:
                              loc = nextLoc
                          else:
                              loc += 1
                      else:
                          loc = nextLoc
                  else:
                      # match did not advance past loc: step forward to avoid looping in place
                      loc = preloc+1
      except ParseBaseException as exc:
          if ParserElement.verbose_stacktrace:
              raise
          else:
              # catch and re-raise exception from here, clears out pyparsing internal stack trace
              raise exc
  def transformString( self, instring ):
      """
      Extension to C{L{scanString}} that rewrites the input: every match found
      in C{instring} is replaced by the tokens returned for it (typically
      modified by a parse action), and the unmatched text in between is copied
      through unchanged.  Returns the resulting string.

      Note that calling this method sets C{keepTabs} to C{True} on this
      expression, so that locations reported by C{scanString} line up with the
      original, un-expanded input string.

      Example::
          wd = Word(alphas)
          wd.setParseAction(lambda toks: toks[0].title())

          print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
      Prints::
          Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
      """
      pieces = []
      prev_end = 0
      # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
      # keep string locs straight between transformString and scanString
      self.keepTabs = True
      try:
          for toks, start, end in self.scanString(instring):
              # text between the previous match and this one passes through untouched
              pieces.append(instring[prev_end:start])
              if toks:
                  if isinstance(toks, ParseResults):
                      pieces.extend(toks.asList())
                  elif isinstance(toks, list):
                      pieces.extend(toks)
                  else:
                      pieces.append(toks)
              prev_end = end
          # trailing text after the last match
          pieces.append(instring[prev_end:])
          kept = [piece for piece in pieces if piece]
          return "".join(map(_ustr, _flatten(kept)))
      except ParseBaseException as exc:
          if not ParserElement.verbose_stacktrace:
              # catch and re-raise exception from here, clears out pyparsing internal stack trace
              raise exc
          raise
  def searchString( self, instring, maxMatches=_MAX_INT ):
      """
      Convenience wrapper around C{L{scanString}} that discards the match
      locations and returns just the matched tokens, collected into a
      C{ParseResults}.  The optional C{maxMatches} argument clips the search
      after 'n' matches are found.

      Example::
          # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
          cap_word = Word(alphas.upper(), alphas.lower())

          print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

          # the sum() builtin can be used to merge results into a single ParseResults object
          print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
      prints::
          [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
          ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
      """
      try:
          found = [toks for toks, _start, _end in self.scanString(instring, maxMatches)]
          return ParseResults(found)
      except ParseBaseException as exc:
          if not ParserElement.verbose_stacktrace:
              # catch and re-raise exception from here, clears out pyparsing internal stack trace
              raise exc
          raise
  def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
      """
      Generator that splits C{instring}, using this expression as the separator.
      The optional C{maxsplit} argument limits the number of splits; if the
      optional C{includeSeparators} argument (default=C{False}) is true, the
      first token of each separator match is yielded between the pieces.

      Example::
          punc = oneOf(list(".,;:/-!?"))
          print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
      prints::
          ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
      """
      prev_end = 0
      for toks, start, end in self.scanString(instring, maxMatches=maxsplit):
          # the text between the previous separator and this one
          yield instring[prev_end:start]
          if includeSeparators:
              yield toks[0]
          prev_end = end
      # whatever follows the last separator (possibly the empty string)
      yield instring[prev_end:]
  def __add__(self, other ):
      """
      Implementation of + operator - returns C{L{And}}. A string operand is
      first converted using C{ParserElement._literalStringClass}
      (L{Literal} by default).  Any other non-C{ParserElement} operand produces
      a C{SyntaxWarning} and a C{None} result.

      Example::
          greet = Word(alphas) + "," + Word(alphas) + "!"
          hello = "Hello, World!"
          print (hello, "->", greet.parseString(hello))
      Prints::
          Hello, World! -> ['Hello', ',', 'World', '!']
      """
      rhs = other
      if isinstance(rhs, basestring):
          rhs = ParserElement._literalStringClass(rhs)
      if isinstance(rhs, ParserElement):
          return And([self, rhs])
      warnings.warn(
          "Cannot combine element of type %s with ParserElement" % type(rhs),
          SyntaxWarning, stacklevel=2)
      return None
  def __radd__(self, other ):
      """
      Implementation of + operator when left operand is not a C{L{ParserElement}}.
      A string operand is converted with C{ParserElement._literalStringClass};
      any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
      C{None} result.
      """
      lhs = other
      if isinstance(lhs, basestring):
          lhs = ParserElement._literalStringClass(lhs)
      if isinstance(lhs, ParserElement):
          return lhs + self
      warnings.warn(
          "Cannot combine element of type %s with ParserElement" % type(lhs),
          SyntaxWarning, stacklevel=2)
      return None
  def __sub__(self, other):
      """
      Implementation of - operator, returns C{L{And}} with error stop: the
      result is C{self + And._ErrorStop() + other}.  A string operand is
      converted with C{ParserElement._literalStringClass}; any other
      non-C{ParserElement} operand produces a C{SyntaxWarning} and a C{None}
      result.
      """
      rhs = other
      if isinstance(rhs, basestring):
          rhs = ParserElement._literalStringClass(rhs)
      if isinstance(rhs, ParserElement):
          return self + And._ErrorStop() + rhs
      warnings.warn(
          "Cannot combine element of type %s with ParserElement" % type(rhs),
          SyntaxWarning, stacklevel=2)
      return None
  def __rsub__(self, other ):
      """
      Implementation of - operator when left operand is not a C{L{ParserElement}}.
      A string operand is converted with C{ParserElement._literalStringClass};
      any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
      C{None} result.
      """
      lhs = other
      if isinstance(lhs, basestring):
          lhs = ParserElement._literalStringClass(lhs)
      if isinstance(lhs, ParserElement):
          return lhs - self
      warnings.warn(
          "Cannot combine element of type %s with ParserElement" % type(lhs),
          SyntaxWarning, stacklevel=2)
      return None
  1646. def __mul__(self,other):
  1647. """
  1648. Implementation of * operator, allows use of C{expr * 3} in place of
  1649. C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer
  1650. tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
  1651. may also include C{None} as in:
  1652. - C{expr*(n,None)} or C{expr*(n,)} is equivalent
  1653. to C{expr*n + L{ZeroOrMore}(expr)}
  1654. (read as "at least n instances of C{expr}")
  1655. - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
  1656. (read as "0 to n instances of C{expr}")
  1657. - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
  1658. - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
  1659. Note that C{expr*(None,n)} does not raise an exception if
  1660. more than n exprs exist in the input stream; that is,
  1661. C{expr*(None,n)} does not enforce a maximum number of expr
  1662. occurrences. If this behavior is desired, then write
  1663. C{expr*(None,n) + ~expr}
  1664. """
  1665. if isinstance(other,int):
  1666. minElements, optElements = other,0
  1667. elif isinstance(other,tuple):
  1668. other = (other + (None, None))[:2]
  1669. if other[0] is None:
  1670. other = (0, other[1])
  1671. if isinstance(other[0],int) and other[1] is None:
  1672. if other[0] == 0:
  1673. return ZeroOrMore(self)
  1674. if other[0] == 1:
  1675. return OneOrMore(self)
  1676. else:
  1677. return self*other[0] + ZeroOrMore(self)
  1678. elif isinstance(other[0],int) and isinstance(other[1],int):
  1679. minElements, optElements = other
  1680. optElements -= minElements
  1681. else:
  1682. raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
  1683. else:
  1684. raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
  1685. if minElements < 0:
  1686. raise ValueError("cannot multiply ParserElement by negative value")
  1687. if optElements < 0:
  1688. raise ValueError("second tuple value must be greater or equal to first tuple value")
  1689. if minElements == optElements == 0:
  1690. raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
  1691. if (optElements):
  1692. def makeOptionalList(n):
  1693. if n>1:
  1694. return Optional(self + makeOptionalList(n-1))
  1695. else:
  1696. return Optional(self)
  1697. if minElements:
  1698. if minElements == 1:
  1699. ret = self + makeOptionalList(optElements)
  1700. else:
  1701. ret = And([self]*minElements) + makeOptionalList(optElements)
  1702. else:
  1703. ret = makeOptionalList(optElements)
  1704. else:
  1705. if minElements == 1:
  1706. ret = self
  1707. else:
  1708. ret = And([self]*minElements)
  1709. return ret
  1710. def __rmul__(self, other):
  1711. return self.__mul__(other)
  def __or__(self, other ):
      """
      Implementation of | operator - returns C{L{MatchFirst}}.  A string
      operand is converted with C{ParserElement._literalStringClass}; any other
      non-C{ParserElement} operand produces a C{SyntaxWarning} and a C{None}
      result.
      """
      alternative = other
      if isinstance(alternative, basestring):
          alternative = ParserElement._literalStringClass(alternative)
      if isinstance(alternative, ParserElement):
          return MatchFirst([self, alternative])
      warnings.warn(
          "Cannot combine element of type %s with ParserElement" % type(alternative),
          SyntaxWarning, stacklevel=2)
      return None
  def __ror__(self, other ):
      """
      Implementation of | operator when left operand is not a C{L{ParserElement}}.
      A string operand is converted with C{ParserElement._literalStringClass};
      any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
      C{None} result.
      """
      lhs = other
      if isinstance(lhs, basestring):
          lhs = ParserElement._literalStringClass(lhs)
      if isinstance(lhs, ParserElement):
          return lhs | self
      warnings.warn(
          "Cannot combine element of type %s with ParserElement" % type(lhs),
          SyntaxWarning, stacklevel=2)
      return None
  def __xor__(self, other ):
      """
      Implementation of ^ operator - returns C{L{Or}}.  A string operand is
      converted with C{ParserElement._literalStringClass}; any other
      non-C{ParserElement} operand produces a C{SyntaxWarning} and a C{None}
      result.
      """
      alternative = other
      if isinstance(alternative, basestring):
          alternative = ParserElement._literalStringClass(alternative)
      if isinstance(alternative, ParserElement):
          return Or([self, alternative])
      warnings.warn(
          "Cannot combine element of type %s with ParserElement" % type(alternative),
          SyntaxWarning, stacklevel=2)
      return None
  def __rxor__(self, other ):
      """
      Implementation of ^ operator when left operand is not a C{L{ParserElement}}.
      A string operand is converted with C{ParserElement._literalStringClass};
      any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
      C{None} result.
      """
      lhs = other
      if isinstance(lhs, basestring):
          lhs = ParserElement._literalStringClass(lhs)
      if isinstance(lhs, ParserElement):
          return lhs ^ self
      warnings.warn(
          "Cannot combine element of type %s with ParserElement" % type(lhs),
          SyntaxWarning, stacklevel=2)
      return None
  def __and__(self, other ):
      """
      Implementation of & operator - returns C{L{Each}}.  A string operand is
      converted with C{ParserElement._literalStringClass}; any other
      non-C{ParserElement} operand produces a C{SyntaxWarning} and a C{None}
      result.
      """
      rhs = other
      if isinstance(rhs, basestring):
          rhs = ParserElement._literalStringClass(rhs)
      if isinstance(rhs, ParserElement):
          return Each([self, rhs])
      warnings.warn(
          "Cannot combine element of type %s with ParserElement" % type(rhs),
          SyntaxWarning, stacklevel=2)
      return None
  def __rand__(self, other ):
      """
      Implementation of & operator when left operand is not a C{L{ParserElement}}.
      A string operand is converted with C{ParserElement._literalStringClass};
      any other non-C{ParserElement} operand produces a C{SyntaxWarning} and a
      C{None} result.
      """
      lhs = other
      if isinstance(lhs, basestring):
          lhs = ParserElement._literalStringClass(lhs)
      if isinstance(lhs, ParserElement):
          return lhs & self
      warnings.warn(
          "Cannot combine element of type %s with ParserElement" % type(lhs),
          SyntaxWarning, stacklevel=2)
      return None
  def __invert__( self ):
      """
      Implementation of ~ operator - wraps this expression in a C{L{NotAny}}
      and returns it.
      """
      negated = NotAny(self)
      return negated
  1783. def __call__(self, name=None):
  1784. """
  1785. Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.
  1786. If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
  1787. passed as C{True}.
  1788. If C{name} is omitted, same as calling C{L{copy}}.
  1789. Example::
  1790. # these are equivalent
  1791. userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
  1792. userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
  1793. """
  1794. if name is not None:
  1795. return self.setResultsName(name)
  1796. else:
  1797. return self.copy()
  def suppress( self ):
      """
      Returns this C{ParserElement} wrapped in a C{Suppress}, so that its
      output is suppressed; useful to keep punctuation from cluttering up
      returned output.
      """
      wrapped = Suppress(self)
      return wrapped
  def leaveWhitespace( self ):
      """
      Turns off whitespace skipping for this C{ParserElement} by clearing its
      C{skipWhitespace} flag, and returns C{self} so calls can be chained.
      This is normally only used internally by the pyparsing module, but may
      be needed in some whitespace-sensitive grammars.
      """
      setattr(self, "skipWhitespace", False)
      return self
  def setWhitespaceChars( self, chars ):
      """
      Overrides the default whitespace chars for this element: stores C{chars}
      as C{whiteChars}, enables C{skipWhitespace}, and clears
      C{copyDefaultWhiteChars}.  Returns C{self} so calls can be chained.
      """
      self.copyDefaultWhiteChars = False
      self.whiteChars = chars
      self.skipWhitespace = True
      return self
  def parseWithTabs( self ):
      """
      Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string,
      by setting C{keepTabs}.  Must be called before C{parseString} when the input grammar
      contains elements that match C{<TAB>} characters.  Returns C{self} so calls can be chained.
      """
      setattr(self, "keepTabs", True)
      return self
  def ignore( self, other ):
      """
      Register an expression to be ignored (e.g., comments) while doing pattern
      matching; may be called repeatedly, to define multiple comment or other
      ignorable patterns.  A string is wrapped in C{Suppress}; a C{Suppress}
      is added only if it is not already registered; any other expression is
      copied and wrapped in C{Suppress}.  Returns C{self}.

      Example::
          patt = OneOrMore(Word(alphas))
          patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

          patt.ignore(cStyleComment)
          patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
      """
      if isinstance(other, basestring):
          other = Suppress(other)

      if not isinstance(other, Suppress):
          self.ignoreExprs.append(Suppress(other.copy()))
      elif other not in self.ignoreExprs:
          self.ignoreExprs.append(other)
      return self
  def setDebugActions( self, startAction, successAction, exceptionAction ):
      """
      Enable display of debugging messages while doing pattern matching, using
      the given callbacks.  Any callback passed as a false value (e.g. C{None})
      is replaced by the corresponding module default debug action.  Sets
      C{debug} to C{True} and returns C{self}.
      """
      on_start = startAction or _defaultStartDebugAction
      on_success = successAction or _defaultSuccessDebugAction
      on_exception = exceptionAction or _defaultExceptionDebugAction
      self.debugActions = (on_start, on_success, on_exception)
      self.debug = True
      return self
  def setDebug( self, flag=True ):
      """
      Enable or disable display of debugging messages while doing pattern matching.
      A true C{flag} installs the default debug actions (which also turns
      debugging on); a false C{flag} clears the C{debug} attribute.  Returns C{self}.

      Example::
          wd = Word(alphas).setName("alphaword")
          integer = Word(nums).setName("numword")
          term = wd | integer

          # turn on debugging for wd
          wd.setDebug()

          OneOrMore(term).parseString("abc 123 xyz 890")

      prints::
          Match alphaword at loc 0(1,1)
          Matched alphaword -> ['abc']
          Match alphaword at loc 3(1,4)
          Exception raised:Expected alphaword (at char 4), (line:1, col:5)
          Match alphaword at loc 7(1,8)
          Matched alphaword -> ['xyz']
          Match alphaword at loc 11(1,12)
          Exception raised:Expected alphaword (at char 12), (line:1, col:13)
          Match alphaword at loc 15(1,16)
          Exception raised:Expected alphaword (at char 15), (line:1, col:16)

      The output shown is that produced by the default debug actions - custom debug actions can be
      specified using L{setDebugActions}. Prior to attempting
      to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
      is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
      message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
      which makes debugging and exception messages easier to understand - for instance, the default
      name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
      """
      if not flag:
          self.debug = False
          return self
      self.setDebugActions(
          _defaultStartDebugAction,
          _defaultSuccessDebugAction,
          _defaultExceptionDebugAction )
      return self
  1891. def __str__( self ):
  1892. return self.name
  def __repr__( self ):
      """Return the same text as the string conversion of this element (via C{_ustr})."""
      text = _ustr(self)
      return text
  def streamline( self ):
      """
      Mark this element as streamlined and clear its cached C{strRepr}.
      Returns C{self}.
      """
      self.strRepr = None
      self.streamlined = True
      return self
  def checkRecursion( self, parseElementList ):
      """
      Recursion check hook; this base implementation does nothing and
      returns C{None}.
      """
      return None
  def validate( self, validateTrace=None ):
      """
      Check defined expressions for valid structure, check for infinite recursive definitions.

      C{validateTrace} is accepted for signature compatibility but is not used
      by this base implementation, which simply runs C{checkRecursion} with an
      empty list.  Its default is C{None} rather than a shared mutable list.
      """
      self.checkRecursion( [] )
  def parseFile( self, file_or_filename, parseAll=False ):
      """
      Execute the parse expression on the given file or filename.
      If the argument has no C{read} attribute it is treated as a filename:
      the file is opened in text mode, read in full, and closed before parsing.
      The contents are then handed to C{parseString} together with C{parseAll}.
      """
      try:
          text = file_or_filename.read()
      except AttributeError:
          # no usable read(): treat the argument as a path
          with open(file_or_filename, "r") as handle:
              text = handle.read()
      try:
          return self.parseString(text, parseAll)
      except ParseBaseException as exc:
          if not ParserElement.verbose_stacktrace:
              # catch and re-raise exception from here, clears out pyparsing internal stack trace
              raise exc
          raise
  def __eq__(self,other):
      """
      Equality comparison.

       - against another C{ParserElement}: true if it is the same object or
         both have equal instance attributes (C{vars})
       - against a string: true if this expression C{matches} the string
       - against anything else: C{False}
      """
      if isinstance(other, ParserElement):
          return self is other or vars(self) == vars(other)
      elif isinstance(other, basestring):
          return self.matches(other)
      else:
          # The previous code compared a freshly built super() proxy with
          # `other`; operators on a super object do not delegate to the
          # parent class's __eq__, so that comparison was never meaningful.
          return False
  1932. def __ne__(self,other):
  1933. return not (self == other)
  1934. def __hash__(self):
  1935. return hash(id(self))
  1936. def __req__(self,other):
  1937. return self == other
  1938. def __rne__(self,other):
  1939. return not (self == other)
  def matches(self, testString, parseAll=True):
      """
      Quick boolean test of this parser against a test string: returns C{True}
      if C{parseString} succeeds and C{False} if it raises a
      C{ParseBaseException}.  Good for simple inline microtests of sub
      expressions while building up larger parser.

      Parameters:
       - testString - to test against this expression for a match
       - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

      Example::
          expr = Word(nums)
          assert expr.matches("100")
      """
      try:
          self.parseString(_ustr(testString), parseAll=parseAll)
      except ParseBaseException:
          return False
      else:
          return True
  def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
      """
      Execute the parse expression on a series of test strings, showing each
      test, the parsed results or where the parse failed. Quick and easy way to
      run a parse expression against a list of sample strings.

      Parameters:
       - tests - a list of separate test strings, or a multiline string of test strings
       - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
       - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
            string; pass None to disable comment filtering
       - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
            if False, only dump nested list
       - printResults - (default=C{True}) prints test output to stdout
       - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

      Returns: a (success, results) tuple, where success indicates that all tests succeeded
      (or failed if C{failureTests} is True), and the results contain a list of
      (test string, result) pairs, where the result is the parsed results or the
      exception that was raised for that test

      Example::
          number_expr = pyparsing_common.number.copy()

          result = number_expr.runTests('''
              # unsigned integer
              100
              # negative integer
              -100
              # float with scientific notation
              6.02e23
              # integer with scientific notation
              1e-12
              ''')
          print("Success" if result[0] else "Failed!")

          result = number_expr.runTests('''
              # stray character
              100Z
              # missing leading digit before '.'
              -.100
              # too many '.'
              3.14.159
              ''', failureTests=True)
          print("Success" if result[0] else "Failed!")
      prints::
          # unsigned integer
          100
          [100]

          # negative integer
          -100
          [-100]

          # float with scientific notation
          6.02e23
          [6.02e+23]

          # integer with scientific notation
          1e-12
          [1e-12]

          Success

          # stray character
          100Z
             ^
          FAIL: Expected end of text (at char 3), (line:1, col:4)

          # missing leading digit before '.'
          -.100
          ^
          FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

          # too many '.'
          3.14.159
              ^
          FAIL: Expected end of text (at char 4), (line:1, col:5)

          Success

      Each test string must be on a single line. If you want to test a string that spans multiple
      lines, create a test like this::

          expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

      (Note that this is a raw string literal, you must include the leading 'r'.)
      """
      # a multiline string is split into one stripped test per line
      if isinstance(tests, basestring):
          tests = list(map(str.strip, tests.rstrip().splitlines()))
      # a comment given as a string is matched as a Literal
      if isinstance(comment, basestring):
          comment = Literal(comment)
      allResults = []
      comments = []
      success = True
      for t in tests:
          # collect comment lines (and blank lines that follow a comment) so they
          # can be echoed above the next real test
          if comment is not None and comment.matches(t, False) or comments and not t:
              comments.append(t)
              continue
          # skip blank lines that are not attached to a comment
          if not t:
              continue
          out = ['\n'.join(comments), t]
          comments = []
          try:
              # a literal backslash-n in the test text stands for a real newline
              t = t.replace(r'\n','\n')
              result = self.parseString(t, parseAll=parseAll)
              out.append(result.dump(full=fullDump))
              # a successful parse counts as a failure when failures are expected
              success = success and not failureTests
          except ParseBaseException as pe:
              fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
              if '\n' in t:
                  # multi-line test: show the offending line with a caret under the column
                  out.append(line(pe.loc, t))
                  out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
              else:
                  out.append(' '*pe.loc + '^' + fatal)
              out.append("FAIL: " + str(pe))
              success = success and failureTests
              result = pe
          except Exception as exc:
              # any other exception is reported and recorded as this test's result
              out.append("FAIL-EXCEPTION: " + str(exc))
              success = success and failureTests
              result = exc

          if printResults:
              if fullDump:
                  out.append('')
              print('\n'.join(out))

          allResults.append((t, result))

      return success, allResults
  class Token(ParserElement):
      """
      Abstract C{ParserElement} subclass that serves as the base for atomic
      matching patterns.  Initializes the parent with C{savelist=False}.
      """
      def __init__( self ):
          init_parent = super(Token,self).__init__
          init_parent( savelist=False )
  class Empty(Token):
      """
      An empty token, will always match.  Named C{"Empty"}, flagged as able to
      return an empty result.
      """
      def __init__( self ):
          super(Empty,self).__init__()
          self.mayIndexError = False
          self.mayReturnEmpty = True
          self.name = "Empty"
  class NoMatch(Token):
      """
      A token that will never match: C{parseImpl} always raises a
      C{ParseException} carrying the message C{"Unmatchable token"}.
      """
      def __init__( self ):
          super(NoMatch,self).__init__()
          self.errmsg = "Unmatchable token"
          self.mayIndexError = False
          self.mayReturnEmpty = True
          self.name = "NoMatch"

      def parseImpl( self, instring, loc, doActions=True ):
          failure = ParseException(instring, loc, self.errmsg, self)
          raise failure
  class Literal(Token):
      """
      Token to exactly match a specified string.

      Example::
          Literal('blah').parseString('blah')  # -> ['blah']
          Literal('blah').parseString('blahfooblah')  # -> ['blah']
          Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

      For case-insensitive matching, use L{CaselessLiteral}.

      For keyword matching (force word break before and after the matched string),
      use L{Keyword} or L{CaselessKeyword}.
      """
      def __init__( self, matchString ):
          super(Literal,self).__init__()
          self.match = matchString
          self.matchLen = len(matchString)
          try:
              first_char = matchString[0]
          except IndexError:
              # an empty match string: warn and turn this instance into an Empty
              warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
              self.__class__ = Empty
          else:
              self.firstMatchChar = first_char
          self.name = '"%s"' % _ustr(self.match)
          self.errmsg = "Expected " + self.name
          self.mayReturnEmpty = False
          self.mayIndexError = False

      # Performance tuning: this routine gets called a *lot*
      # if this is a single character match string and the first character matches,
      # short-circuit as quickly as possible, and avoid calling startswith
      #~ @profile
      def parseImpl( self, instring, loc, doActions=True ):
          if instring[loc] == self.firstMatchChar:
              if self.matchLen==1 or instring.startswith(self.match,loc):
                  return loc+self.matchLen, self.match
          raise ParseException(instring, loc, self.errmsg, self)
  # short module-level alias for Literal
  _L = Literal
  # class used by ParserElement's operator methods to convert plain-string operands
  ParserElement._literalStringClass = Literal
  class Keyword(Token):
      """
      Token to exactly match a specified string as a keyword, that is, it must be
      immediately followed by a non-keyword character, and must not be immediately
      preceded by one.  Compare with C{L{Literal}}:
       - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
       - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
      Accepts two optional constructor arguments in addition to the keyword string:
       - C{identChars} is a string of characters that would be valid identifier characters,
            defaulting to all alphanumerics + "_" and "$"
       - C{caseless} allows case-insensitive matching, default is C{False}.

      Example::
          Keyword("start").parseString("start")  # -> ['start']
          Keyword("start").parseString("starting")  # -> Exception

      For case-insensitive matching, use L{CaselessKeyword}.
      """
      DEFAULT_KEYWORD_CHARS = alphanums+"_$"

      def __init__( self, matchString, identChars=None, caseless=False ):
          super(Keyword,self).__init__()
          if identChars is None:
              identChars = Keyword.DEFAULT_KEYWORD_CHARS
          self.match = matchString
          self.matchLen = len(matchString)
          try:
              self.firstMatchChar = matchString[0]
          except IndexError:
              warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
          self.name = '"%s"' % self.match
          self.errmsg = "Expected " + self.name
          self.mayReturnEmpty = False
          self.mayIndexError = False
          self.caseless = caseless
          if caseless:
              # caseless comparisons are done on upper-cased text throughout
              self.caselessmatch = matchString.upper()
              identChars = identChars.upper()
          self.identChars = set(identChars)

      def parseImpl( self, instring, loc, doActions=True ):
          # A keyword matches only when the text matches AND the characters
          # immediately after and before it are not identifier characters.
          if self.caseless:
              if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                   (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                   (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                  return loc+self.matchLen, self.match
          else:
              if (instring[loc] == self.firstMatchChar and
                  (self.matchLen==1 or instring.startswith(self.match,loc)) and
                  (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                  (loc == 0 or instring[loc-1] not in self.identChars) ):
                  return loc+self.matchLen, self.match
          raise ParseException(instring, loc, self.errmsg, self)

      def copy(self):
          """
          Return a copy of this keyword that keeps its own identifier
          characters (in a separate set, so the copy and the original can be
          changed independently).
          """
          c = super(Keyword,self).copy()
          # Previously the copy was reset to DEFAULT_KEYWORD_CHARS, which
          # silently discarded any custom identChars given to the constructor.
          c.identChars = set(self.identChars)
          return c

      @staticmethod
      def setDefaultKeywordChars( chars ):
          """Overrides the default Keyword chars
          """
          Keyword.DEFAULT_KEYWORD_CHARS = chars
  class CaselessLiteral(Literal):
      """
      Token to match a specified string, ignoring case of letters.
      Note: the matched results will always be in the case of the given
      match string, NOT the case of the input text.

      Example::
          OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

      (Contrast with example for L{CaselessKeyword}.)
      """
      def __init__( self, matchString ):
          # the parent stores the upper-cased form, which parseImpl compares against
          super(CaselessLiteral,self).__init__( matchString.upper() )
          # Preserve the defining literal.
          self.returnString = matchString
          self.name = "'%s'" % self.returnString
          self.errmsg = "Expected " + self.name

      def parseImpl( self, instring, loc, doActions=True ):
          end = loc+self.matchLen
          candidate = instring[ loc:end ].upper()
          if candidate != self.match:
              raise ParseException(instring, loc, self.errmsg, self)
          return end, self.returnString
  class CaselessKeyword(Keyword):
      """
      Caseless version of L{Keyword}; equivalent to constructing a
      C{Keyword} with C{caseless=True}.

      Example::
          OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

      (Contrast with example for L{CaselessLiteral}.)
      """
      def __init__( self, matchString, identChars=None ):
          super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

      # parseImpl is deliberately inherited from Keyword.  The override that
      # used to live here skipped the check that the character *before* the
      # match is not an identifier character, so it behaved differently from
      # Keyword(..., caseless=True).
  2221. class CloseMatch(Token):
  2222. """
  2223. A variation on L{Literal} which matches "close" matches, that is,
  2224. strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
  2225. - C{match_string} - string to be matched
  2226. - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match
  2227. The results from a successful parse will contain the matched text from the input string and the following named results:
  2228. - C{mismatches} - a list of the positions within the match_string where mismatches were found
  2229. - C{original} - the original match_string used to compare against the input string
  2230. If C{mismatches} is an empty list, then the match was an exact match.
  2231. Example::
  2232. patt = CloseMatch("ATCATCGAATGGA")
  2233. patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
  2234. patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)
  2235. # exact match
  2236. patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})
  2237. # close match allowing up to 2 mismatches
  2238. patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
  2239. patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
  2240. """
  2241. def __init__(self, match_string, maxMismatches=1):
  2242. super(CloseMatch,self).__init__()
  2243. self.name = match_string
  2244. self.match_string = match_string
  2245. self.maxMismatches = maxMismatches
  2246. self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
  2247. self.mayIndexError = False
  2248. self.mayReturnEmpty = False
  2249. def parseImpl( self, instring, loc, doActions=True ):
  2250. start = loc
  2251. instrlen = len(instring)
  2252. maxloc = start + len(self.match_string)
  2253. if maxloc <= instrlen:
  2254. match_string = self.match_string
  2255. match_stringloc = 0
  2256. mismatches = []
  2257. maxMismatches = self.maxMismatches
  2258. for match_stringloc,s_m in enumerate(zip(instring[loc:maxloc], self.match_string)):
  2259. src,mat = s_m
  2260. if src != mat:
  2261. mismatches.append(match_stringloc)
  2262. if len(mismatches) > maxMismatches:
  2263. break
  2264. else:
  2265. loc = match_stringloc + 1
  2266. results = ParseResults([instring[start:loc]])
  2267. results['original'] = self.match_string
  2268. results['mismatches'] = mismatches
  2269. return loc, results
  2270. raise ParseException(instring, loc, self.errmsg, self)
  2271. class Word(Token):
  2272. """
  2273. Token for matching words composed of allowed character sets.
  2274. Defined with string containing all allowed initial characters,
  2275. an optional string containing allowed body characters (if omitted,
  2276. defaults to the initial character set), and an optional minimum,
  2277. maximum, and/or exact length. The default value for C{min} is 1 (a
  2278. minimum value < 1 is not valid); the default values for C{max} and C{exact}
  2279. are 0, meaning no maximum or exact length restriction. An optional
  2280. C{excludeChars} parameter can list characters that might be found in
  2281. the input C{bodyChars} string; useful to define a word of all printables
  2282. except for one or two characters, for instance.
  2283. L{srange} is useful for defining custom character set strings for defining
  2284. C{Word} expressions, using range notation from regular expression character sets.
  2285. A common mistake is to use C{Word} to match a specific literal string, as in
  2286. C{Word("Address")}. Remember that C{Word} uses the string argument to define
  2287. I{sets} of matchable characters. This expression would match "Add", "AAA",
  2288. "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
  2289. To match an exact literal string, use L{Literal} or L{Keyword}.
  2290. pyparsing includes helper strings for building Words:
  2291. - L{alphas}
  2292. - L{nums}
  2293. - L{alphanums}
  2294. - L{hexnums}
  2295. - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
  2296. - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
  2297. - L{printables} (any non-whitespace character)
  2298. Example::
  2299. # a word composed of digits
  2300. integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
  2301. # a word with a leading capital, and zero or more lowercase
  2302. capital_word = Word(alphas.upper(), alphas.lower())
  2303. # hostnames are alphanumeric, with leading alpha, and '-'
  2304. hostname = Word(alphas, alphanums+'-')
  2305. # roman numeral (not a strict parser, accepts invalid mix of characters)
  2306. roman = Word("IVXLCDM")
  2307. # any string of non-whitespace characters, except for ','
  2308. csv_value = Word(printables, excludeChars=",")
  2309. """
  2310. def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
  2311. super(Word,self).__init__()
  2312. if excludeChars:
  2313. initChars = ''.join(c for c in initChars if c not in excludeChars)
  2314. if bodyChars:
  2315. bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
  2316. self.initCharsOrig = initChars
  2317. self.initChars = set(initChars)
  2318. if bodyChars :
  2319. self.bodyCharsOrig = bodyChars
  2320. self.bodyChars = set(bodyChars)
  2321. else:
  2322. self.bodyCharsOrig = initChars
  2323. self.bodyChars = set(initChars)
  2324. self.maxSpecified = max > 0
  2325. if min < 1:
  2326. raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")
  2327. self.minLen = min
  2328. if max > 0:
  2329. self.maxLen = max
  2330. else:
  2331. self.maxLen = _MAX_INT
  2332. if exact > 0:
  2333. self.maxLen = exact
  2334. self.minLen = exact
  2335. self.name = _ustr(self)
  2336. self.errmsg = "Expected " + self.name
  2337. self.mayIndexError = False
  2338. self.asKeyword = asKeyword
  2339. if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
  2340. if self.bodyCharsOrig == self.initCharsOrig:
  2341. self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
  2342. elif len(self.initCharsOrig) == 1:
  2343. self.reString = "%s[%s]*" % \
  2344. (re.escape(self.initCharsOrig),
  2345. _escapeRegexRangeChars(self.bodyCharsOrig),)
  2346. else:
  2347. self.reString = "[%s][%s]*" % \
  2348. (_escapeRegexRangeChars(self.initCharsOrig),
  2349. _escapeRegexRangeChars(self.bodyCharsOrig),)
  2350. if self.asKeyword:
  2351. self.reString = r"\b"+self.reString+r"\b"
  2352. try:
  2353. self.re = re.compile( self.reString )
  2354. except Exception:
  2355. self.re = None
  2356. def parseImpl( self, instring, loc, doActions=True ):
  2357. if self.re:
  2358. result = self.re.match(instring,loc)
  2359. if not result:
  2360. raise ParseException(instring, loc, self.errmsg, self)
  2361. loc = result.end()
  2362. return loc, result.group()
  2363. if not(instring[ loc ] in self.initChars):
  2364. raise ParseException(instring, loc, self.errmsg, self)
  2365. start = loc
  2366. loc += 1
  2367. instrlen = len(instring)
  2368. bodychars = self.bodyChars
  2369. maxloc = start + self.maxLen
  2370. maxloc = min( maxloc, instrlen )
  2371. while loc < maxloc and instring[loc] in bodychars:
  2372. loc += 1
  2373. throwException = False
  2374. if loc - start < self.minLen:
  2375. throwException = True
  2376. if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
  2377. throwException = True
  2378. if self.asKeyword:
  2379. if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
  2380. throwException = True
  2381. if throwException:
  2382. raise ParseException(instring, loc, self.errmsg, self)
  2383. return loc, instring[start:loc]
  2384. def __str__( self ):
  2385. try:
  2386. return super(Word,self).__str__()
  2387. except Exception:
  2388. pass
  2389. if self.strRepr is None:
  2390. def charsAsStr(s):
  2391. if len(s)>4:
  2392. return s[:4]+"..."
  2393. else:
  2394. return s
  2395. if ( self.initCharsOrig != self.bodyCharsOrig ):
  2396. self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
  2397. else:
  2398. self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
  2399. return self.strRepr
  2400. class Char(Word):
  2401. """
  2402. A short-cut class for defining C{Word(characters, exact=1)},
  2403. when defining a match of any single character in a string of characters.
  2404. """
  2405. def __init__(self, charset):
  2406. super(Char, self).__init__(charset, exact=1)
  2407. self.reString = "[%s]" % _escapeRegexRangeChars(self.initCharsOrig)
  2408. self.re = re.compile( self.reString )
  2409. class Regex(Token):
  2410. r"""
  2411. Token for matching strings that match a given regular expression.
  2412. Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
  2413. If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
  2414. named parse results.
  2415. Example::
  2416. realnum = Regex(r"[+-]?\d+\.\d*")
  2417. date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
  2418. # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
  2419. roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
  2420. make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
  2421. print(make_html.transformString("h1:main title:"))
  2422. # prints "<h1>main title</h1>"
  2423. """
  2424. compiledREtype = type(re.compile("[A-Z]"))
  2425. def __init__( self, pattern, flags=0, asGroupList=False, asMatch=False):
  2426. """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags."""
  2427. super(Regex,self).__init__()
  2428. if isinstance(pattern, basestring):
  2429. if not pattern:
  2430. warnings.warn("null string passed to Regex; use Empty() instead",
  2431. SyntaxWarning, stacklevel=2)
  2432. self.pattern = pattern
  2433. self.flags = flags
  2434. try:
  2435. self.re = re.compile(self.pattern, self.flags)
  2436. self.reString = self.pattern
  2437. except sre_constants.error:
  2438. warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
  2439. SyntaxWarning, stacklevel=2)
  2440. raise
  2441. elif isinstance(pattern, Regex.compiledREtype):
  2442. self.re = pattern
  2443. self.pattern = \
  2444. self.reString = str(pattern)
  2445. self.flags = flags
  2446. else:
  2447. raise ValueError("Regex may only be constructed with a string or a compiled RE object")
  2448. self.name = _ustr(self)
  2449. self.errmsg = "Expected " + self.name
  2450. self.mayIndexError = False
  2451. self.mayReturnEmpty = True
  2452. self.asGroupList = asGroupList
  2453. self.asMatch = asMatch
  2454. def parseImpl( self, instring, loc, doActions=True ):
  2455. result = self.re.match(instring,loc)
  2456. if not result:
  2457. raise ParseException(instring, loc, self.errmsg, self)
  2458. loc = result.end()
  2459. d = result.groupdict()
  2460. if self.asMatch:
  2461. ret = result
  2462. elif self.asGroupList:
  2463. ret = result.groups()
  2464. else:
  2465. ret = ParseResults(result.group())
  2466. if d:
  2467. for k in d:
  2468. ret[k] = d[k]
  2469. return loc,ret
  2470. def __str__( self ):
  2471. try:
  2472. return super(Regex,self).__str__()
  2473. except Exception:
  2474. pass
  2475. if self.strRepr is None:
  2476. self.strRepr = "Re:(%s)" % repr(self.pattern)
  2477. return self.strRepr
  2478. def sub(self, repl):
  2479. """
  2480. Return Regex with an attached parse action to transform the parsed
  2481. result as if called using C{re.sub(expr, repl, string)}.
  2482. """
  2483. if self.asGroupList:
  2484. warnings.warn("cannot use sub() with Regex(asGroupList=True)",
  2485. SyntaxWarning, stacklevel=2)
  2486. raise SyntaxError()
  2487. if self.asMatch and callable(repl):
  2488. warnings.warn("cannot use sub() with a callable with Regex(asMatch=True)",
  2489. SyntaxWarning, stacklevel=2)
  2490. raise SyntaxError()
  2491. if self.asMatch:
  2492. def pa(tokens):
  2493. return tokens[0].expand(repl)
  2494. else:
  2495. def pa(tokens):
  2496. return self.re.sub(repl, tokens[0])
  2497. return self.addParseAction(pa)
  2498. class QuotedString(Token):
  2499. r"""
  2500. Token for matching strings that are delimited by quoting characters.
  2501. Defined with the following parameters:
  2502. - quoteChar - string of one or more characters defining the quote delimiting string
  2503. - escChar - character to escape quotes, typically backslash (default=C{None})
  2504. - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
  2505. - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
  2506. - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
  2507. - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
  2508. - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})
  2509. Example::
  2510. qs = QuotedString('"')
  2511. print(qs.searchString('lsjdf "This is the quote" sldjf'))
  2512. complex_qs = QuotedString('{{', endQuoteChar='}}')
  2513. print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
  2514. sql_qs = QuotedString('"', escQuote='""')
  2515. print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
  2516. prints::
  2517. [['This is the quote']]
  2518. [['This is the "quote"']]
  2519. [['This is the quote with "embedded" quotes']]
  2520. """
  2521. def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
  2522. super(QuotedString,self).__init__()
  2523. # remove white space from quote chars - wont work anyway
  2524. quoteChar = quoteChar.strip()
  2525. if not quoteChar:
  2526. warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
  2527. raise SyntaxError()
  2528. if endQuoteChar is None:
  2529. endQuoteChar = quoteChar
  2530. else:
  2531. endQuoteChar = endQuoteChar.strip()
  2532. if not endQuoteChar:
  2533. warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
  2534. raise SyntaxError()
  2535. self.quoteChar = quoteChar
  2536. self.quoteCharLen = len(quoteChar)
  2537. self.firstQuoteChar = quoteChar[0]
  2538. self.endQuoteChar = endQuoteChar
  2539. self.endQuoteCharLen = len(endQuoteChar)
  2540. self.escChar = escChar
  2541. self.escQuote = escQuote
  2542. self.unquoteResults = unquoteResults
  2543. self.convertWhitespaceEscapes = convertWhitespaceEscapes
  2544. if multiline:
  2545. self.flags = re.MULTILINE | re.DOTALL
  2546. self.pattern = r'%s(?:[^%s%s]' % \
  2547. ( re.escape(self.quoteChar),
  2548. _escapeRegexRangeChars(self.endQuoteChar[0]),
  2549. (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
  2550. else:
  2551. self.flags = 0
  2552. self.pattern = r'%s(?:[^%s\n\r%s]' % \
  2553. ( re.escape(self.quoteChar),
  2554. _escapeRegexRangeChars(self.endQuoteChar[0]),
  2555. (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
  2556. if len(self.endQuoteChar) > 1:
  2557. self.pattern += (
  2558. '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
  2559. _escapeRegexRangeChars(self.endQuoteChar[i]))
  2560. for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
  2561. )
  2562. if escQuote:
  2563. self.pattern += (r'|(?:%s)' % re.escape(escQuote))
  2564. if escChar:
  2565. self.pattern += (r'|(?:%s.)' % re.escape(escChar))
  2566. self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
  2567. self.pattern += (r')*%s' % re.escape(self.endQuoteChar))
  2568. try:
  2569. self.re = re.compile(self.pattern, self.flags)
  2570. self.reString = self.pattern
  2571. except sre_constants.error:
  2572. warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
  2573. SyntaxWarning, stacklevel=2)
  2574. raise
  2575. self.name = _ustr(self)
  2576. self.errmsg = "Expected " + self.name
  2577. self.mayIndexError = False
  2578. self.mayReturnEmpty = True
  2579. def parseImpl( self, instring, loc, doActions=True ):
  2580. result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
  2581. if not result:
  2582. raise ParseException(instring, loc, self.errmsg, self)
  2583. loc = result.end()
  2584. ret = result.group()
  2585. if self.unquoteResults:
  2586. # strip off quotes
  2587. ret = ret[self.quoteCharLen:-self.endQuoteCharLen]
  2588. if isinstance(ret,basestring):
  2589. # replace escaped whitespace
  2590. if '\\' in ret and self.convertWhitespaceEscapes:
  2591. ws_map = {
  2592. r'\t' : '\t',
  2593. r'\n' : '\n',
  2594. r'\f' : '\f',
  2595. r'\r' : '\r',
  2596. }
  2597. for wslit,wschar in ws_map.items():
  2598. ret = ret.replace(wslit, wschar)
  2599. # replace escaped characters
  2600. if self.escChar:
  2601. ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)
  2602. # replace escaped quotes
  2603. if self.escQuote:
  2604. ret = ret.replace(self.escQuote, self.endQuoteChar)
  2605. return loc, ret
  2606. def __str__( self ):
  2607. try:
  2608. return super(QuotedString,self).__str__()
  2609. except Exception:
  2610. pass
  2611. if self.strRepr is None:
  2612. self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
  2613. return self.strRepr
  2614. class CharsNotIn(Token):
  2615. """
  2616. Token for matching words composed of characters I{not} in a given set (will
  2617. include whitespace in matched characters if not listed in the provided exclusion set - see example).
  2618. Defined with string containing all disallowed characters, and an optional
  2619. minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
  2620. minimum value < 1 is not valid); the default values for C{max} and C{exact}
  2621. are 0, meaning no maximum or exact length restriction.
  2622. Example::
  2623. # define a comma-separated-value as anything that is not a ','
  2624. csv_value = CharsNotIn(',')
  2625. print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
  2626. prints::
  2627. ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
  2628. """
  2629. def __init__( self, notChars, min=1, max=0, exact=0 ):
  2630. super(CharsNotIn,self).__init__()
  2631. self.skipWhitespace = False
  2632. self.notChars = notChars
  2633. if min < 1:
  2634. raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")
  2635. self.minLen = min
  2636. if max > 0:
  2637. self.maxLen = max
  2638. else:
  2639. self.maxLen = _MAX_INT
  2640. if exact > 0:
  2641. self.maxLen = exact
  2642. self.minLen = exact
  2643. self.name = _ustr(self)
  2644. self.errmsg = "Expected " + self.name
  2645. self.mayReturnEmpty = ( self.minLen == 0 )
  2646. self.mayIndexError = False
  2647. def parseImpl( self, instring, loc, doActions=True ):
  2648. if instring[loc] in self.notChars:
  2649. raise ParseException(instring, loc, self.errmsg, self)
  2650. start = loc
  2651. loc += 1
  2652. notchars = self.notChars
  2653. maxlen = min( start+self.maxLen, len(instring) )
  2654. while loc < maxlen and \
  2655. (instring[loc] not in notchars):
  2656. loc += 1
  2657. if loc - start < self.minLen:
  2658. raise ParseException(instring, loc, self.errmsg, self)
  2659. return loc, instring[start:loc]
  2660. def __str__( self ):
  2661. try:
  2662. return super(CharsNotIn, self).__str__()
  2663. except Exception:
  2664. pass
  2665. if self.strRepr is None:
  2666. if len(self.notChars) > 4:
  2667. self.strRepr = "!W:(%s...)" % self.notChars[:4]
  2668. else:
  2669. self.strRepr = "!W:(%s)" % self.notChars
  2670. return self.strRepr
  2671. class White(Token):
  2672. """
  2673. Special matching class for matching whitespace. Normally, whitespace is ignored
  2674. by pyparsing grammars. This class is included when some whitespace structures
  2675. are significant. Define with a string containing the whitespace characters to be
  2676. matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
  2677. as defined for the C{L{Word}} class.
  2678. """
  2679. whiteStrs = {
  2680. " " : "<SPC>",
  2681. "\t": "<TAB>",
  2682. "\n": "<LF>",
  2683. "\r": "<CR>",
  2684. "\f": "<FF>",
  2685. }
  2686. def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
  2687. super(White,self).__init__()
  2688. self.matchWhite = ws
  2689. self.setWhitespaceChars( "".join(c for c in self.whiteChars if c not in self.matchWhite) )
  2690. #~ self.leaveWhitespace()
  2691. self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
  2692. self.mayReturnEmpty = True
  2693. self.errmsg = "Expected " + self.name
  2694. self.minLen = min
  2695. if max > 0:
  2696. self.maxLen = max
  2697. else:
  2698. self.maxLen = _MAX_INT
  2699. if exact > 0:
  2700. self.maxLen = exact
  2701. self.minLen = exact
  2702. def parseImpl( self, instring, loc, doActions=True ):
  2703. if not(instring[ loc ] in self.matchWhite):
  2704. raise ParseException(instring, loc, self.errmsg, self)
  2705. start = loc
  2706. loc += 1
  2707. maxloc = start + self.maxLen
  2708. maxloc = min( maxloc, len(instring) )
  2709. while loc < maxloc and instring[loc] in self.matchWhite:
  2710. loc += 1
  2711. if loc - start < self.minLen:
  2712. raise ParseException(instring, loc, self.errmsg, self)
  2713. return loc, instring[start:loc]
  2714. class _PositionToken(Token):
  2715. def __init__( self ):
  2716. super(_PositionToken,self).__init__()
  2717. self.name=self.__class__.__name__
  2718. self.mayReturnEmpty = True
  2719. self.mayIndexError = False
  2720. class GoToColumn(_PositionToken):
  2721. """
  2722. Token to advance to a specific column of input text; useful for tabular report scraping.
  2723. """
  2724. def __init__( self, colno ):
  2725. super(GoToColumn,self).__init__()
  2726. self.col = colno
  2727. def preParse( self, instring, loc ):
  2728. if col(loc,instring) != self.col:
  2729. instrlen = len(instring)
  2730. if self.ignoreExprs:
  2731. loc = self._skipIgnorables( instring, loc )
  2732. while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
  2733. loc += 1
  2734. return loc
  2735. def parseImpl( self, instring, loc, doActions=True ):
  2736. thiscol = col( loc, instring )
  2737. if thiscol > self.col:
  2738. raise ParseException( instring, loc, "Text not in expected column", self )
  2739. newloc = loc + self.col - thiscol
  2740. ret = instring[ loc: newloc ]
  2741. return newloc, ret
  2742. class LineStart(_PositionToken):
  2743. """
  2744. Matches if current position is at the beginning of a line within the parse string
  2745. Example::
  2746. test = '''\
  2747. AAA this line
  2748. AAA and this line
  2749. AAA but not this one
  2750. B AAA and definitely not this one
  2751. '''
  2752. for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
  2753. print(t)
  2754. Prints::
  2755. ['AAA', ' this line']
  2756. ['AAA', ' and this line']
  2757. """
  2758. def __init__( self ):
  2759. super(LineStart,self).__init__()
  2760. self.errmsg = "Expected start of line"
  2761. def parseImpl( self, instring, loc, doActions=True ):
  2762. if col(loc, instring) == 1:
  2763. return loc, []
  2764. raise ParseException(instring, loc, self.errmsg, self)
  2765. class LineEnd(_PositionToken):
  2766. """
  2767. Matches if current position is at the end of a line within the parse string
  2768. """
  2769. def __init__( self ):
  2770. super(LineEnd,self).__init__()
  2771. self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
  2772. self.errmsg = "Expected end of line"
  2773. def parseImpl( self, instring, loc, doActions=True ):
  2774. if loc<len(instring):
  2775. if instring[loc] == "\n":
  2776. return loc+1, "\n"
  2777. else:
  2778. raise ParseException(instring, loc, self.errmsg, self)
  2779. elif loc == len(instring):
  2780. return loc+1, []
  2781. else:
  2782. raise ParseException(instring, loc, self.errmsg, self)
  2783. class StringStart(_PositionToken):
  2784. """
  2785. Matches if current position is at the beginning of the parse string
  2786. """
  2787. def __init__( self ):
  2788. super(StringStart,self).__init__()
  2789. self.errmsg = "Expected start of text"
  2790. def parseImpl( self, instring, loc, doActions=True ):
  2791. if loc != 0:
  2792. # see if entire string up to here is just whitespace and ignoreables
  2793. if loc != self.preParse( instring, 0 ):
  2794. raise ParseException(instring, loc, self.errmsg, self)
  2795. return loc, []
  2796. class StringEnd(_PositionToken):
  2797. """
  2798. Matches if current position is at the end of the parse string
  2799. """
  2800. def __init__( self ):
  2801. super(StringEnd,self).__init__()
  2802. self.errmsg = "Expected end of text"
  2803. def parseImpl( self, instring, loc, doActions=True ):
  2804. if loc < len(instring):
  2805. raise ParseException(instring, loc, self.errmsg, self)
  2806. elif loc == len(instring):
  2807. return loc+1, []
  2808. elif loc > len(instring):
  2809. return loc, []
  2810. else:
  2811. raise ParseException(instring, loc, self.errmsg, self)
  2812. class WordStart(_PositionToken):
  2813. """
  2814. Matches if the current position is at the beginning of a Word, and
  2815. is not preceded by any character in a given set of C{wordChars}
  2816. (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
  2817. use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
  2818. the string being parsed, or at the beginning of a line.
  2819. """
  2820. def __init__(self, wordChars = printables):
  2821. super(WordStart,self).__init__()
  2822. self.wordChars = set(wordChars)
  2823. self.errmsg = "Not at the start of a word"
  2824. def parseImpl(self, instring, loc, doActions=True ):
  2825. if loc != 0:
  2826. if (instring[loc-1] in self.wordChars or
  2827. instring[loc] not in self.wordChars):
  2828. raise ParseException(instring, loc, self.errmsg, self)
  2829. return loc, []
  2830. class WordEnd(_PositionToken):
  2831. """
  2832. Matches if the current position is at the end of a Word, and
  2833. is not followed by any character in a given set of C{wordChars}
  2834. (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
  2835. use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
  2836. the string being parsed, or at the end of a line.
  2837. """
  2838. def __init__(self, wordChars = printables):
  2839. super(WordEnd,self).__init__()
  2840. self.wordChars = set(wordChars)
  2841. self.skipWhitespace = False
  2842. self.errmsg = "Not at the end of a word"
  2843. def parseImpl(self, instring, loc, doActions=True ):
  2844. instrlen = len(instring)
  2845. if instrlen>0 and loc<instrlen:
  2846. if (instring[loc] in self.wordChars or
  2847. instring[loc-1] not in self.wordChars):
  2848. raise ParseException(instring, loc, self.errmsg, self)
  2849. return loc, []
  2850. class ParseExpression(ParserElement):
  2851. """
  2852. Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
  2853. """
  2854. def __init__( self, exprs, savelist = False ):
  2855. super(ParseExpression,self).__init__(savelist)
  2856. if isinstance( exprs, _generatorType ):
  2857. exprs = list(exprs)
  2858. if isinstance( exprs, basestring ):
  2859. self.exprs = [ ParserElement._literalStringClass( exprs ) ]
  2860. elif isinstance( exprs, Iterable ):
  2861. exprs = list(exprs)
  2862. # if sequence of strings provided, wrap with Literal
  2863. if all(isinstance(expr, basestring) for expr in exprs):
  2864. exprs = map(ParserElement._literalStringClass, exprs)
  2865. self.exprs = list(exprs)
  2866. else:
  2867. try:
  2868. self.exprs = list( exprs )
  2869. except TypeError:
  2870. self.exprs = [ exprs ]
  2871. self.callPreparse = False
  2872. def __getitem__( self, i ):
  2873. return self.exprs[i]
  2874. def append( self, other ):
  2875. self.exprs.append( other )
  2876. self.strRepr = None
  2877. return self
  2878. def leaveWhitespace( self ):
  2879. """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
  2880. all contained expressions."""
  2881. self.skipWhitespace = False
  2882. self.exprs = [ e.copy() for e in self.exprs ]
  2883. for e in self.exprs:
  2884. e.leaveWhitespace()
  2885. return self
  2886. def ignore( self, other ):
  2887. if isinstance( other, Suppress ):
  2888. if other not in self.ignoreExprs:
  2889. super( ParseExpression, self).ignore( other )
  2890. for e in self.exprs:
  2891. e.ignore( self.ignoreExprs[-1] )
  2892. else:
  2893. super( ParseExpression, self).ignore( other )
  2894. for e in self.exprs:
  2895. e.ignore( self.ignoreExprs[-1] )
  2896. return self
  2897. def __str__( self ):
  2898. try:
  2899. return super(ParseExpression,self).__str__()
  2900. except Exception:
  2901. pass
  2902. if self.strRepr is None:
  2903. self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
  2904. return self.strRepr
  2905. def streamline( self ):
  2906. super(ParseExpression,self).streamline()
  2907. for e in self.exprs:
  2908. e.streamline()
  2909. # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
  2910. # but only if there are no parse actions or resultsNames on the nested And's
  2911. # (likewise for Or's and MatchFirst's)
  2912. if ( len(self.exprs) == 2 ):
  2913. other = self.exprs[0]
  2914. if ( isinstance( other, self.__class__ ) and
  2915. not(other.parseAction) and
  2916. other.resultsName is None and
  2917. not other.debug ):
  2918. self.exprs = other.exprs[:] + [ self.exprs[1] ]
  2919. self.strRepr = None
  2920. self.mayReturnEmpty |= other.mayReturnEmpty
  2921. self.mayIndexError |= other.mayIndexError
  2922. other = self.exprs[-1]
  2923. if ( isinstance( other, self.__class__ ) and
  2924. not(other.parseAction) and
  2925. other.resultsName is None and
  2926. not other.debug ):
  2927. self.exprs = self.exprs[:-1] + other.exprs[:]
  2928. self.strRepr = None
  2929. self.mayReturnEmpty |= other.mayReturnEmpty
  2930. self.mayIndexError |= other.mayIndexError
  2931. self.errmsg = "Expected " + _ustr(self)
  2932. return self
  2933. def setResultsName( self, name, listAllMatches=False ):
  2934. ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
  2935. return ret
  2936. def validate( self, validateTrace=[] ):
  2937. tmp = validateTrace[:]+[self]
  2938. for e in self.exprs:
  2939. e.validate(tmp)
  2940. self.checkRecursion( [] )
  2941. def copy(self):
  2942. ret = super(ParseExpression,self).copy()
  2943. ret.exprs = [e.copy() for e in self.exprs]
  2944. return ret
  class And(ParseExpression):
      """
      Requires all given C{ParseExpression}s to be found in the given order.
      Expressions may be separated by whitespace.
      May be constructed using the C{'+'} operator.
      May also be constructed using the C{'-'} operator, which will suppress backtracking.

      Example::
          integer = Word(nums)
          name_expr = OneOrMore(Word(alphas))

          expr = And([integer("id"),name_expr("name"),integer("age")])
          # more easily written as:
          expr = integer("id") + name_expr("name") + integer("age")
      """

      class _ErrorStop(Empty):
          """
          Marker element named '-'. Once C{And.parseImpl} has passed one of these,
          a failure of any later element is raised as C{ParseSyntaxException}.
          """
          def __init__(self, *args, **kwargs):
              super(And._ErrorStop,self).__init__(*args, **kwargs)
              self.name = '-'
              self.leaveWhitespace()

      def __init__( self, exprs, savelist = True ):
          super(And,self).__init__(exprs, savelist)
          self.mayReturnEmpty = all(subexpr.mayReturnEmpty for subexpr in self.exprs)
          # whitespace handling is taken from the first contained expression
          first = self.exprs[0]
          self.setWhitespaceChars( first.whiteChars )
          self.skipWhitespace = first.skipWhitespace
          self.callPreparse = True

      def parseImpl( self, instring, loc, doActions=True ):
          """
          Parse each contained expression in turn, starting at C{loc}, and
          return the final location together with the accumulated results.
          """
          # the first element is parsed with callPreParse=False, since the string
          # was already pre-parsed as part of this And's own pre-parsing
          loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
          committed = False
          for subexpr in self.exprs[1:]:
              if isinstance(subexpr, And._ErrorStop):
                  committed = True
                  continue
              if not committed:
                  loc, exprtokens = subexpr._parse( instring, loc, doActions )
              else:
                  # past an error stop: convert failures into ParseSyntaxException
                  try:
                      loc, exprtokens = subexpr._parse( instring, loc, doActions )
                  except ParseSyntaxException:
                      raise
                  except ParseBaseException as pe:
                      pe.__traceback__ = None
                      raise ParseSyntaxException._from_exception(pe)
                  except IndexError:
                      raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
              if exprtokens or exprtokens.haskeys():
                  resultlist += exprtokens
          return loc, resultlist

      def __iadd__(self, other ):
          """Implement C{+=}: append C{other} (strings are wrapped as literal expressions)."""
          if isinstance( other, basestring ):
              other = ParserElement._literalStringClass( other )
          return self.append( other ) #And( [ self, other ] )

      def checkRecursion( self, parseElementList ):
          """
          Run C{checkRecursion} on the contained expressions in order, stopping
          after the first one whose C{mayReturnEmpty} is false.
          """
          seen = parseElementList[:] + [ self ]
          for subexpr in self.exprs:
              subexpr.checkRecursion( seen )
              if not subexpr.mayReturnEmpty:
                  break

      def __str__( self ):
          if not hasattr(self,"name"):
              # build and cache the generated representation on first use
              if self.strRepr is None:
                  self.strRepr = "{" + " ".join(_ustr(subexpr) for subexpr in self.exprs) + "}"
              return self.strRepr
          return self.name
  class Or(ParseExpression):
      """
      Requires that at least one C{ParseExpression} is found.
      If two expressions match, the expression that matches the longest string will be used.
      May be constructed using the C{'^'} operator.

      Example::
          # construct Or using '^' operator

          number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
          print(number.searchString("123 3.1416 789"))
      prints::
          [['123'], ['3.1416'], ['789']]
      """
      def __init__( self, exprs, savelist = False ):
          super(Or,self).__init__(exprs, savelist)
          # with no alternatives at all, the expression is flagged as possibly empty
          self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs) if self.exprs else True

      def parseImpl( self, instring, loc, doActions=True ):
          """
          Try every alternative at C{loc}, then re-parse the successful ones from
          longest to shortest match and return the first that parses.

          Raises the exception recorded at the furthest location (with its C{msg}
          replaced by this expression's C{errmsg}), or a C{ParseException} if no
          exception was recorded.
          """
          furthest = -1
          furthestExc = None
          candidates = []
          for alt in self.exprs:
              try:
                  endloc = alt.tryParse( instring, loc )
              except ParseException as err:
                  err.__traceback__ = None
                  if err.loc > furthest:
                      furthestExc = err
                      furthest = err.loc
              except IndexError:
                  if len(instring) > furthest:
                      furthestExc = ParseException(instring,len(instring),alt.errmsg,self)
                      furthest = len(instring)
              else:
                  # save match among all matches, to retry longest to shortest
                  candidates.append((endloc, alt))

          # a stable descending sort keeps declaration order among equally long matches
          candidates.sort(key=lambda pair: pair[0], reverse=True)
          for _, alt in candidates:
              try:
                  return alt._parse( instring, loc, doActions )
              except ParseException as err:
                  err.__traceback__ = None
                  if err.loc > furthest:
                      furthestExc = err
                      furthest = err.loc

          if furthestExc is None:
              raise ParseException(instring, loc, "no defined alternatives to match", self)
          furthestExc.msg = self.errmsg
          raise furthestExc

      def __ixor__(self, other ):
          """Implement C{^=}: append C{other} (strings are wrapped as literal expressions)."""
          if isinstance( other, basestring ):
              other = ParserElement._literalStringClass( other )
          return self.append( other ) #Or( [ self, other ] )

      def __str__( self ):
          if not hasattr(self,"name"):
              # build and cache the generated representation on first use
              if self.strRepr is None:
                  self.strRepr = "{" + " ^ ".join(_ustr(alt) for alt in self.exprs) + "}"
              return self.strRepr
          return self.name

      def checkRecursion( self, parseElementList ):
          """Run C{checkRecursion} on every alternative, with this expression added to the list."""
          seen = parseElementList[:] + [ self ]
          for alt in self.exprs:
              alt.checkRecursion( seen )
  class MatchFirst(ParseExpression):
      """
      Requires that at least one C{ParseExpression} is found.
      If two expressions match, the first one listed is the one that will match.
      May be constructed using the C{'|'} operator.

      Example::
          # construct MatchFirst using '|' operator

          # watch the order of expressions to match
          number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
          print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']]

          # put more selective expression first
          number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
          print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']]
      """
      def __init__( self, exprs, savelist = False ):
          super(MatchFirst,self).__init__(exprs, savelist)
          # with no alternatives at all, the expression is flagged as possibly empty
          self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs) if self.exprs else True

      def parseImpl( self, instring, loc, doActions=True ):
          """
          Return the result of the first alternative that parses at C{loc}.

          If none parses, raise the exception recorded at the furthest location
          (with its C{msg} replaced by this expression's C{errmsg}), or a
          C{ParseException} if no exception was recorded.
          """
          furthest = -1
          furthestExc = None
          for alt in self.exprs:
              try:
                  return alt._parse( instring, loc, doActions )
              except ParseException as err:
                  if err.loc > furthest:
                      furthestExc = err
                      furthest = err.loc
              except IndexError:
                  if len(instring) > furthest:
                      furthestExc = ParseException(instring,len(instring),alt.errmsg,self)
                      furthest = len(instring)

          # only got here if no expression matched, raise exception for match that made it the furthest
          if furthestExc is None:
              raise ParseException(instring, loc, "no defined alternatives to match", self)
          furthestExc.msg = self.errmsg
          raise furthestExc

      def __ior__(self, other ):
          """Implement C{|=}: append C{other} (strings are wrapped as literal expressions)."""
          if isinstance( other, basestring ):
              other = ParserElement._literalStringClass( other )
          return self.append( other ) #MatchFirst( [ self, other ] )

      def __str__( self ):
          if not hasattr(self,"name"):
              # build and cache the generated representation on first use
              if self.strRepr is None:
                  self.strRepr = "{" + " | ".join(_ustr(alt) for alt in self.exprs) + "}"
              return self.strRepr
          return self.name

      def checkRecursion( self, parseElementList ):
          """Run C{checkRecursion} on every alternative, with this expression added to the list."""
          seen = parseElementList[:] + [ self ]
          for alt in self.exprs:
              alt.checkRecursion( seen )
  class Each(ParseExpression):
      """
      Requires all given C{ParseExpression}s to be found, but in any order.
      Expressions may be separated by whitespace.
      May be constructed using the C{'&'} operator.

      Example::
          color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
          shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
          integer = Word(nums)
          shape_attr = "shape:" + shape_type("shape")
          posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
          color_attr = "color:" + color("color")
          size_attr = "size:" + integer("size")

          # use Each (using operator '&') to accept attributes in any order
          # (shape and posn are required, color and size are optional)
          shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

          shape_spec.runTests('''
              shape: SQUARE color: BLACK posn: 100, 120
              shape: CIRCLE size: 50 color: BLUE posn: 50,80
              color:GREEN size:20 shape:TRIANGLE posn:20,40
              '''
              )
      prints::
          shape: SQUARE color: BLACK posn: 100, 120
          ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
          - color: BLACK
          - posn: ['100', ',', '120']
            - x: 100
            - y: 120
          - shape: SQUARE


          shape: CIRCLE size: 50 color: BLUE posn: 50,80
          ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
          - color: BLUE
          - posn: ['50', ',', '80']
            - x: 50
            - y: 80
          - shape: CIRCLE
          - size: 50


          color: GREEN size: 20 shape: TRIANGLE posn: 20,40
          ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
          - color: GREEN
          - posn: ['20', ',', '40']
            - x: 20
            - y: 40
          - shape: TRIANGLE
          - size: 20
      """
      def __init__( self, exprs, savelist = True ):
          super(Each,self).__init__(exprs, savelist)
          self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
          self.skipWhitespace = True
          # the expression groups used by parseImpl are built lazily, on the first parse
          self.initExprGroups = True

      def parseImpl( self, instring, loc, doActions=True ):
          """
          Match the contained expressions in whatever order they occur in
          C{instring}, then parse them for real in the order they matched.

          Returns C{(loc, results)}; raises C{ParseException} if any required
          expression was never matched.
          """
          if self.initExprGroups:
              # one-time classification of the contained expressions;
              # opt1map maps the id() of an Optional's inner expression back to the Optional
              self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
              opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
              opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
              self.optionals = opt1 + opt2
              self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
              self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
              self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
              self.required += self.multirequired
              self.initExprGroups = False
          # work on copies, since matched expressions are removed as they are found
          tmpLoc = loc
          tmpReqd = self.required[:]
          tmpOpt = self.optionals[:]
          matchOrder = []

          # trial phase: keep sweeping over the candidates with tryParse until a
          # complete sweep matches nothing
          keepMatching = True
          while keepMatching:
              tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
              failed = []
              for e in tmpExprs:
                  try:
                      tmpLoc = e.tryParse( instring, tmpLoc )
                  except ParseException:
                      failed.append(e)
                  else:
                      # record the original Optional wrapper where there is one
                      matchOrder.append(self.opt1map.get(id(e),e))
                      if e in tmpReqd:
                          tmpReqd.remove(e)
                      elif e in tmpOpt:
                          tmpOpt.remove(e)
              if len(failed) == len(tmpExprs):
                  keepMatching = False

          if tmpReqd:
              missing = ", ".join(_ustr(e) for e in tmpReqd)
              raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

          # add any unmatched Optionals, in case they have default values defined
          matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]

          # real parse: re-parse from the original loc in the recorded order,
          # this time honouring doActions
          resultlist = []
          for e in matchOrder:
              loc,results = e._parse(instring,loc,doActions)
              resultlist.append(results)

          finalResults = sum(resultlist, ParseResults([]))
          return loc, finalResults

      def __str__( self ):
          if hasattr(self,"name"):
              return self.name

          # build and cache the generated representation on first use
          if self.strRepr is None:
              self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"

          return self.strRepr

      def checkRecursion( self, parseElementList ):
          """Run C{checkRecursion} on every contained expression, with this expression added to the list."""
          subRecCheckList = parseElementList[:] + [ self ]
          for e in self.exprs:
              e.checkRecursion( subRecCheckList )
  class ParseElementEnhance(ParserElement):
      """
      Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

      Wraps a single contained expression, stored as C{self.expr}, which may be
      C{None}; every method here guards against that case.
      """
      def __init__( self, expr, savelist=False ):
          super(ParseElementEnhance,self).__init__(savelist)
          # a plain string is promoted to the configured literal-string class
          if isinstance( expr, basestring ):
              if issubclass(ParserElement._literalStringClass, Token):
                  expr = ParserElement._literalStringClass(expr)
              else:
                  expr = ParserElement._literalStringClass(Literal(expr))
          self.expr = expr
          self.strRepr = None
          if expr is not None:
              # mirror the wrapped expression's parsing attributes
              self.mayIndexError = expr.mayIndexError
              self.mayReturnEmpty = expr.mayReturnEmpty
              self.setWhitespaceChars( expr.whiteChars )
              self.skipWhitespace = expr.skipWhitespace
              self.saveAsList = expr.saveAsList
              self.callPreparse = expr.callPreparse
              self.ignoreExprs.extend(expr.ignoreExprs)

      def parseImpl( self, instring, loc, doActions=True ):
          """
          Parse with the wrapped expression (without pre-parsing again).

          Raises C{ParseException} if no expression has been set.
          """
          if self.expr is not None:
              return self.expr._parse( instring, loc, doActions, callPreParse=False )
          else:
              raise ParseException("",loc,self.errmsg,self)

      def leaveWhitespace( self ):
          """
          Disable whitespace skipping on this element and on a private copy of
          the wrapped expression. Returns C{self}.
          """
          self.skipWhitespace = False
          # check for None BEFORE copying: the original copied first and raised
          # AttributeError when no expression had been set
          if self.expr is not None:
              # copy so that other users of the same expression object are unaffected
              self.expr = self.expr.copy()
              self.expr.leaveWhitespace()
          return self

      def ignore( self, other ):
          """
          Register C{other} as an ignorable expression here and on the wrapped
          expression. A C{Suppress} already present in C{ignoreExprs} is not
          added a second time. Returns C{self}.
          """
          if isinstance( other, Suppress ):
              if other not in self.ignoreExprs:
                  super( ParseElementEnhance, self).ignore( other )
                  if self.expr is not None:
                      self.expr.ignore( self.ignoreExprs[-1] )
          else:
              super( ParseElementEnhance, self).ignore( other )
              if self.expr is not None:
                  self.expr.ignore( self.ignoreExprs[-1] )
          return self

      def streamline( self ):
          """Streamline this element and the wrapped expression. Returns C{self}."""
          super(ParseElementEnhance,self).streamline()
          if self.expr is not None:
              self.expr.streamline()
          return self

      def checkRecursion( self, parseElementList ):
          """
          Raise C{RecursiveGrammarException} if this element is already in
          C{parseElementList}; otherwise continue the check in the wrapped expression.
          """
          if self in parseElementList:
              raise RecursiveGrammarException( parseElementList+[self] )
          subRecCheckList = parseElementList[:] + [ self ]
          if self.expr is not None:
              self.expr.checkRecursion( subRecCheckList )

      def validate( self, validateTrace=[] ):
          """
          Validate the wrapped expression (if any), then run C{checkRecursion}
          on this element with an empty list.
          """
          # validateTrace is only sliced, never mutated, so the shared default is safe
          tmp = validateTrace[:]+[self]
          if self.expr is not None:
              self.expr.validate(tmp)
          self.checkRecursion( [] )

      def __str__( self ):
          # prefer the base-class representation; if it raises, fall back to a
          # generated "ClassName:(expr)" string, cached in strRepr
          try:
              return super(ParseElementEnhance,self).__str__()
          except Exception:
              pass

          if self.strRepr is None and self.expr is not None:
              self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
          return self.strRepr
  class FollowedBy(ParseElementEnhance):
      """
      Lookahead matching of the given parse expression.  C{FollowedBy}
      does I{not} advance the parsing position within the input string, it only
      verifies that the specified parse expression matches at the current
      position.  C{FollowedBy} always returns a null token list. If any
      results names are defined in the lookahead expression, those *will* be
      returned for access by name.

      Example::
          # use FollowedBy to match a label only if it is followed by a ':'
          data_word = Word(alphas)
          label = data_word + FollowedBy(':')
          attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

          OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
      prints::
          [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
      """
      def __init__( self, expr ):
          super(FollowedBy,self).__init__(expr)
          self.mayReturnEmpty = True

      def parseImpl( self, instring, loc, doActions=True ):
          """Parse the lookahead at C{loc}, empty its token list and return the unchanged C{loc}."""
          _endloc, lookahead = self.expr._parse(instring, loc, doActions=doActions)
          # drop the list items in place; the same results object is returned
          del lookahead[:]
          return loc, lookahead
  class PrecededBy(ParseElementEnhance):
      """
      Lookbehind matching of the given parse expression.  C{PrecededBy}
      does not advance the parsing position within the input string, it only
      verifies that the specified parse expression matches prior to the current
      position.  C{PrecededBy} always returns a null token list, but if
      a results name is defined on the given expression, it is returned.

      Parameters:
       - expr - expression that must match prior to the current parse location
       - retreat - (default=C{None}) - (int) maximum number of characters to
         lookbehind prior to the current parse location

      If the lookbehind expression is a string, Literal, Keyword, or a
      Word or CharsNotIn with a specified exact or maximum length, then
      the retreat parameter is not required. Otherwise, retreat should be
      specified to give a maximum number of characters to look back from
      the current parse position for a lookbehind match. If it is left as
      C{None} for such an expression, the lookbehind window is not limited
      and extends back to the start of the input string.

      Example::
          # VB-style variable names with type prefixes
          int_var = PrecededBy("#") + pyparsing_common.identifier
          str_var = PrecededBy("$") + pyparsing_common.identifier
      """
      def __init__(self, expr, retreat=None):
          super(PrecededBy, self).__init__(expr)
          self.expr = self.expr().leaveWhitespace()
          self.mayReturnEmpty = True
          self.mayIndexError = False
          # 'exact' means the lookbehind distance is known up front and a single
          # attempt at loc - retreat is made
          self.exact = False
          # basestring rather than str, consistent with the string checks used
          # by the other expression classes in this module
          if isinstance(expr, basestring):
              retreat = len(expr)
              self.exact = True
          elif isinstance(expr, (Literal, Keyword)):
              retreat = expr.matchLen
              self.exact = True
          elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
              retreat = expr.maxLen
              self.exact = True
          elif isinstance(expr, _PositionToken):
              retreat = 0
              self.exact = True
          self.retreat = retreat
          self.errmsg = "not preceded by " + _ustr(expr)
          self.skipWhitespace = False

      def parseImpl(self, instring, loc=0, doActions=True):
          """
          Verify that the lookbehind expression matches immediately before C{loc}.

          Returns C{(loc, results)} with the token list emptied; raises
          C{ParseException} (or the last failure seen) if nothing matches.
          """
          if self.exact:
              if loc < self.retreat:
                  raise ParseException(instring, loc, self.errmsg)
              start = loc - self.retreat
              _, ret = self.expr._parse(instring, start)
          else:
              # retreat specified a maximum lookbehind window, iterate
              test_expr = self.expr + StringEnd()
              instring_slice = instring[:loc]
              last_expr = ParseException(instring, loc, self.errmsg)
              # never look back past the start of the string; a retreat of None
              # means "no limit" instead of failing on None + 1
              if self.retreat is None:
                  max_back = loc
              else:
                  max_back = min(loc, self.retreat)
              # the upper bound is inclusive, so offset == loc (a match beginning
              # at index 0) is tried as well
              for offset in range(1, max_back + 1):
                  try:
                      _, ret = test_expr._parse(instring_slice, loc-offset)
                  except ParseBaseException as pbe:
                      last_expr = pbe
                  else:
                      break
              else:
                  raise last_expr
          # return empty list of tokens, but preserve any defined results names
          del ret[:]
          return loc, ret
  class NotAny(ParseElementEnhance):
      """
      Lookahead to disallow matching with the given parse expression.  C{NotAny}
      does I{not} advance the parsing position within the input string, it only
      verifies that the specified parse expression does I{not} match at the current
      position.  Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny}
      always returns a null token list.  May be constructed using the '~' operator.

      Example::
          AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

          # take care not to mistake keywords for identifiers
          ident = ~(AND | OR | NOT) + Word(alphas)
          boolean_term = Optional(NOT) + ident

          # very crude boolean expression - to support parenthesis groups and
          # operation hierarchy, use infixNotation
          boolean_expr = boolean_term + ZeroOrMore((AND | OR) + boolean_term)

          # integers that are followed by "." are actually floats
          integer = Word(nums) + ~Char(".")
      """
      def __init__( self, expr ):
          super(NotAny,self).__init__(expr)
          # set the flag directly: do NOT use self.leaveWhitespace(), since that
          # would propagate to the contained expression
          self.skipWhitespace = False
          self.mayReturnEmpty = True
          self.errmsg = "Found unwanted token, "+_ustr(self.expr)

      def parseImpl( self, instring, loc, doActions=True ):
          """Succeed with no tokens unless the contained expression can parse at C{loc}."""
          if not self.expr.canParseNext(instring, loc):
              return loc, []
          raise ParseException(instring, loc, self.errmsg, self)

      def __str__( self ):
          if not hasattr(self,"name"):
              # build and cache the generated representation on first use
              if self.strRepr is None:
                  self.strRepr = "~{" + _ustr(self.expr) + "}"
              return self.strRepr
          return self.name
  class _MultipleMatch(ParseElementEnhance):
      """
      Shared implementation of repeated matching of one expression, with an
      optional C{stopOn} sentinel that ends the repetition.
      """
      def __init__( self, expr, stopOn=None):
          super(_MultipleMatch, self).__init__(expr)
          self.saveAsList = True
          ender = stopOn
          # a plain-string sentinel is promoted to the configured literal class
          if isinstance(ender, basestring):
              ender = ParserElement._literalStringClass(ender)
          # store the negated sentinel, so a successful tryParse means "not at the sentinel"
          self.not_ender = ~ender if ender is not None else None

      def parseImpl( self, instring, loc, doActions=True ):
          """
          Match the expression once (any failure here propagates to the caller),
          then keep matching until it fails or the stopOn sentinel is found.

          Returns C{(loc, tokens)} for everything matched.
          """
          # bind frequently used attributes to locals for the loop below
          self_expr_parse = self.expr._parse
          self_skip_ignorables = self._skipIgnorables
          check_ender = self.not_ender is not None
          if check_ender:
              try_not_ender = self.not_ender.tryParse

          # must be at least one (but first see if we are the stopOn sentinel;
          # if so, fail)
          if check_ender:
              try_not_ender(instring, loc)
          loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False )
          try:
              hasIgnoreExprs = (not not self.ignoreExprs)
              while 1:
                  if check_ender:
                      try_not_ender(instring, loc)
                  if hasIgnoreExprs:
                      preloc = self_skip_ignorables( instring, loc )
                  else:
                      preloc = loc
                  loc, tmptokens = self_expr_parse( instring, preloc, doActions )
                  if tmptokens or tmptokens.haskeys():
                      tokens += tmptokens
          except (ParseException,IndexError):
              # the first failed repetition (or a sentinel hit) ends the loop;
              # what has been matched so far is the result
              pass

          return loc, tokens
  class OneOrMore(_MultipleMatch):
      """
      Repetition of one or more of the given expression.

      Parameters:
       - expr - expression that must match one or more times
       - stopOn - (default=C{None}) - expression for a terminating sentinel
            (only required if the sentinel would ordinarily match the repetition
            expression)

      Example::
          data_word = Word(alphas)
          label = data_word + FollowedBy(':')
          attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

          text = "shape: SQUARE posn: upper left color: BLACK"
          OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

          # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
          attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
          OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

          # could also be written as
          (attr_expr * (1,)).parseString(text).pprint()
      """

      def __str__( self ):
          if not hasattr(self,"name"):
              # build and cache the generated representation on first use
              if self.strRepr is None:
                  self.strRepr = "{" + _ustr(self.expr) + "}..."
              return self.strRepr
          return self.name
  class ZeroOrMore(_MultipleMatch):
      """
      Optional repetition of zero or more of the given expression.

      Parameters:
       - expr - expression that must match zero or more times
       - stopOn - (default=C{None}) - expression for a terminating sentinel
            (only required if the sentinel would ordinarily match the repetition
            expression)

      Example: similar to L{OneOrMore}
      """
      def __init__( self, expr, stopOn=None):
          super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
          self.mayReturnEmpty = True

      def parseImpl( self, instring, loc, doActions=True ):
          """Parse as the base class does, but turn a failure into an empty match at C{loc}."""
          try:
              outcome = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
          except (ParseException,IndexError):
              outcome = loc, []
          return outcome

      def __str__( self ):
          if not hasattr(self,"name"):
              # build and cache the generated representation on first use
              if self.strRepr is None:
                  self.strRepr = "[" + _ustr(self.expr) + "]..."
              return self.strRepr
          return self.name
  class _NullToken(object):
      """Placeholder object that is always falsy and whose string form is empty."""
      def __nonzero__(self):
          return False
      # same truth test under the Python 3 method name
      __bool__ = __nonzero__
      def __str__(self):
          return ""

  # module-level sentinel instance; Optional uses it as the default for its 'default' argument
  _optionalNotMatched = _NullToken()
  class Optional(ParseElementEnhance):
      """
      Optional matching of the given expression.

      Parameters:
       - expr - expression that may match zero or one time
       - default (optional) - value to be returned if the optional expression is not found.

      Example::
          # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
          zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
          zip.runTests('''
              # traditional ZIP code
              12345

              # ZIP+4 form
              12101-0001

              # invalid ZIP
              98765-
              ''')
      prints::
          # traditional ZIP code
          12345
          ['12345']

          # ZIP+4 form
          12101-0001
          ['12101-0001']

          # invalid ZIP
          98765-
               ^
          FAIL: Expected end of text (at char 5), (line:1, col:6)
      """
      def __init__( self, expr, default=_optionalNotMatched ):
          super(Optional,self).__init__( expr, savelist=False )
          self.saveAsList = self.expr.saveAsList
          self.defaultValue = default
          self.mayReturnEmpty = True

      def parseImpl( self, instring, loc, doActions=True ):
          """
          Parse the contained expression; if it fails, succeed at the unchanged
          C{loc} with the default value (when one was given) or with no tokens.
          """
          try:
              loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
          except (ParseException,IndexError):
              if self.defaultValue is _optionalNotMatched:
                  # no default was supplied: contribute nothing
                  tokens = []
              elif not self.expr.resultsName:
                  tokens = [ self.defaultValue ]
              else:
                  # also expose the default under the contained expression's results name
                  tokens = ParseResults([ self.defaultValue ])
                  tokens[self.expr.resultsName] = self.defaultValue
          return loc, tokens

      def __str__( self ):
          if not hasattr(self,"name"):
              # build and cache the generated representation on first use
              if self.strRepr is None:
                  self.strRepr = "[" + _ustr(self.expr) + "]"
              return self.strRepr
          return self.name
  class SkipTo(ParseElementEnhance):
      """
      Token for skipping over all undefined text until the matched expression is found.

      Parameters:
       - expr - target expression marking the end of the data to be skipped
       - include - (default=C{False}) if True, the target expression is also parsed
            (the skipped text and target expression are returned as a 2-element list).
       - ignore - (default=C{None}) used to define grammars (typically quoted strings and
            comments) that might contain false matches to the target expression
       - failOn - (default=C{None}) define expressions that are not allowed to be
            included in the skipped text; if found before the target expression is found,
            the SkipTo is not a match

      Example::
          report = '''
              Outstanding Issues Report - 1 Jan 2000

                 # | Severity | Description                               |  Days Open
              -----+----------+-------------------------------------------+-----------
               101 | Critical | Intermittent system crash                 |          6
                94 | Cosmetic | Spelling error on Login ('log|n')         |         14
                79 | Minor    | System slow when running too many reports |         47
              '''
          integer = Word(nums)
          SEP = Suppress('|')
          # use SkipTo to simply match everything up until the next SEP
          # - ignore quoted strings, so that a '|' character inside a quoted string does not match
          # - parse action will call token.strip() for each matched token, i.e., the description body
          string_data = SkipTo(SEP, ignore=quotedString)
          string_data.setParseAction(tokenMap(str.strip))
          ticket_expr = (integer("issue_num") + SEP
                        + string_data("sev") + SEP
                        + string_data("desc") + SEP
                        + integer("days_open"))

          for tkt in ticket_expr.searchString(report):
              print(tkt.dump())
      prints::
          ['101', 'Critical', 'Intermittent system crash', '6']
          - days_open: 6
          - desc: Intermittent system crash
          - issue_num: 101
          - sev: Critical
          ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
          - days_open: 14
          - desc: Spelling error on Login ('log|n')
          - issue_num: 94
          - sev: Cosmetic
          ['79', 'Minor', 'System slow when running too many reports', '47']
          - days_open: 47
          - desc: System slow when running too many reports
          - issue_num: 79
          - sev: Minor
      """
      def __init__( self, other, include=False, ignore=None, failOn=None ):
          super( SkipTo, self ).__init__( other )
          self.ignoreExpr = ignore
          self.mayReturnEmpty = True
          self.mayIndexError = False
          self.includeMatch = include
          self.saveAsList = False
          # a plain-string failOn is promoted to the configured literal class
          if isinstance(failOn, basestring):
              self.failOn = ParserElement._literalStringClass(failOn)
          else:
              self.failOn = failOn
          self.errmsg = "No match found for "+_ustr(self.expr)

      def parseImpl( self, instring, loc, doActions=True ):
          """
          Scan forward from C{loc} one character at a time until the target
          expression can be parsed.

          Returns C{(loc, results)}, where the results hold the skipped text
          (plus the target's tokens when C{include} was set); raises
          C{ParseException} if the end of the string is passed without a match.
          """
          startloc = loc
          instrlen = len(instring)
          expr = self.expr
          # bind the callables used in the scan loop to locals
          expr_parse = self.expr._parse
          self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
          self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

          tmploc = loc
          while tmploc <= instrlen:
              if self_failOn_canParseNext is not None:
                  # break if failOn expression matches
                  # NOTE(review): 'break' skips the while-else below, so no exception is
                  # raised here; the scan simply stops at the failOn position - confirm
                  # this is the intended way for failOn to end the skip
                  if self_failOn_canParseNext(instring, tmploc):
                      break

              if self_ignoreExpr_tryParse is not None:
                  # advance past ignore expressions
                  while 1:
                      try:
                          tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                      except ParseBaseException:
                          break

              try:
                  # probe only: parse actions are not run during the scan
                  expr_parse(instring, tmploc, doActions=False, callPreParse=False)
              except (ParseException, IndexError):
                  # no match, advance loc in string
                  tmploc += 1
              else:
                  # matched skipto expr, done
                  break

          else:
              # ran off the end of the input string without matching skipto expr, fail
              raise ParseException(instring, loc, self.errmsg, self)

          # build up return values
          loc = tmploc
          skiptext = instring[startloc:loc]
          skipresult = ParseResults(skiptext)

          if self.includeMatch:
              # parse the target for real this time, honouring doActions
              loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
              skipresult += mat

          return loc, skipresult
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """
    def __init__( self, other=None ):
        """Create the placeholder; C{other}, if given, is passed on as the contained expression."""
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """
        Attach C{other} as the contained expression and return C{self}.
        A plain string is first wrapped using C{ParserElement._literalStringClass}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        # reset the stored string representation now that the contents changed
        self.strRepr = None
        # mirror the parsing-related attributes of the newly attached expression
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """Support C{fwd <<= expr} by delegating to C{<<}."""
        return self << other

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only (the contained expression is not touched here); returns C{self}."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the contained expression once; returns C{self}."""
        if not self.streamlined:
            # flag is set before descending - presumably so that a recursive grammar
            # which reaches this Forward again does not recurse without end
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """
        Validate the contained expression (unless this element already appears in
        C{validateTrace}), then run C{checkRecursion}.
        """
        # the default list is never mutated: a fresh copy is built below
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name if there is one, otherwise C{"<ClassName>: ..."}."""
        if hasattr(self,"name"):
            return self.name
        return self.__class__.__name__ + ": ..."

        # NOTE: everything below is unreachable because of the return above.
        # stubbed out for now - creates awful memory and perf issues
        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)
            else:
                retString = "None"
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        """
        Return a copy of this element.  When no expression has been attached yet,
        the result is a new C{Forward} whose contained expression is this object.
        """
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            # point the new Forward at this one rather than at a not-yet-known expression
            ret <<= self
            return ret
  3742. class _ForwardNoRecurse(Forward):
  3743. def __str__( self ):
  3744. return "..."
  3745. class TokenConverter(ParseElementEnhance):
  3746. """
  3747. Abstract subclass of C{ParseExpression}, for converting parsed results.
  3748. """
  3749. def __init__( self, expr, savelist=False ):
  3750. super(TokenConverter,self).__init__( expr )#, savelist )
  3751. self.saveAsList = False
  3752. class Combine(TokenConverter):
  3753. """
  3754. Converter to concatenate all matching tokens to a single string.
  3755. By default, the matching patterns must also be contiguous in the input string;
  3756. this can be disabled by specifying C{'adjacent=False'} in the constructor.
  3757. Example::
  3758. real = Word(nums) + '.' + Word(nums)
  3759. print(real.parseString('3.1416')) # -> ['3', '.', '1416']
  3760. # will also erroneously match the following
  3761. print(real.parseString('3. 1416')) # -> ['3', '.', '1416']
  3762. real = Combine(Word(nums) + '.' + Word(nums))
  3763. print(real.parseString('3.1416')) # -> ['3.1416']
  3764. # no match when there are internal spaces
  3765. print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
  3766. """
  3767. def __init__( self, expr, joinString="", adjacent=True ):
  3768. super(Combine,self).__init__( expr )
  3769. # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
  3770. if adjacent:
  3771. self.leaveWhitespace()
  3772. self.adjacent = adjacent
  3773. self.skipWhitespace = True
  3774. self.joinString = joinString
  3775. self.callPreparse = True
  3776. def ignore( self, other ):
  3777. if self.adjacent:
  3778. ParserElement.ignore(self, other)
  3779. else:
  3780. super( Combine, self).ignore( other )
  3781. return self
  3782. def postParse( self, instring, loc, tokenlist ):
  3783. retToks = tokenlist.copy()
  3784. del retToks[:]
  3785. retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)
  3786. if self.resultsName and retToks.haskeys():
  3787. return [ retToks ]
  3788. else:
  3789. return retToks
  3790. class Group(TokenConverter):
  3791. """
  3792. Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.
  3793. Example::
  3794. ident = Word(alphas)
  3795. num = Word(nums)
  3796. term = ident | num
  3797. func = ident + Optional(delimitedList(term))
  3798. print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100']
  3799. func = ident + Group(Optional(delimitedList(term)))
  3800. print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']]
  3801. """
  3802. def __init__( self, expr ):
  3803. super(Group,self).__init__( expr )
  3804. self.saveAsList = True
  3805. def postParse( self, instring, loc, tokenlist ):
  3806. return [ tokenlist ]
  3807. class Dict(TokenConverter):
  3808. """
  3809. Converter to return a repetitive expression as a list, but also as a dictionary.
  3810. Each element can also be referenced using the first token in the expression as its key.
  3811. Useful for tabular report scraping when the first column can be used as a item key.
  3812. Example::
  3813. data_word = Word(alphas)
  3814. label = data_word + FollowedBy(':')
  3815. attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
  3816. text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
  3817. attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  3818. # print attributes as plain groups
  3819. print(OneOrMore(attr_expr).parseString(text).dump())
  3820. # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
  3821. result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
  3822. print(result.dump())
  3823. # access named fields as dict entries, or output as dict
  3824. print(result['shape'])
  3825. print(result.asDict())
  3826. prints::
  3827. ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
  3828. [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
  3829. - color: light blue
  3830. - posn: upper left
  3831. - shape: SQUARE
  3832. - texture: burlap
  3833. SQUARE
  3834. {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
  3835. See more examples at L{ParseResults} of accessing fields by results name.
  3836. """
  3837. def __init__( self, expr ):
  3838. super(Dict,self).__init__( expr )
  3839. self.saveAsList = True
  3840. def postParse( self, instring, loc, tokenlist ):
  3841. for i,tok in enumerate(tokenlist):
  3842. if len(tok) == 0:
  3843. continue
  3844. ikey = tok[0]
  3845. if isinstance(ikey,int):
  3846. ikey = _ustr(tok[0]).strip()
  3847. if len(tok)==1:
  3848. tokenlist[ikey] = _ParseResultsWithOffset("",i)
  3849. elif len(tok)==2 and not isinstance(tok[1],ParseResults):
  3850. tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)
  3851. else:
  3852. dictvalue = tok.copy() #ParseResults(i)
  3853. del dictvalue[0]
  3854. if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.haskeys()):
  3855. tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)
  3856. else:
  3857. tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)
  3858. if self.resultsName:
  3859. return [ tokenlist ]
  3860. else:
  3861. return tokenlist
  3862. class Suppress(TokenConverter):
  3863. """
  3864. Converter for ignoring the results of a parsed expression.
  3865. Example::
  3866. source = "a, b, c,d"
  3867. wd = Word(alphas)
  3868. wd_list1 = wd + ZeroOrMore(',' + wd)
  3869. print(wd_list1.parseString(source))
  3870. # often, delimiters that are useful during parsing are just in the
  3871. # way afterward - use Suppress to keep them out of the parsed output
  3872. wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
  3873. print(wd_list2.parseString(source))
  3874. prints::
  3875. ['a', ',', 'b', ',', 'c', ',', 'd']
  3876. ['a', 'b', 'c', 'd']
  3877. (See also L{delimitedList}.)
  3878. """
  3879. def postParse( self, instring, loc, tokenlist ):
  3880. return []
  3881. def suppress( self ):
  3882. return self
  3883. class OnlyOnce(object):
  3884. """
  3885. Wrapper for parse actions, to ensure they are only called once.
  3886. """
  3887. def __init__(self, methodCall):
  3888. self.callable = _trim_arity(methodCall)
  3889. self.called = False
  3890. def __call__(self,s,l,t):
  3891. if not self.called:
  3892. results = self.callable(s,l,t)
  3893. self.called = True
  3894. return results
  3895. raise ParseException(s,l,"")
  3896. def reset(self):
  3897. self.called = False
  3898. def traceParseAction(f):
  3899. """
  3900. Decorator for debugging parse actions.
  3901. When the parse action is called, this decorator will print C{">> entering I{method-name}(line:I{current_source_line}, I{parse_location}, I{matched_tokens})".}
  3902. When the parse action completes, the decorator will print C{"<<"} followed by the returned value, or any exception that the parse action raised.
  3903. Example::
  3904. wd = Word(alphas)
  3905. @traceParseAction
  3906. def remove_duplicate_chars(tokens):
  3907. return ''.join(sorted(set(''.join(tokens))))
  3908. wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
  3909. print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
  3910. prints::
  3911. >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
  3912. <<leaving remove_duplicate_chars (ret: 'dfjkls')
  3913. ['dfjkls']
  3914. """
  3915. f = _trim_arity(f)
  3916. def z(*paArgs):
  3917. thisFunc = f.__name__
  3918. s,l,t = paArgs[-3:]
  3919. if len(paArgs)>3:
  3920. thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
  3921. sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) )
  3922. try:
  3923. ret = f(*paArgs)
  3924. except Exception as exc:
  3925. sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
  3926. raise
  3927. sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) )
  3928. return ret
  3929. try:
  3930. z.__name__ = f.__name__
  3931. except AttributeError:
  3932. pass
  3933. return z
  3934. #
  3935. # global helpers
  3936. #
  3937. def delimitedList( expr, delim=",", combine=False ):
  3938. """
  3939. Helper to define a delimited list of expressions - the delimiter defaults to ','.
  3940. By default, the list elements and delimiters can have intervening whitespace, and
  3941. comments, but this can be overridden by passing C{combine=True} in the constructor.
  3942. If C{combine} is set to C{True}, the matching tokens are returned as a single token
  3943. string, with the delimiters included; otherwise, the matching tokens are returned
  3944. as a list of tokens, with the delimiters suppressed.
  3945. Example::
  3946. delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
  3947. delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
  3948. """
  3949. dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
  3950. if combine:
  3951. return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
  3952. else:
  3953. return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
  3954. def countedArray( expr, intExpr=None ):
  3955. """
  3956. Helper to define a counted list of expressions.
  3957. This helper defines a pattern of the form::
  3958. integer expr expr expr...
  3959. where the leading integer tells how many expr expressions follow.
  3960. The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
  3961. If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.
  3962. Example::
  3963. countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd']
  3964. # in this parser, the leading integer value is given in binary,
  3965. # '10' indicating that 2 values are in the array
  3966. binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
  3967. countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd']
  3968. """
  3969. arrayExpr = Forward()
  3970. def countFieldParseAction(s,l,t):
  3971. n = t[0]
  3972. arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
  3973. return []
  3974. if intExpr is None:
  3975. intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
  3976. else:
  3977. intExpr = intExpr.copy()
  3978. intExpr.setName("arrayLen")
  3979. intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
  3980. return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
  3981. def _flatten(L):
  3982. ret = []
  3983. for i in L:
  3984. if isinstance(i,list):
  3985. ret.extend(_flatten(i))
  3986. else:
  3987. ret.append(i)
  3988. return ret
  3989. def matchPreviousLiteral(expr):
  3990. """
  3991. Helper to define an expression that is indirectly defined from
  3992. the tokens matched in a previous expression, that is, it looks
  3993. for a 'repeat' of a previous expression. For example::
  3994. first = Word(nums)
  3995. second = matchPreviousLiteral(first)
  3996. matchExpr = first + ":" + second
  3997. will match C{"1:1"}, but not C{"1:2"}. Because this matches a
  3998. previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
  3999. If this is not desired, use C{matchPreviousExpr}.
  4000. Do I{not} use with packrat parsing enabled.
  4001. """
  4002. rep = Forward()
  4003. def copyTokenToRepeater(s,l,t):
  4004. if t:
  4005. if len(t) == 1:
  4006. rep << t[0]
  4007. else:
  4008. # flatten t tokens
  4009. tflat = _flatten(t.asList())
  4010. rep << And(Literal(tt) for tt in tflat)
  4011. else:
  4012. rep << Empty()
  4013. expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
  4014. rep.setName('(prev) ' + _ustr(expr))
  4015. return rep
  4016. def matchPreviousExpr(expr):
  4017. """
  4018. Helper to define an expression that is indirectly defined from
  4019. the tokens matched in a previous expression, that is, it looks
  4020. for a 'repeat' of a previous expression. For example::
  4021. first = Word(nums)
  4022. second = matchPreviousExpr(first)
  4023. matchExpr = first + ":" + second
  4024. will match C{"1:1"}, but not C{"1:2"}. Because this matches by
  4025. expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
  4026. the expressions are evaluated first, and then compared, so
  4027. C{"1"} is compared with C{"10"}.
  4028. Do I{not} use with packrat parsing enabled.
  4029. """
  4030. rep = Forward()
  4031. e2 = expr.copy()
  4032. rep <<= e2
  4033. def copyTokenToRepeater(s,l,t):
  4034. matchTokens = _flatten(t.asList())
  4035. def mustMatchTheseTokens(s,l,t):
  4036. theseTokens = _flatten(t.asList())
  4037. if theseTokens != matchTokens:
  4038. raise ParseException("",0,"")
  4039. rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
  4040. expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
  4041. rep.setName('(prev) ' + _ustr(expr))
  4042. return rep
  4043. def _escapeRegexRangeChars(s):
  4044. #~ escape these chars: ^-]
  4045. for c in r"\^-]":
  4046. s = s.replace(c,_bslash+c)
  4047. s = s.replace("\n",r"\n")
  4048. s = s.replace("\t",r"\t")
  4049. return _ustr(s)
  4050. def oneOf( strs, caseless=False, useRegex=True ):
  4051. """
  4052. Helper to quickly define a set of alternative Literals, and makes sure to do
  4053. longest-first testing when there is a conflict, regardless of the input order,
  4054. but returns a C{L{MatchFirst}} for best performance.
  4055. Parameters:
  4056. - strs - a string of space-delimited literals, or a collection of string literals
  4057. - caseless - (default=C{False}) - treat all literals as caseless
  4058. - useRegex - (default=C{True}) - as an optimization, will generate a Regex
  4059. object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
  4060. if creating a C{Regex} raises an exception)
  4061. Example::
  4062. comp_oper = oneOf("< = > <= >= !=")
  4063. var = Word(alphas)
  4064. number = Word(nums)
  4065. term = var | number
  4066. comparison_expr = term + comp_oper + term
  4067. print(comparison_expr.searchString("B = 12 AA=23 B<=AA AA>12"))
  4068. prints::
  4069. [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
  4070. """
  4071. if caseless:
  4072. isequal = ( lambda a,b: a.upper() == b.upper() )
  4073. masks = ( lambda a,b: b.upper().startswith(a.upper()) )
  4074. parseElementClass = CaselessLiteral
  4075. else:
  4076. isequal = ( lambda a,b: a == b )
  4077. masks = ( lambda a,b: b.startswith(a) )
  4078. parseElementClass = Literal
  4079. symbols = []
  4080. if isinstance(strs,basestring):
  4081. symbols = strs.split()
  4082. elif isinstance(strs, Iterable):
  4083. symbols = list(strs)
  4084. else:
  4085. warnings.warn("Invalid argument to oneOf, expected string or iterable",
  4086. SyntaxWarning, stacklevel=2)
  4087. if not symbols:
  4088. return NoMatch()
  4089. i = 0
  4090. while i < len(symbols)-1:
  4091. cur = symbols[i]
  4092. for j,other in enumerate(symbols[i+1:]):
  4093. if ( isequal(other, cur) ):
  4094. del symbols[i+j+1]
  4095. break
  4096. elif ( masks(cur, other) ):
  4097. del symbols[i+j+1]
  4098. symbols.insert(i,other)
  4099. cur = other
  4100. break
  4101. else:
  4102. i += 1
  4103. if not caseless and useRegex:
  4104. #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
  4105. try:
  4106. if len(symbols)==len("".join(symbols)):
  4107. return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
  4108. else:
  4109. return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
  4110. except Exception:
  4111. warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
  4112. SyntaxWarning, stacklevel=2)
  4113. # last resort, just use MatchFirst
  4114. return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
  4115. def dictOf( key, value ):
  4116. """
  4117. Helper to easily and clearly define a dictionary by specifying the respective patterns
  4118. for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
  4119. in the proper order. The key pattern can include delimiting markers or punctuation,
  4120. as long as they are suppressed, thereby leaving the significant key text. The value
  4121. pattern can include named results, so that the C{Dict} results can include named token
  4122. fields.
  4123. Example::
  4124. text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
  4125. attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
  4126. print(OneOrMore(attr_expr).parseString(text).dump())
  4127. attr_label = label
  4128. attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)
  4129. # similar to Dict, but simpler call format
  4130. result = dictOf(attr_label, attr_value).parseString(text)
  4131. print(result.dump())
  4132. print(result['shape'])
  4133. print(result.shape) # object attribute access works too
  4134. print(result.asDict())
  4135. prints::
  4136. [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
  4137. - color: light blue
  4138. - posn: upper left
  4139. - shape: SQUARE
  4140. - texture: burlap
  4141. SQUARE
  4142. SQUARE
  4143. {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
  4144. """
  4145. return Dict( ZeroOrMore( Group ( key + value ) ) )
  4146. def originalTextFor(expr, asString=True):
  4147. """
  4148. Helper to return the original, untokenized text for a given expression. Useful to
  4149. restore the parsed fields of an HTML start tag into the raw tag text itself, or to
  4150. revert separate tokens with intervening whitespace back to the original matching
  4151. input text. By default, returns astring containing the original parsed text.
  4152. If the optional C{asString} argument is passed as C{False}, then the return value is a
  4153. C{L{ParseResults}} containing any results names that were originally matched, and a
  4154. single token containing the original matched text from the input string. So if
  4155. the expression passed to C{L{originalTextFor}} contains expressions with defined
  4156. results names, you must set C{asString} to C{False} if you want to preserve those
  4157. results name values.
  4158. Example::
  4159. src = "this is test <b> bold <i>text</i> </b> normal text "
  4160. for tag in ("b","i"):
  4161. opener,closer = makeHTMLTags(tag)
  4162. patt = originalTextFor(opener + SkipTo(closer) + closer)
  4163. print(patt.searchString(src)[0])
  4164. prints::
  4165. ['<b> bold <i>text</i> </b>']
  4166. ['<i>text</i>']
  4167. """
  4168. locMarker = Empty().setParseAction(lambda s,loc,t: loc)
  4169. endlocMarker = locMarker.copy()
  4170. endlocMarker.callPreparse = False
  4171. matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")
  4172. if asString:
  4173. extractText = lambda s,l,t: s[t._original_start:t._original_end]
  4174. else:
  4175. def extractText(s,l,t):
  4176. t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
  4177. matchExpr.setParseAction(extractText)
  4178. matchExpr.ignoreExprs = expr.ignoreExprs
  4179. return matchExpr
  4180. def ungroup(expr):
  4181. """
  4182. Helper to undo pyparsing's default grouping of And expressions, even
  4183. if all but one are non-empty.
  4184. """
  4185. return TokenConverter(expr).setParseAction(lambda t:t[0])
  4186. def locatedExpr(expr):
  4187. """
  4188. Helper to decorate a returned token with its starting and ending locations in the input string.
  4189. This helper adds the following results names:
  4190. - locn_start = location where matched expression begins
  4191. - locn_end = location where matched expression ends
  4192. - value = the actual parsed results
  4193. Be careful if the input text contains C{<TAB>} characters, you may want to call
  4194. C{L{ParserElement.parseWithTabs}}
  4195. Example::
  4196. wd = Word(alphas)
  4197. for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
  4198. print(match)
  4199. prints::
  4200. [[0, 'ljsdf', 5]]
  4201. [[8, 'lksdjjf', 15]]
  4202. [[18, 'lkkjj', 23]]
  4203. """
  4204. locator = Empty().setParseAction(lambda s,l,t: l)
  4205. return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
  4206. # convenience constants for positional expressions
  4207. empty = Empty().setName("empty")
  4208. lineStart = LineStart().setName("lineStart")
  4209. lineEnd = LineEnd().setName("lineEnd")
  4210. stringStart = StringStart().setName("stringStart")
  4211. stringEnd = StringEnd().setName("stringEnd")
  4212. _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
  4213. _escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip(r'\0x'),16)))
  4214. _escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
  4215. _singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
  4216. _charRange = Group(_singleChar + Suppress("-") + _singleChar)
  4217. _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
  4218. def srange(s):
  4219. r"""
  4220. Helper to easily define string ranges for use in Word construction. Borrows
  4221. syntax from regexp '[]' string range definitions::
  4222. srange("[0-9]") -> "0123456789"
  4223. srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
  4224. srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
  4225. The input string must be enclosed in []'s, and the returned string is the expanded
  4226. character set joined into a single string.
  4227. The values enclosed in the []'s may be:
  4228. - a single character
  4229. - an escaped character with a leading backslash (such as C{\-} or C{\]})
  4230. - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
  4231. (C{\0x##} is also supported for backwards compatibility)
  4232. - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
  4233. - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
  4234. - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
  4235. """
  4236. _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
  4237. try:
  4238. return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
  4239. except Exception:
  4240. return ""
  4241. def matchOnlyAtCol(n):
  4242. """
  4243. Helper method for defining parse actions that require matching at a specific
  4244. column in the input text.
  4245. """
  4246. def verifyCol(strg,locn,toks):
  4247. if col(locn,strg) != n:
  4248. raise ParseException(strg,locn,"matched token not at column %d" % n)
  4249. return verifyCol
  4250. def replaceWith(replStr):
  4251. """
  4252. Helper method for common parse actions that simply return a literal value. Especially
  4253. useful when used with C{L{transformString<ParserElement.transformString>}()}.
  4254. Example::
  4255. num = Word(nums).setParseAction(lambda toks: int(toks[0]))
  4256. na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
  4257. term = na | num
  4258. OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
  4259. """
  4260. return lambda s,l,t: [replStr]
  4261. def removeQuotes(s,l,t):
  4262. """
  4263. Helper parse action for removing quotation marks from parsed quoted strings.
  4264. Example::
  4265. # by default, quotation marks are included in parsed results
  4266. quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
  4267. # use removeQuotes to strip quotation marks from parsed results
  4268. quotedString.setParseAction(removeQuotes)
  4269. quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
  4270. """
  4271. return t[0][1:-1]
  4272. def tokenMap(func, *args):
  4273. """
  4274. Helper to define a parse action by mapping a function to all elements of a ParseResults list.If any additional
  4275. args are passed, they are forwarded to the given function as additional arguments after
  4276. the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
  4277. parsed data to an integer using base 16.
  4278. Example (compare the last to example in L{ParserElement.transformString}::
  4279. hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
  4280. hex_ints.runTests('''
  4281. 00 11 22 aa FF 0a 0d 1a
  4282. ''')
  4283. upperword = Word(alphas).setParseAction(tokenMap(str.upper))
  4284. OneOrMore(upperword).runTests('''
  4285. my kingdom for a horse
  4286. ''')
  4287. wd = Word(alphas).setParseAction(tokenMap(str.title))
  4288. OneOrMore(wd).setParseAction(' '.join).runTests('''
  4289. now is the winter of our discontent made glorious summer by this sun of york
  4290. ''')
  4291. prints::
  4292. 00 11 22 aa FF 0a 0d 1a
  4293. [0, 17, 34, 170, 255, 10, 13, 26]
  4294. my kingdom for a horse
  4295. ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']
  4296. now is the winter of our discontent made glorious summer by this sun of york
  4297. ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
  4298. """
  4299. def pa(s,l,t):
  4300. return [func(tokn, *args) for tokn in t]
  4301. try:
  4302. func_name = getattr(func, '__name__',
  4303. getattr(func, '__class__').__name__)
  4304. except Exception:
  4305. func_name = str(func)
  4306. pa.__name__ = func_name
  4307. return pa
# Ready-made parse actions built with tokenMap: each one passes every matched token
# through _ustr() and then changes its case.
upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
  4312. def _makeTags(tagStr, xml):
  4313. """Internal helper to construct opening and closing tag expressions, given a tag name"""
  4314. if isinstance(tagStr,basestring):
  4315. resname = tagStr
  4316. tagStr = Keyword(tagStr, caseless=not xml)
  4317. else:
  4318. resname = tagStr.name
  4319. tagAttrName = Word(alphas,alphanums+"_-:")
  4320. if (xml):
  4321. tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
  4322. openTag = Suppress("<") + tagStr("tag") + \
  4323. Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
  4324. Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
  4325. else:
  4326. printablesLessRAbrack = "".join(c for c in printables if c not in ">")
  4327. tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
  4328. openTag = Suppress("<") + tagStr("tag") + \
  4329. Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
  4330. Optional( Suppress("=") + tagAttrValue ) ))) + \
  4331. Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
  4332. closeTag = Combine(_L("</") + tagStr + ">")
  4333. openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % resname)
  4334. closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % resname)
  4335. openTag.tag = resname
  4336. closeTag.tag = resname
  4337. return openTag, closeTag
  4338. def makeHTMLTags(tagStr):
  4339. """
  4340. Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches
  4341. tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values.
  4342. Example::
  4343. text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
  4344. # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
  4345. a,a_end = makeHTMLTags("A")
  4346. link_expr = a + SkipTo(a_end)("link_text") + a_end
  4347. for link in link_expr.searchString(text):
  4348. # attributes in the <A> tag (like "href" shown here) are also accessible as named results
  4349. print(link.link_text, '->', link.href)
  4350. prints::
  4351. pyparsing -> http://pyparsing.wikispaces.com
  4352. """
  4353. return _makeTags( tagStr, False )
  4354. def makeXMLTags(tagStr):
  4355. """
  4356. Helper to construct opening and closing tag expressions for XML, given a tag name. Matches
  4357. tags only in the given upper/lower case.
  4358. Example: similar to L{makeHTMLTags}
  4359. """
  4360. return _makeTags( tagStr, True )
  4361. def withAttribute(*args,**attrDict):
  4362. """
  4363. Helper to create a validating parse action to be used with start tags created
  4364. with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
  4365. with a required attribute value, to avoid false matches on common tags such as
  4366. C{<TD>} or C{<DIV>}.
  4367. Call C{withAttribute} with a series of attribute names and values. Specify the list
  4368. of filter attributes names and values as:
  4369. - keyword arguments, as in C{(align="right")}, or
  4370. - as an explicit dict with C{**} operator, when an attribute name is also a Python
  4371. reserved word, as in C{**{"class":"Customer", "align":"right"}}
  4372. - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
  4373. For attribute names with a namespace prefix, you must use the second form. Attribute
  4374. names are matched insensitive to upper/lower case.
  4375. If just testing for C{class} (with or without a namespace), use C{L{withClass}}.
  4376. To verify that the attribute exists, but without specifying a value, pass
  4377. C{withAttribute.ANY_VALUE} as the value.
  4378. Example::
  4379. html = '''
  4380. <div>
  4381. Some text
  4382. <div type="grid">1 4 0 1 0</div>
  4383. <div type="graph">1,3 2,3 1,1</div>
  4384. <div>this has no type</div>
  4385. </div>
  4386. '''
  4387. div,div_end = makeHTMLTags("div")
  4388. # only match div tag having a type attribute with value "grid"
  4389. div_grid = div().setParseAction(withAttribute(type="grid"))
  4390. grid_expr = div_grid + SkipTo(div | div_end)("body")
  4391. for grid_header in grid_expr.searchString(html):
  4392. print(grid_header.body)
  4393. # construct a match with any div tag having a type attribute, regardless of the value
  4394. div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
  4395. div_expr = div_any_type + SkipTo(div | div_end)("body")
  4396. for div_header in div_expr.searchString(html):
  4397. print(div_header.body)
  4398. prints::
  4399. 1 4 0 1 0
  4400. 1 4 0 1 0
  4401. 1,3 2,3 1,1
  4402. """
  4403. if args:
  4404. attrs = args[:]
  4405. else:
  4406. attrs = attrDict.items()
  4407. attrs = [(k,v) for k,v in attrs]
  4408. def pa(s,l,tokens):
  4409. for attrName,attrValue in attrs:
  4410. if attrName not in tokens:
  4411. raise ParseException(s,l,"no matching attribute " + attrName)
  4412. if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
  4413. raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
  4414. (attrName, tokens[attrName], attrValue))
  4415. return pa
  4416. withAttribute.ANY_VALUE = object()
  4417. def withClass(classname, namespace=''):
  4418. """
  4419. Simplified version of C{L{withAttribute}} when matching on a div class - made
  4420. difficult because C{class} is a reserved word in Python.
  4421. Example::
  4422. html = '''
  4423. <div>
  4424. Some text
  4425. <div class="grid">1 4 0 1 0</div>
  4426. <div class="graph">1,3 2,3 1,1</div>
  4427. <div>this &lt;div&gt; has no class</div>
  4428. </div>
  4429. '''
  4430. div,div_end = makeHTMLTags("div")
  4431. div_grid = div().setParseAction(withClass("grid"))
  4432. grid_expr = div_grid + SkipTo(div | div_end)("body")
  4433. for grid_header in grid_expr.searchString(html):
  4434. print(grid_header.body)
  4435. div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
  4436. div_expr = div_any_type + SkipTo(div | div_end)("body")
  4437. for div_header in div_expr.searchString(html):
  4438. print(div_header.body)
  4439. prints::
  4440. 1 4 0 1 0
  4441. 1 4 0 1 0
  4442. 1,3 2,3 1,1
  4443. """
  4444. classattr = "%s:class" % namespace if namespace else "class"
  4445. return withAttribute(**{classattr : classname})
# Namespace holding the associativity markers passed as the third member of
# each operator tuple given to infixNotation. Each marker is a distinct
# anonymous object, so it can only be matched by using these constants.
opAssoc = SimpleNamespace()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
  4449. def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
  4450. """
  4451. Helper method for constructing grammars of expressions made up of
  4452. operators working in a precedence hierarchy. Operators may be unary or
  4453. binary, left- or right-associative. Parse actions can also be attached
  4454. to operator expressions. The generated parser will also recognize the use
  4455. of parentheses to override operator precedences (see example below).
  4456. Note: if you define a deep operator list, you may see performance issues
  4457. when using infixNotation. See L{ParserElement.enablePackrat} for a
  4458. mechanism to potentially improve your parser performance.
  4459. Parameters:
  4460. - baseExpr - expression representing the most basic element for the nested
  4461. - opList - list of tuples, one for each operator precedence level in the
  4462. expression grammar; each tuple is of the form
  4463. (opExpr, numTerms, rightLeftAssoc, parseAction), where:
  4464. - opExpr is the pyparsing expression for the operator;
  4465. may also be a string, which will be converted to a Literal;
  4466. if numTerms is 3, opExpr is a tuple of two expressions, for the
  4467. two operators separating the 3 terms
  4468. - numTerms is the number of terms for this operator (must
  4469. be 1, 2, or 3)
  4470. - rightLeftAssoc is the indicator whether the operator is
  4471. right or left associative, using the pyparsing-defined
  4472. constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
  4473. - parseAction is the parse action to be associated with
  4474. expressions matching this operator expression (the
  4475. parse action tuple member may be omitted); if the parse action
  4476. is passed a tuple or list of functions, this is equivalent to
  4477. calling C{setParseAction(*fn)} (L{ParserElement.setParseAction})
  4478. - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
  4479. - rpar - expression for matching right-parentheses (default=C{Suppress(')')})
  4480. Example::
  4481. # simple example of four-function arithmetic with ints and variable names
  4482. integer = pyparsing_common.signed_integer
  4483. varname = pyparsing_common.identifier
  4484. arith_expr = infixNotation(integer | varname,
  4485. [
  4486. ('-', 1, opAssoc.RIGHT),
  4487. (oneOf('* /'), 2, opAssoc.LEFT),
  4488. (oneOf('+ -'), 2, opAssoc.LEFT),
  4489. ])
  4490. arith_expr.runTests('''
  4491. 5+3*6
  4492. (5+3)*6
  4493. -2--11
  4494. ''', fullDump=False)
  4495. prints::
  4496. 5+3*6
  4497. [[5, '+', [3, '*', 6]]]
  4498. (5+3)*6
  4499. [[[5, '+', 3], '*', 6]]
  4500. -2--11
  4501. [[['-', 2], '-', ['-', 11]]]
  4502. """
  4503. # captive version of FollowedBy that does not do parse actions or capture results names
  4504. class _FB(FollowedBy):
  4505. def parseImpl(self, instring, loc, doActions=True):
  4506. self.expr.tryParse(instring, loc)
  4507. return loc, []
  4508. ret = Forward()
  4509. lastExpr = baseExpr | ( lpar + ret + rpar )
  4510. for i,operDef in enumerate(opList):
  4511. opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
  4512. termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr
  4513. if arity == 3:
  4514. if opExpr is None or len(opExpr) != 2:
  4515. raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
  4516. opExpr1, opExpr2 = opExpr
  4517. thisExpr = Forward().setName(termName)
  4518. if rightLeftAssoc == opAssoc.LEFT:
  4519. if arity == 1:
  4520. matchExpr = _FB(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
  4521. elif arity == 2:
  4522. if opExpr is not None:
  4523. matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
  4524. else:
  4525. matchExpr = _FB(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
  4526. elif arity == 3:
  4527. matchExpr = _FB(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
  4528. Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
  4529. else:
  4530. raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
  4531. elif rightLeftAssoc == opAssoc.RIGHT:
  4532. if arity == 1:
  4533. # try to avoid LR with this extra test
  4534. if not isinstance(opExpr, Optional):
  4535. opExpr = Optional(opExpr)
  4536. matchExpr = _FB(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
  4537. elif arity == 2:
  4538. if opExpr is not None:
  4539. matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
  4540. else:
  4541. matchExpr = _FB(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
  4542. elif arity == 3:
  4543. matchExpr = _FB(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
  4544. Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
  4545. else:
  4546. raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
  4547. else:
  4548. raise ValueError("operator must indicate right or left associativity")
  4549. if pa:
  4550. if isinstance(pa, (tuple, list)):
  4551. matchExpr.setParseAction(*pa)
  4552. else:
  4553. matchExpr.setParseAction(pa)
  4554. thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
  4555. lastExpr = thisExpr
  4556. ret <<= lastExpr
  4557. return ret
operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

# Quoted-string expressions. Inside the quotes the regexes accept any character
# other than the quote, newline, carriage return or backslash; a doubled quote
# character; or a backslash escape (backslash + one char, or \x followed by hex digits).
# The closing quote is matched as a separate literal and joined back on by Combine.
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
# either quoting style; the alternatives repeat the two patterns above
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
# a quoted string prefixed with 'u' (built on a copy, so quotedString itself is untouched)
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
  4565. def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
  4566. """
  4567. Helper method for defining nested lists enclosed in opening and closing
  4568. delimiters ("(" and ")" are the default).
  4569. Parameters:
  4570. - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
  4571. - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
  4572. - content - expression for items within the nested lists (default=C{None})
  4573. - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})
  4574. If an expression is not provided for the content argument, the nested
  4575. expression will capture all whitespace-delimited content between delimiters
  4576. as a list of separate values.
  4577. Use the C{ignoreExpr} argument to define expressions that may contain
  4578. opening or closing characters that should not be treated as opening
  4579. or closing characters for nesting, such as quotedString or a comment
  4580. expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
  4581. The default is L{quotedString}, but if no expressions are to be ignored,
  4582. then pass C{None} for this argument.
  4583. Example::
  4584. data_type = oneOf("void int short long char float double")
  4585. decl_data_type = Combine(data_type + Optional(Word('*')))
  4586. ident = Word(alphas+'_', alphanums+'_')
  4587. number = pyparsing_common.number
  4588. arg = Group(decl_data_type + ident)
  4589. LPAR,RPAR = map(Suppress, "()")
  4590. code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))
  4591. c_function = (decl_data_type("type")
  4592. + ident("name")
  4593. + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
  4594. + code_body("body"))
  4595. c_function.ignore(cStyleComment)
  4596. source_code = '''
  4597. int is_odd(int x) {
  4598. return (x%2);
  4599. }
  4600. int dec_to_hex(char hchar) {
  4601. if (hchar >= '0' && hchar <= '9') {
  4602. return (ord(hchar)-ord('0'));
  4603. } else {
  4604. return (10+ord(hchar)-ord('A'));
  4605. }
  4606. }
  4607. '''
  4608. for func in c_function.searchString(source_code):
  4609. print("%(name)s (%(type)s) args: %(args)s" % func)
  4610. prints::
  4611. is_odd (int) args: [['int', 'x']]
  4612. dec_to_hex (int) args: [['char', 'hchar']]
  4613. """
  4614. if opener == closer:
  4615. raise ValueError("opening and closing strings cannot be the same")
  4616. if content is None:
  4617. if isinstance(opener,basestring) and isinstance(closer,basestring):
  4618. if len(opener) == 1 and len(closer)==1:
  4619. if ignoreExpr is not None:
  4620. content = (Combine(OneOrMore(~ignoreExpr +
  4621. CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
  4622. ).setParseAction(lambda t:t[0].strip()))
  4623. else:
  4624. content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
  4625. ).setParseAction(lambda t:t[0].strip()))
  4626. else:
  4627. if ignoreExpr is not None:
  4628. content = (Combine(OneOrMore(~ignoreExpr +
  4629. ~Literal(opener) + ~Literal(closer) +
  4630. CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
  4631. ).setParseAction(lambda t:t[0].strip()))
  4632. else:
  4633. content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
  4634. CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
  4635. ).setParseAction(lambda t:t[0].strip()))
  4636. else:
  4637. raise ValueError("opening and closing arguments must be strings if no content expression is given")
  4638. ret = Forward()
  4639. if ignoreExpr is not None:
  4640. ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
  4641. else:
  4642. ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
  4643. ret.setName('nested %s%s expression' % (opener,closer))
  4644. return ret
  4645. def indentedBlock(blockStatementExpr, indentStack, indent=True):
  4646. """
  4647. Helper method for defining space-delimited indentation blocks, such as
  4648. those used to define block statements in Python source code.
  4649. Parameters:
  4650. - blockStatementExpr - expression defining syntax of statement that
  4651. is repeated within the indented block
  4652. - indentStack - list created by caller to manage indentation stack
  4653. (multiple statementWithIndentedBlock expressions within a single grammar
  4654. should share a common indentStack)
  4655. - indent - boolean indicating whether block must be indented beyond the
  4656. the current level; set to False for block of left-most statements
  4657. (default=C{True})
  4658. A valid block must contain at least one C{blockStatement}.
  4659. Example::
  4660. data = '''
  4661. def A(z):
  4662. A1
  4663. B = 100
  4664. G = A2
  4665. A2
  4666. A3
  4667. B
  4668. def BB(a,b,c):
  4669. BB1
  4670. def BBA():
  4671. bba1
  4672. bba2
  4673. bba3
  4674. C
  4675. D
  4676. def spam(x,y):
  4677. def eggs(z):
  4678. pass
  4679. '''
  4680. indentStack = [1]
  4681. stmt = Forward()
  4682. identifier = Word(alphas, alphanums)
  4683. funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
  4684. func_body = indentedBlock(stmt, indentStack)
  4685. funcDef = Group( funcDecl + func_body )
  4686. rvalue = Forward()
  4687. funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
  4688. rvalue << (funcCall | identifier | Word(nums))
  4689. assignment = Group(identifier + "=" + rvalue)
  4690. stmt << ( funcDef | assignment | identifier )
  4691. module_body = OneOrMore(stmt)
  4692. parseTree = module_body.parseString(data)
  4693. parseTree.pprint()
  4694. prints::
  4695. [['def',
  4696. 'A',
  4697. ['(', 'z', ')'],
  4698. ':',
  4699. [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
  4700. 'B',
  4701. ['def',
  4702. 'BB',
  4703. ['(', 'a', 'b', 'c', ')'],
  4704. ':',
  4705. [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
  4706. 'C',
  4707. 'D',
  4708. ['def',
  4709. 'spam',
  4710. ['(', 'x', 'y', ')'],
  4711. ':',
  4712. [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
  4713. """
  4714. def checkPeerIndent(s,l,t):
  4715. if l >= len(s): return
  4716. curCol = col(l,s)
  4717. if curCol != indentStack[-1]:
  4718. if curCol > indentStack[-1]:
  4719. raise ParseFatalException(s,l,"illegal nesting")
  4720. raise ParseException(s,l,"not a peer entry")
  4721. def checkSubIndent(s,l,t):
  4722. curCol = col(l,s)
  4723. if curCol > indentStack[-1]:
  4724. indentStack.append( curCol )
  4725. else:
  4726. raise ParseException(s,l,"not a subentry")
  4727. def checkUnindent(s,l,t):
  4728. if l >= len(s): return
  4729. curCol = col(l,s)
  4730. if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
  4731. raise ParseException(s,l,"not an unindent")
  4732. indentStack.pop()
  4733. NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
  4734. INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
  4735. PEER = Empty().setParseAction(checkPeerIndent).setName('')
  4736. UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
  4737. if indent:
  4738. smExpr = Group( Optional(NL) +
  4739. #~ FollowedBy(blockStatementExpr) +
  4740. INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
  4741. else:
  4742. smExpr = Group( Optional(NL) +
  4743. (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
  4744. blockStatementExpr.ignore(_bslash + LineEnd())
  4745. return smExpr.setName('indented block')
# Character sets built by srange from 8-bit range specs; the punctuation set
# lists 0xd7 and 0xf7, which the alphabetic ranges skip over.
# NOTE(review): how srange interprets the "\0x.." notation is not visible here - confirm.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# generic open/close tag expressions, matching any tag name made of letters, digits, '_' and ':'
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# entity name -> replacement character: gt '>', lt '<', amp '&', nbsp ' ', quot '"', apos "'"
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
# matches "&name;" for the names above, exposing the name as the "entity" named group
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
  4751. def replaceHTMLEntity(t):
  4752. """Helper parser action to replace common HTML entities with their special characters"""
  4753. return _htmlEntityMap.get(t.entity)
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# The regex consumes "/*" and everything up to, but not including, the first "*/";
# the terminator is matched as a separate literal and joined back on by Combine.
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
"Comment of the form C{/* ... */}"

# non-greedy, so the comment ends at the first "-->"; [\s\S] also spans newlines
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

# everything up to the end of the current line, with leading whitespace kept
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# a backslash immediately before the newline continues the comment onto the next line
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

# the C-style pattern is repeated inline here rather than reusing cStyleComment
cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

# alias: the very same expression object as cppStyleComment, not a copy
javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# one item of a comma-separated list: printable words (commas excluded), where
# interior spaces/tabs are kept only if not followed by a comma or the line end
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# each item is optional, so an empty field yields the default ""
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
   This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""
# some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
    """
    Here are some common low-level expressions that may be useful in jump-starting parser development:
     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
     - common L{programming identifiers<identifier>}
     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
     - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
     - L{UUID<uuid>}
     - L{comma-separated list<comma_separated_list>}
    Parse actions:
     - C{L{convertToInteger}}
     - C{L{convertToFloat}}
     - C{L{convertToDate}}
     - C{L{convertToDatetime}}
     - C{L{stripHTMLTags}}
     - C{L{upcaseTokens}}
     - C{L{downcaseTokens}}

    Example::
        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')
    prints::
        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    # NOTE: definition order matters in this class body - later attributes are
    # built from earlier ones (e.g. fraction from signed_integer, number from
    # sci_real/real/signed_integer, ipv6_address from the _ipv6 helpers).

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    # tokenMap(int,16) passes 16 as the extra argument to int, i.e. base-16 conversion
    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    # NOTE(review): signed_integer() presumably yields a copy, so that swapping in the
    # float conversion does not alter signed_integer itself - confirm against ParserElement.__call__
    fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    # collapse the matched tokens to numerator / denominator (first token over last token)
    fraction.addParseAction(lambda t: t[0]/t[-1])

    mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    # the integer part and the fraction part are added together
    mixed_integer.addParseAction(sum)

    # requires a decimal point; digits after the point are optional
    real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    # either digits with an exponent, or digits with a decimal point and an optional exponent
    sci_real = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    # four dot-separated octets, each alternative limited to 0-255
    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"

    # one IPv6 group: 1 to 4 hex digits
    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    # full form: exactly eight colon-separated groups
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
    # short form: "::" with up to seven groups on either side
    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
    # reject short forms whose two sides together hold eight or more groups
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    # IPv4-mapped form: literal "::ffff:" prefix followed by an IPv4 address
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # NOTE: the regex matches six hex pairs (the attribute docstring below shows five);
    # the backreference \1 forces the same delimiter between every pair
    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})

        The returned parse action raises C{ParseException} when the first
        token does not match C{fmt}.

        Example::
            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))
        prints::
            [datetime.date(1999, 12, 31)]
        """
        def cvt_fn(s,l,t):
            try:
                return datetime.strptime(t[0], fmt).date()
            except ValueError as ve:
                # report a non-matching date as a parse failure at this location
                raise ParseException(s, l, str(ve))
        return cvt_fn

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """
        Helper to create a parse action for converting parsed datetime string to Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

        The returned parse action raises C{ParseException} when the first
        token does not match C{fmt}.

        Example::
            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))
        prints::
            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def cvt_fn(s,l,t):
            try:
                return datetime.strptime(t[0], fmt)
            except ValueError as ve:
                # report a non-matching datetime as a parse failure at this location
                raise ParseException(s, l, str(ve))
        return cvt_fn

    # month and day are optional; matched parts are exposed as named groups
    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
    "ISO8601 date (C{yyyy-mm-dd})"

    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

    # suppresses any opening or closing tag; used by stripHTMLTags below
    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """
        Parse action to remove HTML tags from web page HTML source

        Applies C{_html_stripper.transformString} to the first token and
        returns the result.

        Example::
            # strip HTML links from normal text
            text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
            td,td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end

            print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
        """
        return pyparsing_common._html_stripper.transformString(tokens[0])

    # one list item: printable words (commas excluded), each optionally followed by spaces/tabs
    _commasepitem = Combine(OneOrMore(~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')
                                        + Optional( White(" \t") ) ) ).streamline().setName("commaItem")
    # each item is optional, so an empty field yields the default ""
    comma_separated_list = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
    """Parse action to convert tokens to lower case."""
  4991. class _lazyclassproperty(object):
  4992. def __init__(self, fn):
  4993. self.fn = fn
  4994. def __get__(self, obj, cls):
  4995. if cls is None:
  4996. cls = type(obj)
  4997. ret = self.fn(cls)
  4998. setattr(cls, self.fn.__name__, ret)
  4999. return ret
  5000. class unicode_set:
  5001. _ranges = []
  5002. @_lazyclassproperty
  5003. def printables(cls):
  5004. return ''.join(filterfalse(unicode.isspace, (unichr(c) for r in cls._ranges for c in range(r[0], r[-1] + 1))))
  5005. @_lazyclassproperty
  5006. def alphas(cls):
  5007. return ''.join(filter(unicode.isalpha, (unichr(c) for r in cls._ranges for c in range(r[0], r[-1] + 1))))
  5008. @_lazyclassproperty
  5009. def nums(cls):
  5010. return ''.join(filter(unicode.isdigit, (unichr(c) for r in cls._ranges for c in range(r[0], r[-1] + 1))))
  5011. @_lazyclassproperty
  5012. def alphanums(cls):
  5013. return cls.alphas + cls.nums
class pyparsing_unicode(unicode_set):
    """
    Namespace class collecting C{unicode_set} subclasses for individual
    scripts and languages. Each nested class only defines C{_ranges}: a list
    of C{(first, last)} code point tuples (inclusive), or 1-tuples for a
    single code point; the character-class strings are inherited from
    C{unicode_set}.
    """
    # the namespace class itself spans everything from the space character upward
    _ranges = [(32, sys.maxunicode)]

    class Latin1(unicode_set):
        _ranges = [
            (0x0020, 0x007e), (0x00a0, 0x00ff),
        ]

    class Greek(unicode_set):
        # the 1-tuples denote single code points
        _ranges = [
            (0x0370, 0x03ff), (0x1f00, 0x1f15), (0x1f18, 0x1f1d), (0x1f20, 0x1f45), (0x1f48, 0x1f4d),
            (0x1f50, 0x1f57), (0x1f59,), (0x1f5b,), (0x1f5d,), (0x1f5f, 0x1f7d), (0x1f80, 0x1fb4), (0x1fb6, 0x1fc4),
            (0x1fc6, 0x1fd3), (0x1fd6, 0x1fdb), (0x1fdd, 0x1fef), (0x1ff2, 0x1ff4), (0x1ff6, 0x1ffe),
        ]

    class Cyrillic(unicode_set):
        _ranges = [(0x0400, 0x04ff)]

    class Chinese(unicode_set):
        _ranges = [(0x4e00, 0x9fff)]

    class Japanese(unicode_set):
        # placeholder: filled in after the class statement, once the nested classes exist
        _ranges = [ ]      # sum of Kanji, Hiragana, and Katakana ranges

        class Kanji(unicode_set):
            _ranges = [(0x4E00, 0x9Fbf), ]

        class Hiragana(unicode_set):
            _ranges = [(0x3040, 0x309f), ]

        class Katakana(unicode_set):
            _ranges  = [(0x30a0, 0x30ff), ]

    class Korean(unicode_set):
        _ranges = [(0xac00, 0xd7af), (0x1100, 0x11ff), (0x3130, 0x318f), (0xa960, 0xa97f), (0xd7b0, 0xd7ff), ]

    class CJK(unicode_set):
        # placeholder: filled in after the class statement
        _ranges = [     # sum of Chinese, Japanese, and Korean ranges
        ]

    class Thai(unicode_set):
        _ranges = [(0x0e01, 0x0e3a), (0x0e3f, 0x0e5b), ]

    class Arabic(unicode_set):
        _ranges = [(0x0600, 0x061b), (0x061e, 0x06ff), (0x0700, 0x077f), ]

    class Hebrew(unicode_set):
        _ranges = [(0x0590, 0x05ff), ]

    class Devanagari(unicode_set):
        _ranges = [(0x0900, 0x097f), (0xa8e0, 0xa8ff)]
  # Composite sets: Japanese is the concatenation of its three nested script
  # ranges, and CJK is the concatenation of Chinese, Japanese, and Korean.
  # Japanese must be filled in first because CJK reads its result.
  pyparsing_unicode.Japanese._ranges = (
      pyparsing_unicode.Japanese.Kanji._ranges
      + pyparsing_unicode.Japanese.Hiragana._ranges
      + pyparsing_unicode.Japanese.Katakana._ranges
  )
  pyparsing_unicode.CJK._ranges = (
      pyparsing_unicode.Chinese._ranges
      + pyparsing_unicode.Japanese._ranges
      + pyparsing_unicode.Korean._ranges
  )

  # Expose each script class under an alias written in that script.
  # NOTE(review): guarded by PY_3, presumably because only Python 3 accepts
  # non-ASCII attribute names written as plain str literals - confirm.
  if PY_3:
      for _owner, _alias, _target in (
          (pyparsing_unicode, "العربية", pyparsing_unicode.Arabic),
          (pyparsing_unicode, "中文", pyparsing_unicode.Chinese),
          (pyparsing_unicode, "кириллица", pyparsing_unicode.Cyrillic),
          (pyparsing_unicode, "Ελληνικά", pyparsing_unicode.Greek),
          (pyparsing_unicode, "עִברִית", pyparsing_unicode.Hebrew),
          (pyparsing_unicode, "日本語", pyparsing_unicode.Japanese),
          (pyparsing_unicode.Japanese, "漢字", pyparsing_unicode.Japanese.Kanji),
          (pyparsing_unicode.Japanese, "カタカナ", pyparsing_unicode.Japanese.Katakana),
          (pyparsing_unicode.Japanese, "ひらがな", pyparsing_unicode.Japanese.Hiragana),
          (pyparsing_unicode, "한국어", pyparsing_unicode.Korean),
          (pyparsing_unicode, "ไทย", pyparsing_unicode.Thai),
          (pyparsing_unicode, "देवनागरी", pyparsing_unicode.Devanagari),
      ):
          setattr(_owner, _alias, _target)
      # keep the loop variables out of the module namespace
      del _owner, _alias, _target
  if __name__ == "__main__":
      # Self-demo, executed only when the module is run as a script: build a
      # tiny "SELECT ... FROM ..." grammar and push sample input through
      # runTests, then exercise a few pyparsing_common expressions.

      # keywords are matched without regard to case
      selectToken = CaselessLiteral("select")
      fromToken = CaselessLiteral("from")

      # identifiers, and dotted names built from them; dotted names are
      # combined into one token and passed through upcaseTokens
      ident = Word(alphas, alphanums + "_$")
      columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
      columnNameList = Group(delimitedList(columnName)).setName("columns")
      columnSpec = '*' | columnNameList

      tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
      tableNameList = Group(delimitedList(tableName)).setName("tables")

      simpleSQL = (
          selectToken("command")
          + columnSpec("columns")
          + fromToken
          + tableNameList("tables")
      )

      # demo runTests method, including embedded comments in test string
      simpleSQL.runTests("""
          # '*' as column list and dotted table name
          select * from SYS.XYZZY
          # caseless match on "SELECT", and casts back to "select"
          SELECT * from XYZZY, ABC
          # list of column names, and mixed case SELECT keyword
          Select AA,BB,CC from Sys.dual
          # multiple tables
          Select A, B, C from Sys.dual, Table2
          # invalid SELECT keyword - should fail
          Xelect A, B, C from Sys.dual
          # incomplete command - should fail
          Select
          # invalid column name - should fail
          Select ^^^ frox Sys.dual
          """)

      # sample numeric literals run through pyparsing_common.number
      pyparsing_common.number.runTests("""
          100
          -100
          +100
          3.14159
          6.02e23
          1e-12
          """)

      # any int or real number, returned as float
      pyparsing_common.fnumber.runTests("""
          100
          -100
          +100
          3.14159
          6.02e23
          1e-12
          """)

      # sample inputs for the hex integer expression
      pyparsing_common.hex_integer.runTests("""
          100
          FF
          """)

      # attach a parse action that maps matched tokens through uuid.UUID
      import uuid
      pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
      pyparsing_common.uuid.runTests("""
          12345678-1234-5678-1234-567812345678
          """)