You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

3502 lines
134 KiB

4 years ago
  1. # -----------------------------------------------------------------------------
  2. # ply: yacc.py
  3. #
  4. # Copyright (C) 2001-2018
  5. # David M. Beazley (Dabeaz LLC)
  6. # All rights reserved.
  7. #
  8. # Redistribution and use in source and binary forms, with or without
  9. # modification, are permitted provided that the following conditions are
  10. # met:
  11. #
  12. # * Redistributions of source code must retain the above copyright notice,
  13. # this list of conditions and the following disclaimer.
  14. # * Redistributions in binary form must reproduce the above copyright notice,
  15. # this list of conditions and the following disclaimer in the documentation
  16. # and/or other materials provided with the distribution.
  17. # * Neither the name of the David Beazley or Dabeaz LLC may be used to
  18. # endorse or promote products derived from this software without
  19. # specific prior written permission.
  20. #
  21. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22. # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23. # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  24. # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  25. # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  26. # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  27. # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  28. # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  29. # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  30. # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  31. # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32. # -----------------------------------------------------------------------------
  33. #
  34. # This implements an LR parser that is constructed from grammar rules defined
  35. # as Python functions. The grammar is specified by supplying the BNF inside
  36. # Python documentation strings. The inspiration for this technique was borrowed
  37. # from John Aycock's Spark parsing system. PLY might be viewed as a cross between
  38. # Spark and the GNU bison utility.
  39. #
  40. # The current implementation is only somewhat object-oriented. The
  41. # LR parser itself is defined in terms of an object (which allows multiple
  42. # parsers to co-exist). However, most of the variables used during table
  43. # construction are defined in terms of global variables. Users shouldn't
  44. # notice unless they are trying to define multiple parsers at the same
  45. # time using threads (in which case they should have their head examined).
  46. #
  47. # This implementation supports both SLR and LALR(1) parsing. LALR(1)
  48. # support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
  49. # using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
  50. # Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
  51. # by the more efficient DeRemer and Pennello algorithm.
  52. #
  53. # :::::::: WARNING :::::::
  54. #
  55. # Construction of LR parsing tables is fairly complicated and expensive.
  56. # To make this module run fast, a *LOT* of work has been put into
  57. # optimization---often at the expense of readability and what some might
  58. # consider to be good Python "coding style." Modify the code at your
  59. # own risk!
  60. # ----------------------------------------------------------------------------
  61. import re
  62. import types
  63. import sys
  64. import os.path
  65. import inspect
  66. import warnings
  67. __version__ = '3.11'
  68. __tabversion__ = '3.10'
  69. #-----------------------------------------------------------------------------
  70. # === User configurable parameters ===
  71. #
  72. # Change these to modify the default behavior of yacc (if you wish)
  73. #-----------------------------------------------------------------------------
  74. yaccdebug = True # Debugging mode. If set, yacc generates a
  75. # a 'parser.out' file in the current directory
  76. debug_file = 'parser.out' # Default name of the debugging file
  77. tab_module = 'parsetab' # Default name of the table module
  78. default_lr = 'LALR' # Default LR table generation method
  79. error_count = 3 # Number of symbols that must be shifted to leave recovery mode
  80. yaccdevel = False # Set to True if developing yacc. This turns off optimized
  81. # implementations of certain functions.
  82. resultlimit = 40 # Size limit of results when running in debug mode.
  83. pickle_protocol = 0 # Protocol to use when writing pickle files
  84. # String type-checking compatibility
  85. if sys.version_info[0] < 3:
  86. string_types = basestring
  87. else:
  88. string_types = str
  89. MAXINT = sys.maxsize
  90. # This object is a stand-in for a logging object created by the
  91. # logging module. PLY will use this by default to create things
  92. # such as the parser.out file. If a user wants more detailed
  93. # information, they can create their own logging object and pass
  94. # it into PLY.
  95. class PlyLogger(object):
  96. def __init__(self, f):
  97. self.f = f
  98. def debug(self, msg, *args, **kwargs):
  99. self.f.write((msg % args) + '\n')
  100. info = debug
  101. def warning(self, msg, *args, **kwargs):
  102. self.f.write('WARNING: ' + (msg % args) + '\n')
  103. def error(self, msg, *args, **kwargs):
  104. self.f.write('ERROR: ' + (msg % args) + '\n')
  105. critical = debug
  106. # Null logger is used when no output is generated. Does nothing.
  107. class NullLogger(object):
  108. def __getattribute__(self, name):
  109. return self
  110. def __call__(self, *args, **kwargs):
  111. return self
  112. # Exception raised for yacc-related errors
  113. class YaccError(Exception):
  114. pass
  115. # Format the result message that the parser produces when running in debug mode.
  116. def format_result(r):
  117. repr_str = repr(r)
  118. if '\n' in repr_str:
  119. repr_str = repr(repr_str)
  120. if len(repr_str) > resultlimit:
  121. repr_str = repr_str[:resultlimit] + ' ...'
  122. result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str)
  123. return result
  124. # Format stack entries when the parser is running in debug mode
  125. def format_stack_entry(r):
  126. repr_str = repr(r)
  127. if '\n' in repr_str:
  128. repr_str = repr(repr_str)
  129. if len(repr_str) < 16:
  130. return repr_str
  131. else:
  132. return '<%s @ 0x%x>' % (type(r).__name__, id(r))
  133. # Panic mode error recovery support. This feature is being reworked--much of the
  134. # code here is to offer a deprecation/backwards compatible transition
  135. _errok = None
  136. _token = None
  137. _restart = None
  138. _warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error().
  139. Instead, invoke the methods on the associated parser instance:
  140. def p_error(p):
  141. ...
  142. # Use parser.errok(), parser.token(), parser.restart()
  143. ...
  144. parser = yacc.yacc()
  145. '''
  146. def errok():
  147. warnings.warn(_warnmsg)
  148. return _errok()
  149. def restart():
  150. warnings.warn(_warnmsg)
  151. return _restart()
  152. def token():
  153. warnings.warn(_warnmsg)
  154. return _token()
  155. # Utility function to call the p_error() function with some deprecation hacks
  156. def call_errorfunc(errorfunc, token, parser):
  157. global _errok, _token, _restart
  158. _errok = parser.errok
  159. _token = parser.token
  160. _restart = parser.restart
  161. r = errorfunc(token)
  162. try:
  163. del _errok, _token, _restart
  164. except NameError:
  165. pass
  166. return r
  167. #-----------------------------------------------------------------------------
  168. # === LR Parsing Engine ===
  169. #
  170. # The following classes are used for the LR parser itself. These are not
  171. # used during table construction and are independent of the actual LR
  172. # table generation algorithm
  173. #-----------------------------------------------------------------------------
  174. # This class is used to hold non-terminal grammar symbols during parsing.
  175. # It normally has the following attributes set:
  176. # .type = Grammar symbol type
  177. # .value = Symbol value
  178. # .lineno = Starting line number
  179. # .endlineno = Ending line number (optional, set automatically)
  180. # .lexpos = Starting lex position
  181. # .endlexpos = Ending lex position (optional, set automatically)
  182. class YaccSymbol:
  183. def __str__(self):
  184. return self.type
  185. def __repr__(self):
  186. return str(self)
  187. # This class is a wrapper around the objects actually passed to each
  188. # grammar rule. Index lookup and assignment actually assign the
  189. # .value attribute of the underlying YaccSymbol object.
  190. # The lineno() method returns the line number of a given
  191. # item (or 0 if not defined). The linespan() method returns
  192. # a tuple of (startline,endline) representing the range of lines
  193. # for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)
  194. # representing the range of positional information for a symbol.
  195. class YaccProduction:
  196. def __init__(self, s, stack=None):
  197. self.slice = s
  198. self.stack = stack
  199. self.lexer = None
  200. self.parser = None
  201. def __getitem__(self, n):
  202. if isinstance(n, slice):
  203. return [s.value for s in self.slice[n]]
  204. elif n >= 0:
  205. return self.slice[n].value
  206. else:
  207. return self.stack[n].value
  208. def __setitem__(self, n, v):
  209. self.slice[n].value = v
  210. def __getslice__(self, i, j):
  211. return [s.value for s in self.slice[i:j]]
  212. def __len__(self):
  213. return len(self.slice)
  214. def lineno(self, n):
  215. return getattr(self.slice[n], 'lineno', 0)
  216. def set_lineno(self, n, lineno):
  217. self.slice[n].lineno = lineno
  218. def linespan(self, n):
  219. startline = getattr(self.slice[n], 'lineno', 0)
  220. endline = getattr(self.slice[n], 'endlineno', startline)
  221. return startline, endline
  222. def lexpos(self, n):
  223. return getattr(self.slice[n], 'lexpos', 0)
  224. def set_lexpos(self, n, lexpos):
  225. self.slice[n].lexpos = lexpos
  226. def lexspan(self, n):
  227. startpos = getattr(self.slice[n], 'lexpos', 0)
  228. endpos = getattr(self.slice[n], 'endlexpos', startpos)
  229. return startpos, endpos
  230. def error(self):
  231. raise SyntaxError
  232. # -----------------------------------------------------------------------------
  233. # == LRParser ==
  234. #
  235. # The LR Parsing engine.
  236. # -----------------------------------------------------------------------------
  237. class LRParser:
  238. def __init__(self, lrtab, errorf):
  239. self.productions = lrtab.lr_productions
  240. self.action = lrtab.lr_action
  241. self.goto = lrtab.lr_goto
  242. self.errorfunc = errorf
  243. self.set_defaulted_states()
  244. self.errorok = True
  245. def errok(self):
  246. self.errorok = True
  247. def restart(self):
  248. del self.statestack[:]
  249. del self.symstack[:]
  250. sym = YaccSymbol()
  251. sym.type = '$end'
  252. self.symstack.append(sym)
  253. self.statestack.append(0)
  254. # Defaulted state support.
  255. # This method identifies parser states where there is only one possible reduction action.
  256. # For such states, the parser can choose to make a rule reduction without consuming
  257. # the next look-ahead token. This delayed invocation of the tokenizer can be useful in
  258. # certain kinds of advanced parsing situations where the lexer and parser interact with
  259. # each other or change states (i.e., manipulation of scope, lexer states, etc.).
  260. #
  261. # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
  262. def set_defaulted_states(self):
  263. self.defaulted_states = {}
  264. for state, actions in self.action.items():
  265. rules = list(actions.values())
  266. if len(rules) == 1 and rules[0] < 0:
  267. self.defaulted_states[state] = rules[0]
  268. def disable_defaulted_states(self):
  269. self.defaulted_states = {}
  270. def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
  271. if debug or yaccdevel:
  272. if isinstance(debug, int):
  273. debug = PlyLogger(sys.stderr)
  274. return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
  275. elif tracking:
  276. return self.parseopt(input, lexer, debug, tracking, tokenfunc)
  277. else:
  278. return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)
  279. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  280. # parsedebug().
  281. #
  282. # This is the debugging enabled version of parse(). All changes made to the
  283. # parsing engine should be made here. Optimized versions of this function
  284. # are automatically created by the ply/ygen.py script. This script cuts out
  285. # sections enclosed in markers such as this:
  286. #
  287. # #--! DEBUG
  288. # statements
  289. # #--! DEBUG
  290. #
  291. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  292. def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
  293. #--! parsedebug-start
  294. lookahead = None # Current lookahead symbol
  295. lookaheadstack = [] # Stack of lookahead symbols
  296. actions = self.action # Local reference to action table (to avoid lookup on self.)
  297. goto = self.goto # Local reference to goto table (to avoid lookup on self.)
  298. prod = self.productions # Local reference to production list (to avoid lookup on self.)
  299. defaulted_states = self.defaulted_states # Local reference to defaulted states
  300. pslice = YaccProduction(None) # Production object passed to grammar rules
  301. errorcount = 0 # Used during error recovery
  302. #--! DEBUG
  303. debug.info('PLY: PARSE DEBUG START')
  304. #--! DEBUG
  305. # If no lexer was given, we will try to use the lex module
  306. if not lexer:
  307. from . import lex
  308. lexer = lex.lexer
  309. # Set up the lexer and parser objects on pslice
  310. pslice.lexer = lexer
  311. pslice.parser = self
  312. # If input was supplied, pass to lexer
  313. if input is not None:
  314. lexer.input(input)
  315. if tokenfunc is None:
  316. # Tokenize function
  317. get_token = lexer.token
  318. else:
  319. get_token = tokenfunc
  320. # Set the parser() token method (sometimes used in error recovery)
  321. self.token = get_token
  322. # Set up the state and symbol stacks
  323. statestack = [] # Stack of parsing states
  324. self.statestack = statestack
  325. symstack = [] # Stack of grammar symbols
  326. self.symstack = symstack
  327. pslice.stack = symstack # Put in the production
  328. errtoken = None # Err token
  329. # The start state is assumed to be (0,$end)
  330. statestack.append(0)
  331. sym = YaccSymbol()
  332. sym.type = '$end'
  333. symstack.append(sym)
  334. state = 0
  335. while True:
  336. # Get the next symbol on the input. If a lookahead symbol
  337. # is already set, we just use that. Otherwise, we'll pull
  338. # the next token off of the lookaheadstack or from the lexer
  339. #--! DEBUG
  340. debug.debug('')
  341. debug.debug('State : %s', state)
  342. #--! DEBUG
  343. if state not in defaulted_states:
  344. if not lookahead:
  345. if not lookaheadstack:
  346. lookahead = get_token() # Get the next token
  347. else:
  348. lookahead = lookaheadstack.pop()
  349. if not lookahead:
  350. lookahead = YaccSymbol()
  351. lookahead.type = '$end'
  352. # Check the action table
  353. ltype = lookahead.type
  354. t = actions[state].get(ltype)
  355. else:
  356. t = defaulted_states[state]
  357. #--! DEBUG
  358. debug.debug('Defaulted state %s: Reduce using %d', state, -t)
  359. #--! DEBUG
  360. #--! DEBUG
  361. debug.debug('Stack : %s',
  362. ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
  363. #--! DEBUG
  364. if t is not None:
  365. if t > 0:
  366. # shift a symbol on the stack
  367. statestack.append(t)
  368. state = t
  369. #--! DEBUG
  370. debug.debug('Action : Shift and goto state %s', t)
  371. #--! DEBUG
  372. symstack.append(lookahead)
  373. lookahead = None
  374. # Decrease error count on successful shift
  375. if errorcount:
  376. errorcount -= 1
  377. continue
  378. if t < 0:
  379. # reduce a symbol on the stack, emit a production
  380. p = prod[-t]
  381. pname = p.name
  382. plen = p.len
  383. # Get production function
  384. sym = YaccSymbol()
  385. sym.type = pname # Production name
  386. sym.value = None
  387. #--! DEBUG
  388. if plen:
  389. debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
  390. '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
  391. goto[statestack[-1-plen]][pname])
  392. else:
  393. debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
  394. goto[statestack[-1]][pname])
  395. #--! DEBUG
  396. if plen:
  397. targ = symstack[-plen-1:]
  398. targ[0] = sym
  399. #--! TRACKING
  400. if tracking:
  401. t1 = targ[1]
  402. sym.lineno = t1.lineno
  403. sym.lexpos = t1.lexpos
  404. t1 = targ[-1]
  405. sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
  406. sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
  407. #--! TRACKING
  408. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  409. # The code enclosed in this section is duplicated
  410. # below as a performance optimization. Make sure
  411. # changes get made in both locations.
  412. pslice.slice = targ
  413. try:
  414. # Call the grammar rule with our special slice object
  415. del symstack[-plen:]
  416. self.state = state
  417. p.callable(pslice)
  418. del statestack[-plen:]
  419. #--! DEBUG
  420. debug.info('Result : %s', format_result(pslice[0]))
  421. #--! DEBUG
  422. symstack.append(sym)
  423. state = goto[statestack[-1]][pname]
  424. statestack.append(state)
  425. except SyntaxError:
  426. # If an error was set. Enter error recovery state
  427. lookaheadstack.append(lookahead) # Save the current lookahead token
  428. symstack.extend(targ[1:-1]) # Put the production slice back on the stack
  429. statestack.pop() # Pop back one state (before the reduce)
  430. state = statestack[-1]
  431. sym.type = 'error'
  432. sym.value = 'error'
  433. lookahead = sym
  434. errorcount = error_count
  435. self.errorok = False
  436. continue
  437. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  438. else:
  439. #--! TRACKING
  440. if tracking:
  441. sym.lineno = lexer.lineno
  442. sym.lexpos = lexer.lexpos
  443. #--! TRACKING
  444. targ = [sym]
  445. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  446. # The code enclosed in this section is duplicated
  447. # above as a performance optimization. Make sure
  448. # changes get made in both locations.
  449. pslice.slice = targ
  450. try:
  451. # Call the grammar rule with our special slice object
  452. self.state = state
  453. p.callable(pslice)
  454. #--! DEBUG
  455. debug.info('Result : %s', format_result(pslice[0]))
  456. #--! DEBUG
  457. symstack.append(sym)
  458. state = goto[statestack[-1]][pname]
  459. statestack.append(state)
  460. except SyntaxError:
  461. # If an error was set. Enter error recovery state
  462. lookaheadstack.append(lookahead) # Save the current lookahead token
  463. statestack.pop() # Pop back one state (before the reduce)
  464. state = statestack[-1]
  465. sym.type = 'error'
  466. sym.value = 'error'
  467. lookahead = sym
  468. errorcount = error_count
  469. self.errorok = False
  470. continue
  471. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  472. if t == 0:
  473. n = symstack[-1]
  474. result = getattr(n, 'value', None)
  475. #--! DEBUG
  476. debug.info('Done : Returning %s', format_result(result))
  477. debug.info('PLY: PARSE DEBUG END')
  478. #--! DEBUG
  479. return result
  480. if t is None:
  481. #--! DEBUG
  482. debug.error('Error : %s',
  483. ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
  484. #--! DEBUG
  485. # We have some kind of parsing error here. To handle
  486. # this, we are going to push the current token onto
  487. # the tokenstack and replace it with an 'error' token.
  488. # If there are any synchronization rules, they may
  489. # catch it.
  490. #
  491. # In addition to pushing the error token, we call call
  492. # the user defined p_error() function if this is the
  493. # first syntax error. This function is only called if
  494. # errorcount == 0.
  495. if errorcount == 0 or self.errorok:
  496. errorcount = error_count
  497. self.errorok = False
  498. errtoken = lookahead
  499. if errtoken.type == '$end':
  500. errtoken = None # End of file!
  501. if self.errorfunc:
  502. if errtoken and not hasattr(errtoken, 'lexer'):
  503. errtoken.lexer = lexer
  504. self.state = state
  505. tok = call_errorfunc(self.errorfunc, errtoken, self)
  506. if self.errorok:
  507. # User must have done some kind of panic
  508. # mode recovery on their own. The
  509. # returned token is the next lookahead
  510. lookahead = tok
  511. errtoken = None
  512. continue
  513. else:
  514. if errtoken:
  515. if hasattr(errtoken, 'lineno'):
  516. lineno = lookahead.lineno
  517. else:
  518. lineno = 0
  519. if lineno:
  520. sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
  521. else:
  522. sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
  523. else:
  524. sys.stderr.write('yacc: Parse error in input. EOF\n')
  525. return
  526. else:
  527. errorcount = error_count
  528. # case 1: the statestack only has 1 entry on it. If we're in this state, the
  529. # entire parse has been rolled back and we're completely hosed. The token is
  530. # discarded and we just keep going.
  531. if len(statestack) <= 1 and lookahead.type != '$end':
  532. lookahead = None
  533. errtoken = None
  534. state = 0
  535. # Nuke the pushback stack
  536. del lookaheadstack[:]
  537. continue
  538. # case 2: the statestack has a couple of entries on it, but we're
  539. # at the end of the file. nuke the top entry and generate an error token
  540. # Start nuking entries on the stack
  541. if lookahead.type == '$end':
  542. # Whoa. We're really hosed here. Bail out
  543. return
  544. if lookahead.type != 'error':
  545. sym = symstack[-1]
  546. if sym.type == 'error':
  547. # Hmmm. Error is on top of stack, we'll just nuke input
  548. # symbol and continue
  549. #--! TRACKING
  550. if tracking:
  551. sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
  552. sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
  553. #--! TRACKING
  554. lookahead = None
  555. continue
  556. # Create the error symbol for the first time and make it the new lookahead symbol
  557. t = YaccSymbol()
  558. t.type = 'error'
  559. if hasattr(lookahead, 'lineno'):
  560. t.lineno = t.endlineno = lookahead.lineno
  561. if hasattr(lookahead, 'lexpos'):
  562. t.lexpos = t.endlexpos = lookahead.lexpos
  563. t.value = lookahead
  564. lookaheadstack.append(lookahead)
  565. lookahead = t
  566. else:
  567. sym = symstack.pop()
  568. #--! TRACKING
  569. if tracking:
  570. lookahead.lineno = sym.lineno
  571. lookahead.lexpos = sym.lexpos
  572. #--! TRACKING
  573. statestack.pop()
  574. state = statestack[-1]
  575. continue
  576. # Call an error function here
  577. raise RuntimeError('yacc: internal parser error!!!\n')
  578. #--! parsedebug-end
  579. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  580. # parseopt().
  581. #
  582. # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY!
  583. # This code is automatically generated by the ply/ygen.py script. Make
  584. # changes to the parsedebug() method instead.
  585. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  586. def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
  587. #--! parseopt-start
  588. lookahead = None # Current lookahead symbol
  589. lookaheadstack = [] # Stack of lookahead symbols
  590. actions = self.action # Local reference to action table (to avoid lookup on self.)
  591. goto = self.goto # Local reference to goto table (to avoid lookup on self.)
  592. prod = self.productions # Local reference to production list (to avoid lookup on self.)
  593. defaulted_states = self.defaulted_states # Local reference to defaulted states
  594. pslice = YaccProduction(None) # Production object passed to grammar rules
  595. errorcount = 0 # Used during error recovery
  596. # If no lexer was given, we will try to use the lex module
  597. if not lexer:
  598. from . import lex
  599. lexer = lex.lexer
  600. # Set up the lexer and parser objects on pslice
  601. pslice.lexer = lexer
  602. pslice.parser = self
  603. # If input was supplied, pass to lexer
  604. if input is not None:
  605. lexer.input(input)
  606. if tokenfunc is None:
  607. # Tokenize function
  608. get_token = lexer.token
  609. else:
  610. get_token = tokenfunc
  611. # Set the parser() token method (sometimes used in error recovery)
  612. self.token = get_token
  613. # Set up the state and symbol stacks
  614. statestack = [] # Stack of parsing states
  615. self.statestack = statestack
  616. symstack = [] # Stack of grammar symbols
  617. self.symstack = symstack
  618. pslice.stack = symstack # Put in the production
  619. errtoken = None # Err token
  620. # The start state is assumed to be (0,$end)
  621. statestack.append(0)
  622. sym = YaccSymbol()
  623. sym.type = '$end'
  624. symstack.append(sym)
  625. state = 0
  626. while True:
  627. # Get the next symbol on the input. If a lookahead symbol
  628. # is already set, we just use that. Otherwise, we'll pull
  629. # the next token off of the lookaheadstack or from the lexer
  630. if state not in defaulted_states:
  631. if not lookahead:
  632. if not lookaheadstack:
  633. lookahead = get_token() # Get the next token
  634. else:
  635. lookahead = lookaheadstack.pop()
  636. if not lookahead:
  637. lookahead = YaccSymbol()
  638. lookahead.type = '$end'
  639. # Check the action table
  640. ltype = lookahead.type
  641. t = actions[state].get(ltype)
  642. else:
  643. t = defaulted_states[state]
  644. if t is not None:
  645. if t > 0:
  646. # shift a symbol on the stack
  647. statestack.append(t)
  648. state = t
  649. symstack.append(lookahead)
  650. lookahead = None
  651. # Decrease error count on successful shift
  652. if errorcount:
  653. errorcount -= 1
  654. continue
  655. if t < 0:
  656. # reduce a symbol on the stack, emit a production
  657. p = prod[-t]
  658. pname = p.name
  659. plen = p.len
  660. # Get production function
  661. sym = YaccSymbol()
  662. sym.type = pname # Production name
  663. sym.value = None
  664. if plen:
  665. targ = symstack[-plen-1:]
  666. targ[0] = sym
  667. #--! TRACKING
  668. if tracking:
  669. t1 = targ[1]
  670. sym.lineno = t1.lineno
  671. sym.lexpos = t1.lexpos
  672. t1 = targ[-1]
  673. sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
  674. sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
  675. #--! TRACKING
  676. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  677. # The code enclosed in this section is duplicated
  678. # below as a performance optimization. Make sure
  679. # changes get made in both locations.
  680. pslice.slice = targ
  681. try:
  682. # Call the grammar rule with our special slice object
  683. del symstack[-plen:]
  684. self.state = state
  685. p.callable(pslice)
  686. del statestack[-plen:]
  687. symstack.append(sym)
  688. state = goto[statestack[-1]][pname]
  689. statestack.append(state)
  690. except SyntaxError:
  691. # If an error was set. Enter error recovery state
  692. lookaheadstack.append(lookahead) # Save the current lookahead token
  693. symstack.extend(targ[1:-1]) # Put the production slice back on the stack
  694. statestack.pop() # Pop back one state (before the reduce)
  695. state = statestack[-1]
  696. sym.type = 'error'
  697. sym.value = 'error'
  698. lookahead = sym
  699. errorcount = error_count
  700. self.errorok = False
  701. continue
  702. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  703. else:
  704. #--! TRACKING
  705. if tracking:
  706. sym.lineno = lexer.lineno
  707. sym.lexpos = lexer.lexpos
  708. #--! TRACKING
  709. targ = [sym]
  710. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  711. # The code enclosed in this section is duplicated
  712. # above as a performance optimization. Make sure
  713. # changes get made in both locations.
  714. pslice.slice = targ
  715. try:
  716. # Call the grammar rule with our special slice object
  717. self.state = state
  718. p.callable(pslice)
  719. symstack.append(sym)
  720. state = goto[statestack[-1]][pname]
  721. statestack.append(state)
  722. except SyntaxError:
  723. # If an error was set. Enter error recovery state
  724. lookaheadstack.append(lookahead) # Save the current lookahead token
  725. statestack.pop() # Pop back one state (before the reduce)
  726. state = statestack[-1]
  727. sym.type = 'error'
  728. sym.value = 'error'
  729. lookahead = sym
  730. errorcount = error_count
  731. self.errorok = False
  732. continue
  733. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  734. if t == 0:
  735. n = symstack[-1]
  736. result = getattr(n, 'value', None)
  737. return result
  738. if t is None:
  739. # We have some kind of parsing error here. To handle
  740. # this, we are going to push the current token onto
  741. # the tokenstack and replace it with an 'error' token.
  742. # If there are any synchronization rules, they may
  743. # catch it.
  744. #
  745. # In addition to pushing the error token, we call call
  746. # the user defined p_error() function if this is the
  747. # first syntax error. This function is only called if
  748. # errorcount == 0.
  749. if errorcount == 0 or self.errorok:
  750. errorcount = error_count
  751. self.errorok = False
  752. errtoken = lookahead
  753. if errtoken.type == '$end':
  754. errtoken = None # End of file!
  755. if self.errorfunc:
  756. if errtoken and not hasattr(errtoken, 'lexer'):
  757. errtoken.lexer = lexer
  758. self.state = state
  759. tok = call_errorfunc(self.errorfunc, errtoken, self)
  760. if self.errorok:
  761. # User must have done some kind of panic
  762. # mode recovery on their own. The
  763. # returned token is the next lookahead
  764. lookahead = tok
  765. errtoken = None
  766. continue
  767. else:
  768. if errtoken:
  769. if hasattr(errtoken, 'lineno'):
  770. lineno = lookahead.lineno
  771. else:
  772. lineno = 0
  773. if lineno:
  774. sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
  775. else:
  776. sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
  777. else:
  778. sys.stderr.write('yacc: Parse error in input. EOF\n')
  779. return
  780. else:
  781. errorcount = error_count
  782. # case 1: the statestack only has 1 entry on it. If we're in this state, the
  783. # entire parse has been rolled back and we're completely hosed. The token is
  784. # discarded and we just keep going.
  785. if len(statestack) <= 1 and lookahead.type != '$end':
  786. lookahead = None
  787. errtoken = None
  788. state = 0
  789. # Nuke the pushback stack
  790. del lookaheadstack[:]
  791. continue
  792. # case 2: the statestack has a couple of entries on it, but we're
  793. # at the end of the file. nuke the top entry and generate an error token
  794. # Start nuking entries on the stack
  795. if lookahead.type == '$end':
  796. # Whoa. We're really hosed here. Bail out
  797. return
  798. if lookahead.type != 'error':
  799. sym = symstack[-1]
  800. if sym.type == 'error':
  801. # Hmmm. Error is on top of stack, we'll just nuke input
  802. # symbol and continue
  803. #--! TRACKING
  804. if tracking:
  805. sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
  806. sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
  807. #--! TRACKING
  808. lookahead = None
  809. continue
  810. # Create the error symbol for the first time and make it the new lookahead symbol
  811. t = YaccSymbol()
  812. t.type = 'error'
  813. if hasattr(lookahead, 'lineno'):
  814. t.lineno = t.endlineno = lookahead.lineno
  815. if hasattr(lookahead, 'lexpos'):
  816. t.lexpos = t.endlexpos = lookahead.lexpos
  817. t.value = lookahead
  818. lookaheadstack.append(lookahead)
  819. lookahead = t
  820. else:
  821. sym = symstack.pop()
  822. #--! TRACKING
  823. if tracking:
  824. lookahead.lineno = sym.lineno
  825. lookahead.lexpos = sym.lexpos
  826. #--! TRACKING
  827. statestack.pop()
  828. state = statestack[-1]
  829. continue
  830. # Call an error function here
  831. raise RuntimeError('yacc: internal parser error!!!\n')
  832. #--! parseopt-end
  833. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  834. # parseopt_notrack().
  835. #
  836. # Optimized version of parseopt() with line number tracking removed.
  837. # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated
  838. # by the ply/ygen.py script. Make changes to the parsedebug() method instead.
  839. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        """Run the LR parse loop without line-number/position tracking.

        Parameters:
            input     - If not None, handed to lexer.input() before parsing.
            lexer     - Lexer object. When falsy, lex.lexer from the sibling
                        lex module is used instead.
            debug     - Not referenced in this variant of the parse loop.
            tracking  - Not referenced in this variant of the parse loop.
            tokenfunc - Optional callable returning the next token. When
                        None, lexer.token is used.

        Returns the 'value' attribute of the symbol on top of the symbol
        stack once the accept action (0) is reached (None if the symbol has
        no such attribute).  Returns None when the parser gives up during
        error handling.

        Raises RuntimeError if an action value is not handled by any branch
        of the loop.

        Side effects: rebinds self.token, self.statestack, self.symstack,
        self.state and self.errorok while parsing.
        """
        #--! parseopt-notrack-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks
        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)
        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer.
            # States listed in defaulted_states skip the token fetch
            # entirely and take their recorded action.
            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        # No more input: synthesize the end-of-input marker
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]

            # Action encoding used below: t > 0 shift to state t,
            # t < 0 reduce by production -t, t == 0 accept, None error.
            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        # Slice holds the result symbol followed by the
                        # plen right-hand-side symbols taken from the stack
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:
                        # Empty production: nothing to take off the stacks
                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: the parse result is the value of the top symbol
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:
                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error (errorcount == 0) or if self.errorok
                # has been set.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        # No user error function: report on stderr
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                # NOTE(review): unlike the sibling messages this one has
                                # no trailing newline -- confirm whether that is intended.
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                            return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.
                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    # Lookahead is already 'error' and the current state has no
                    # action for it: discard the top symbol/state and retry
                    sym = symstack.pop()
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parseopt-notrack-end
  1063. # -----------------------------------------------------------------------------
  1064. # === Grammar Representation ===
  1065. #
  1066. # The following functions, classes, and variables are used to represent and
  1067. # manipulate the rules that make up a grammar.
  1068. # -----------------------------------------------------------------------------
# Regex matching legal rule and symbol names: one or more ASCII letters,
# digits, underscores or hyphens (a leading digit or hyphen is accepted).
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
  1071. # -----------------------------------------------------------------------------
  1072. # class Production:
  1073. #
  1074. # This class stores the raw information about a single production or grammar rule.
  1075. # A grammar rule refers to a specification such as this:
  1076. #
  1077. # expr : expr PLUS term
  1078. #
  1079. # Here are the basic attributes defined on all productions
  1080. #
  1081. # name - Name of the production. For example 'expr'
  1082. # prod - A list of symbols on the right side ['expr','PLUS','term']
  1083. # prec - Production precedence level
  1084. # number - Production number.
  1085. # func - Function that executes on reduce
  1086. # file - File where production function is defined
# line - Line number where production function is defined
  1088. #
  1089. # The following attributes are defined or optional.
  1090. #
  1091. # len - Length of the production (number of symbols on right hand side)
  1092. # usyms - Set of unique symbols found in the production
  1093. # -----------------------------------------------------------------------------
  1094. class Production(object):
  1095. reduced = 0
  1096. def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
  1097. self.name = name
  1098. self.prod = tuple(prod)
  1099. self.number = number
  1100. self.func = func
  1101. self.callable = None
  1102. self.file = file
  1103. self.line = line
  1104. self.prec = precedence
  1105. # Internal settings used during table construction
  1106. self.len = len(self.prod) # Length of the production
  1107. # Create a list of unique production symbols used in the production
  1108. self.usyms = []
  1109. for s in self.prod:
  1110. if s not in self.usyms:
  1111. self.usyms.append(s)
  1112. # List of all LR items for the production
  1113. self.lr_items = []
  1114. self.lr_next = None
  1115. # Create a string representation
  1116. if self.prod:
  1117. self.str = '%s -> %s' % (self.name, ' '.join(self.prod))
  1118. else:
  1119. self.str = '%s -> <empty>' % self.name
  1120. def __str__(self):
  1121. return self.str
  1122. def __repr__(self):
  1123. return 'Production(' + str(self) + ')'
  1124. def __len__(self):
  1125. return len(self.prod)
  1126. def __nonzero__(self):
  1127. return 1
  1128. def __getitem__(self, index):
  1129. return self.prod[index]
  1130. # Return the nth lr_item from the production (or None if at the end)
  1131. def lr_item(self, n):
  1132. if n > len(self.prod):
  1133. return None
  1134. p = LRItem(self, n)
  1135. # Precompute the list of productions immediately following.
  1136. try:
  1137. p.lr_after = self.Prodnames[p.prod[n+1]]
  1138. except (IndexError, KeyError):
  1139. p.lr_after = []
  1140. try:
  1141. p.lr_before = p.prod[n-1]
  1142. except IndexError:
  1143. p.lr_before = None
  1144. return p
  1145. # Bind the production function name to a callable
  1146. def bind(self, pdict):
  1147. if self.func:
  1148. self.callable = pdict[self.func]
  1149. # This class serves as a minimal standin for Production objects when
  1150. # reading table data from files. It only contains information
  1151. # actually used by the LR parsing engine, plus some additional
  1152. # debugging information.
  1153. class MiniProduction(object):
  1154. def __init__(self, str, name, len, func, file, line):
  1155. self.name = name
  1156. self.len = len
  1157. self.func = func
  1158. self.callable = None
  1159. self.file = file
  1160. self.line = line
  1161. self.str = str
  1162. def __str__(self):
  1163. return self.str
  1164. def __repr__(self):
  1165. return 'MiniProduction(%s)' % self.str
  1166. # Bind the production function name to a callable
  1167. def bind(self, pdict):
  1168. if self.func:
  1169. self.callable = pdict[self.func]
  1170. # -----------------------------------------------------------------------------
  1171. # class LRItem
  1172. #
  1173. # This class represents a specific stage of parsing a production rule. For
  1174. # example:
  1175. #
  1176. # expr : expr . PLUS term
  1177. #
# In the above, the "." represents the current location of the parse. Here
# are the basic attributes:
  1180. #
  1181. # name - Name of the production. For example 'expr'
  1182. # prod - A list of symbols on the right side ['expr','.', 'PLUS','term']
  1183. # number - Production number.
  1184. #
  1185. # lr_next Next LR item. Example, if we are ' expr -> expr . PLUS term'
  1186. # then lr_next refers to 'expr -> expr PLUS . term'
  1187. # lr_index - LR item index (location of the ".") in the prod list.
  1188. # lookaheads - LALR lookahead symbols for this item
  1189. # len - Length of the production (number of symbols on right hand side)
  1190. # lr_after - List of all productions that immediately follow
  1191. # lr_before - Grammar symbol immediately before
  1192. # -----------------------------------------------------------------------------
  1193. class LRItem(object):
  1194. def __init__(self, p, n):
  1195. self.name = p.name
  1196. self.prod = list(p.prod)
  1197. self.number = p.number
  1198. self.lr_index = n
  1199. self.lookaheads = {}
  1200. self.prod.insert(n, '.')
  1201. self.prod = tuple(self.prod)
  1202. self.len = len(self.prod)
  1203. self.usyms = p.usyms
  1204. def __str__(self):
  1205. if self.prod:
  1206. s = '%s -> %s' % (self.name, ' '.join(self.prod))
  1207. else:
  1208. s = '%s -> <empty>' % self.name
  1209. return s
  1210. def __repr__(self):
  1211. return 'LRItem(' + str(self) + ')'
  1212. # -----------------------------------------------------------------------------
  1213. # rightmost_terminal()
  1214. #
  1215. # Return the rightmost terminal from a list of symbols. Used in add_production()
  1216. # -----------------------------------------------------------------------------
  1217. def rightmost_terminal(symbols, terminals):
  1218. i = len(symbols) - 1
  1219. while i >= 0:
  1220. if symbols[i] in terminals:
  1221. return symbols[i]
  1222. i -= 1
  1223. return None
  1224. # -----------------------------------------------------------------------------
  1225. # === GRAMMAR CLASS ===
  1226. #
  1227. # The following class represents the contents of the specified grammar along
  1228. # with various computed properties such as first sets, follow sets, LR items, etc.
  1229. # This data is used for critical parts of the table generation process later.
  1230. # -----------------------------------------------------------------------------
  1231. class GrammarError(YaccError):
  1232. pass
  1233. class Grammar(object):
  1234. def __init__(self, terminals):
  1235. self.Productions = [None] # A list of all of the productions. The first
  1236. # entry is always reserved for the purpose of
  1237. # building an augmented grammar
  1238. self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all
  1239. # productions of that nonterminal.
  1240. self.Prodmap = {} # A dictionary that is only used to detect duplicate
  1241. # productions.
  1242. self.Terminals = {} # A dictionary mapping the names of terminal symbols to a
  1243. # list of the rules where they are used.
  1244. for term in terminals:
  1245. self.Terminals[term] = []
  1246. self.Terminals['error'] = []
  1247. self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list
  1248. # of rule numbers where they are used.
  1249. self.First = {} # A dictionary of precomputed FIRST(x) symbols
  1250. self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols
  1251. self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the
  1252. # form ('right',level) or ('nonassoc', level) or ('left',level)
  1253. self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer.
  1254. # This is only used to provide error checking and to generate
  1255. # a warning about unused precedence rules.
  1256. self.Start = None # Starting symbol for the grammar
  1257. def __len__(self):
  1258. return len(self.Productions)
  1259. def __getitem__(self, index):
  1260. return self.Productions[index]
  1261. # -----------------------------------------------------------------------------
  1262. # set_precedence()
  1263. #
  1264. # Sets the precedence for a given terminal. assoc is the associativity such as
  1265. # 'left','right', or 'nonassoc'. level is a numeric level.
  1266. #
  1267. # -----------------------------------------------------------------------------
  1268. def set_precedence(self, term, assoc, level):
  1269. assert self.Productions == [None], 'Must call set_precedence() before add_production()'
  1270. if term in self.Precedence:
  1271. raise GrammarError('Precedence already specified for terminal %r' % term)
  1272. if assoc not in ['left', 'right', 'nonassoc']:
  1273. raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
  1274. self.Precedence[term] = (assoc, level)
  1275. # -----------------------------------------------------------------------------
  1276. # add_production()
  1277. #
  1278. # Given an action function, this function assembles a production rule and
  1279. # computes its precedence level.
  1280. #
  1281. # The production rule is supplied as a list of symbols. For example,
  1282. # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
  1283. # symbols ['expr','PLUS','term'].
  1284. #
    # Precedence is determined by the precedence of the right-most terminal
    # or the precedence of a terminal specified by %prec.
  1287. #
  1288. # A variety of error checks are performed to make sure production symbols
  1289. # are valid and that %prec is used correctly.
  1290. # -----------------------------------------------------------------------------
  1291. def add_production(self, prodname, syms, func=None, file='', line=0):
  1292. if prodname in self.Terminals:
  1293. raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
  1294. if prodname == 'error':
  1295. raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
  1296. if not _is_identifier.match(prodname):
  1297. raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))
  1298. # Look for literal tokens
  1299. for n, s in enumerate(syms):
  1300. if s[0] in "'\"":
  1301. try:
  1302. c = eval(s)
  1303. if (len(c) > 1):
  1304. raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
  1305. (file, line, s, prodname))
  1306. if c not in self.Terminals:
  1307. self.Terminals[c] = []
  1308. syms[n] = c
  1309. continue
  1310. except SyntaxError:
  1311. pass
  1312. if not _is_identifier.match(s) and s != '%prec':
  1313. raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))
  1314. # Determine the precedence level
  1315. if '%prec' in syms:
  1316. if syms[-1] == '%prec':
  1317. raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
  1318. if syms[-2] != '%prec':
  1319. raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
  1320. (file, line))
  1321. precname = syms[-1]
  1322. prodprec = self.Precedence.get(precname)
  1323. if not prodprec:
  1324. raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
  1325. else:
  1326. self.UsedPrecedence.add(precname)
  1327. del syms[-2:] # Drop %prec from the rule
  1328. else:
  1329. # If no %prec, precedence is determined by the rightmost terminal symbol
  1330. precname = rightmost_terminal(syms, self.Terminals)
  1331. prodprec = self.Precedence.get(precname, ('right', 0))
  1332. # See if the rule is already in the rulemap
  1333. map = '%s -> %s' % (prodname, syms)
  1334. if map in self.Prodmap:
  1335. m = self.Prodmap[map]
  1336. raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
  1337. 'Previous definition at %s:%d' % (m.file, m.line))
  1338. # From this point on, everything is valid. Create a new Production instance
  1339. pnumber = len(self.Productions)
  1340. if prodname not in self.Nonterminals:
  1341. self.Nonterminals[prodname] = []
  1342. # Add the production number to Terminals and Nonterminals
  1343. for t in syms:
  1344. if t in self.Terminals:
  1345. self.Terminals[t].append(pnumber)
  1346. else:
  1347. if t not in self.Nonterminals:
  1348. self.Nonterminals[t] = []
  1349. self.Nonterminals[t].append(pnumber)
  1350. # Create a production and add it to the list of productions
  1351. p = Production(pnumber, prodname, syms, prodprec, func, file, line)
  1352. self.Productions.append(p)
  1353. self.Prodmap[map] = p
  1354. # Add to the global productions list
  1355. try:
  1356. self.Prodnames[prodname].append(p)
  1357. except KeyError:
  1358. self.Prodnames[prodname] = [p]
  1359. # -----------------------------------------------------------------------------
  1360. # set_start()
  1361. #
  1362. # Sets the starting symbol and creates the augmented grammar. Production
  1363. # rule 0 is S' -> start where start is the start symbol.
  1364. # -----------------------------------------------------------------------------
  1365. def set_start(self, start=None):
  1366. if not start:
  1367. start = self.Productions[1].name
  1368. if start not in self.Nonterminals:
  1369. raise GrammarError('start symbol %s undefined' % start)
  1370. self.Productions[0] = Production(0, "S'", [start])
  1371. self.Nonterminals[start].append(0)
  1372. self.Start = start
  1373. # -----------------------------------------------------------------------------
  1374. # find_unreachable()
  1375. #
  1376. # Find all of the nonterminal symbols that can't be reached from the starting
  1377. # symbol. Returns a list of nonterminals that can't be reached.
  1378. # -----------------------------------------------------------------------------
  1379. def find_unreachable(self):
  1380. # Mark all symbols that are reachable from a symbol s
  1381. def mark_reachable_from(s):
  1382. if s in reachable:
  1383. return
  1384. reachable.add(s)
  1385. for p in self.Prodnames.get(s, []):
  1386. for r in p.prod:
  1387. mark_reachable_from(r)
  1388. reachable = set()
  1389. mark_reachable_from(self.Productions[0].prod[0])
  1390. return [s for s in self.Nonterminals if s not in reachable]
  1391. # -----------------------------------------------------------------------------
  1392. # infinite_cycles()
  1393. #
  1394. # This function looks at the various parsing rules and tries to detect
  1395. # infinite recursion cycles (grammar rules where there is no possible way
  1396. # to derive a string of only terminals).
  1397. # -----------------------------------------------------------------------------
  1398. def infinite_cycles(self):
  1399. terminates = {}
  1400. # Terminals:
  1401. for t in self.Terminals:
  1402. terminates[t] = True
  1403. terminates['$end'] = True
  1404. # Nonterminals:
  1405. # Initialize to false:
  1406. for n in self.Nonterminals:
  1407. terminates[n] = False
  1408. # Then propagate termination until no change:
  1409. while True:
  1410. some_change = False
  1411. for (n, pl) in self.Prodnames.items():
  1412. # Nonterminal n terminates iff any of its productions terminates.
  1413. for p in pl:
  1414. # Production p terminates iff all of its rhs symbols terminate.
  1415. for s in p.prod:
  1416. if not terminates[s]:
  1417. # The symbol s does not terminate,
  1418. # so production p does not terminate.
  1419. p_terminates = False
  1420. break
  1421. else:
  1422. # didn't break from the loop,
  1423. # so every symbol s terminates
  1424. # so production p terminates.
  1425. p_terminates = True
  1426. if p_terminates:
  1427. # symbol n terminates!
  1428. if not terminates[n]:
  1429. terminates[n] = True
  1430. some_change = True
  1431. # Don't need to consider any more productions for this n.
  1432. break
  1433. if not some_change:
  1434. break
  1435. infinite = []
  1436. for (s, term) in terminates.items():
  1437. if not term:
  1438. if s not in self.Prodnames and s not in self.Terminals and s != 'error':
  1439. # s is used-but-not-defined, and we've already warned of that,
  1440. # so it would be overkill to say that it's also non-terminating.
  1441. pass
  1442. else:
  1443. infinite.append(s)
  1444. return infinite
  1445. # -----------------------------------------------------------------------------
  1446. # undefined_symbols()
  1447. #
    # Find all symbols that were used in the grammar, but not defined as tokens or
    # grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol
    # and prod is the production where the symbol was used.
  1451. # -----------------------------------------------------------------------------
  1452. def undefined_symbols(self):
  1453. result = []
  1454. for p in self.Productions:
  1455. if not p:
  1456. continue
  1457. for s in p.prod:
  1458. if s not in self.Prodnames and s not in self.Terminals and s != 'error':
  1459. result.append((s, p))
  1460. return result
  1461. # -----------------------------------------------------------------------------
  1462. # unused_terminals()
  1463. #
  1464. # Find all terminals that were defined, but not used by the grammar. Returns
  1465. # a list of all symbols.
  1466. # -----------------------------------------------------------------------------
  1467. def unused_terminals(self):
  1468. unused_tok = []
  1469. for s, v in self.Terminals.items():
  1470. if s != 'error' and not v:
  1471. unused_tok.append(s)
  1472. return unused_tok
  1473. # ------------------------------------------------------------------------------
  1474. # unused_rules()
  1475. #
  1476. # Find all grammar rules that were defined, but not used (maybe not reachable)
  1477. # Returns a list of productions.
  1478. # ------------------------------------------------------------------------------
  1479. def unused_rules(self):
  1480. unused_prod = []
  1481. for s, v in self.Nonterminals.items():
  1482. if not v:
  1483. p = self.Prodnames[s][0]
  1484. unused_prod.append(p)
  1485. return unused_prod
  1486. # -----------------------------------------------------------------------------
  1487. # unused_precedence()
  1488. #
  1489. # Returns a list of tuples (term,precedence) corresponding to precedence
  1490. # rules that were never used by the grammar. term is the name of the terminal
  1491. # on which precedence was applied and precedence is a string such as 'left' or
  1492. # 'right' corresponding to the type of precedence.
  1493. # -----------------------------------------------------------------------------
  1494. def unused_precedence(self):
  1495. unused = []
  1496. for termname in self.Precedence:
  1497. if not (termname in self.Terminals or termname in self.UsedPrecedence):
  1498. unused.append((termname, self.Precedence[termname][0]))
  1499. return unused
  1500. # -------------------------------------------------------------------------
  1501. # _first()
  1502. #
  1503. # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
  1504. #
    # During execution of compute_first(), the result may be incomplete.
  1506. # Afterward (e.g., when called from compute_follow()), it will be complete.
  1507. # -------------------------------------------------------------------------
  1508. def _first(self, beta):
  1509. # We are computing First(x1,x2,x3,...,xn)
  1510. result = []
  1511. for x in beta:
  1512. x_produces_empty = False
  1513. # Add all the non-<empty> symbols of First[x] to the result.
  1514. for f in self.First[x]:
  1515. if f == '<empty>':
  1516. x_produces_empty = True
  1517. else:
  1518. if f not in result:
  1519. result.append(f)
  1520. if x_produces_empty:
  1521. # We have to consider the next x in beta,
  1522. # i.e. stay in the loop.
  1523. pass
  1524. else:
  1525. # We don't have to consider any further symbols in beta.
  1526. break
  1527. else:
  1528. # There was no 'break' from the loop,
  1529. # so x_produces_empty was true for all x in beta,
  1530. # so beta produces empty as well.
  1531. result.append('<empty>')
  1532. return result
  1533. # -------------------------------------------------------------------------
  1534. # compute_first()
  1535. #
  1536. # Compute the value of FIRST1(X) for all symbols
  1537. # -------------------------------------------------------------------------
  1538. def compute_first(self):
  1539. if self.First:
  1540. return self.First
  1541. # Terminals:
  1542. for t in self.Terminals:
  1543. self.First[t] = [t]
  1544. self.First['$end'] = ['$end']
  1545. # Nonterminals:
  1546. # Initialize to the empty set:
  1547. for n in self.Nonterminals:
  1548. self.First[n] = []
  1549. # Then propagate symbols until no change:
  1550. while True:
  1551. some_change = False
  1552. for n in self.Nonterminals:
  1553. for p in self.Prodnames[n]:
  1554. for f in self._first(p.prod):
  1555. if f not in self.First[n]:
  1556. self.First[n].append(f)
  1557. some_change = True
  1558. if not some_change:
  1559. break
  1560. return self.First
  1561. # ---------------------------------------------------------------------
  1562. # compute_follow()
  1563. #
  1564. # Computes all of the follow sets for every non-terminal symbol. The
  1565. # follow set is the set of all symbols that might follow a given
  1566. # non-terminal. See the Dragon book, 2nd Ed. p. 189.
  1567. # ---------------------------------------------------------------------
  1568. def compute_follow(self, start=None):
  1569. # If already computed, return the result
  1570. if self.Follow:
  1571. return self.Follow
  1572. # If first sets not computed yet, do that first.
  1573. if not self.First:
  1574. self.compute_first()
  1575. # Add '$end' to the follow list of the start symbol
  1576. for k in self.Nonterminals:
  1577. self.Follow[k] = []
  1578. if not start:
  1579. start = self.Productions[1].name
  1580. self.Follow[start] = ['$end']
  1581. while True:
  1582. didadd = False
  1583. for p in self.Productions[1:]:
  1584. # Here is the production set
  1585. for i, B in enumerate(p.prod):
  1586. if B in self.Nonterminals:
  1587. # Okay. We got a non-terminal in a production
  1588. fst = self._first(p.prod[i+1:])
  1589. hasempty = False
  1590. for f in fst:
  1591. if f != '<empty>' and f not in self.Follow[B]:
  1592. self.Follow[B].append(f)
  1593. didadd = True
  1594. if f == '<empty>':
  1595. hasempty = True
  1596. if hasempty or i == (len(p.prod)-1):
  1597. # Add elements of follow(a) to follow(b)
  1598. for f in self.Follow[p.name]:
  1599. if f not in self.Follow[B]:
  1600. self.Follow[B].append(f)
  1601. didadd = True
  1602. if not didadd:
  1603. break
  1604. return self.Follow
  1605. # -----------------------------------------------------------------------------
  1606. # build_lritems()
  1607. #
  1608. # This function walks the list of productions and builds a complete set of the
  1609. # LR items. The LR items are stored in two ways: First, they are uniquely
  1610. # numbered and placed in the list _lritems. Second, a linked list of LR items
  1611. # is built for each production. For example:
  1612. #
  1613. # E -> E PLUS E
  1614. #
  1615. # Creates the list
  1616. #
  1617. # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
  1618. # -----------------------------------------------------------------------------
  1619. def build_lritems(self):
  1620. for p in self.Productions:
  1621. lastlri = p
  1622. i = 0
  1623. lr_items = []
  1624. while True:
  1625. if i > len(p):
  1626. lri = None
  1627. else:
  1628. lri = LRItem(p, i)
  1629. # Precompute the list of productions immediately following
  1630. try:
  1631. lri.lr_after = self.Prodnames[lri.prod[i+1]]
  1632. except (IndexError, KeyError):
  1633. lri.lr_after = []
  1634. try:
  1635. lri.lr_before = lri.prod[i-1]
  1636. except IndexError:
  1637. lri.lr_before = None
  1638. lastlri.lr_next = lri
  1639. if not lri:
  1640. break
  1641. lr_items.append(lri)
  1642. lastlri = lri
  1643. i += 1
  1644. p.lr_items = lr_items
  1645. # -----------------------------------------------------------------------------
  1646. # == Class LRTable ==
  1647. #
  1648. # This basic class represents a basic table of LR parsing information.
  1649. # Methods for generating the tables are not defined here. They are defined
  1650. # in the derived class LRGeneratedTable.
  1651. # -----------------------------------------------------------------------------
  1652. class VersionError(YaccError):
  1653. pass
  1654. class LRTable(object):
  1655. def __init__(self):
  1656. self.lr_action = None
  1657. self.lr_goto = None
  1658. self.lr_productions = None
  1659. self.lr_method = None
  1660. def read_table(self, module):
  1661. if isinstance(module, types.ModuleType):
  1662. parsetab = module
  1663. else:
  1664. exec('import %s' % module)
  1665. parsetab = sys.modules[module]
  1666. if parsetab._tabversion != __tabversion__:
  1667. raise VersionError('yacc table file version is out of date')
  1668. self.lr_action = parsetab._lr_action
  1669. self.lr_goto = parsetab._lr_goto
  1670. self.lr_productions = []
  1671. for p in parsetab._lr_productions:
  1672. self.lr_productions.append(MiniProduction(*p))
  1673. self.lr_method = parsetab._lr_method
  1674. return parsetab._lr_signature
  1675. def read_pickle(self, filename):
  1676. try:
  1677. import cPickle as pickle
  1678. except ImportError:
  1679. import pickle
  1680. if not os.path.exists(filename):
  1681. raise ImportError
  1682. in_f = open(filename, 'rb')
  1683. tabversion = pickle.load(in_f)
  1684. if tabversion != __tabversion__:
  1685. raise VersionError('yacc table file version is out of date')
  1686. self.lr_method = pickle.load(in_f)
  1687. signature = pickle.load(in_f)
  1688. self.lr_action = pickle.load(in_f)
  1689. self.lr_goto = pickle.load(in_f)
  1690. productions = pickle.load(in_f)
  1691. self.lr_productions = []
  1692. for p in productions:
  1693. self.lr_productions.append(MiniProduction(*p))
  1694. in_f.close()
  1695. return signature
  1696. # Bind all production function names to callable objects in pdict
  1697. def bind_callables(self, pdict):
  1698. for p in self.lr_productions:
  1699. p.bind(pdict)
  1700. # -----------------------------------------------------------------------------
  1701. # === LR Generator ===
  1702. #
  1703. # The following classes and functions are used to generate LR parsing tables on
  1704. # a grammar.
  1705. # -----------------------------------------------------------------------------
  1706. # -----------------------------------------------------------------------------
  1707. # digraph()
  1708. # traverse()
  1709. #
  1710. # The following two functions are used to compute set valued functions
  1711. # of the form:
  1712. #
  1713. # F(x) = F'(x) U U{F(y) | x R y}
  1714. #
  1715. # This is used to compute the values of Read() sets as well as FOLLOW sets
  1716. # in LALR(1) generation.
  1717. #
  1718. # Inputs: X - An input set
  1719. # R - A relation
  1720. # FP - Set-valued function
  1721. # ------------------------------------------------------------------------------
  1722. def digraph(X, R, FP):
  1723. N = {}
  1724. for x in X:
  1725. N[x] = 0
  1726. stack = []
  1727. F = {}
  1728. for x in X:
  1729. if N[x] == 0:
  1730. traverse(x, N, stack, F, X, R, FP)
  1731. return F
  1732. def traverse(x, N, stack, F, X, R, FP):
  1733. stack.append(x)
  1734. d = len(stack)
  1735. N[x] = d
  1736. F[x] = FP(x) # F(X) <- F'(x)
  1737. rel = R(x) # Get y's related to x
  1738. for y in rel:
  1739. if N[y] == 0:
  1740. traverse(y, N, stack, F, X, R, FP)
  1741. N[x] = min(N[x], N[y])
  1742. for a in F.get(y, []):
  1743. if a not in F[x]:
  1744. F[x].append(a)
  1745. if N[x] == d:
  1746. N[stack[-1]] = MAXINT
  1747. F[stack[-1]] = F[x]
  1748. element = stack.pop()
  1749. while element != x:
  1750. N[stack[-1]] = MAXINT
  1751. F[stack[-1]] = F[x]
  1752. element = stack.pop()
  1753. class LALRError(YaccError):
  1754. pass
  1755. # -----------------------------------------------------------------------------
  1756. # == LRGeneratedTable ==
  1757. #
  1758. # This class implements the LR table generation algorithm. The tables are
  1759. # built by the constructor and saved with write_table() or pickle_table().
  1760. # -----------------------------------------------------------------------------
  1761. class LRGeneratedTable(LRTable):
  1762. def __init__(self, grammar, method='LALR', log=None):
  1763. if method not in ['SLR', 'LALR']:
  1764. raise LALRError('Unsupported method %s' % method)
  1765. self.grammar = grammar
  1766. self.lr_method = method
  1767. # Set up the logger
  1768. if not log:
  1769. log = NullLogger()
  1770. self.log = log
  1771. # Internal attributes
  1772. self.lr_action = {} # Action table
  1773. self.lr_goto = {} # Goto table
  1774. self.lr_productions = grammar.Productions # Copy of grammar Production array
  1775. self.lr_goto_cache = {} # Cache of computed gotos
  1776. self.lr0_cidhash = {} # Cache of closures
  1777. self._add_count = 0 # Internal counter used to detect cycles
  1778. # Diagonistic information filled in by the table generator
  1779. self.sr_conflict = 0
  1780. self.rr_conflict = 0
  1781. self.conflicts = [] # List of conflicts
  1782. self.sr_conflicts = []
  1783. self.rr_conflicts = []
  1784. # Build the tables
  1785. self.grammar.build_lritems()
  1786. self.grammar.compute_first()
  1787. self.grammar.compute_follow()
  1788. self.lr_parse_table()
  1789. # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
  1790. def lr0_closure(self, I):
  1791. self._add_count += 1
  1792. # Add everything in I to J
  1793. J = I[:]
  1794. didadd = True
  1795. while didadd:
  1796. didadd = False
  1797. for j in J:
  1798. for x in j.lr_after:
  1799. if getattr(x, 'lr0_added', 0) == self._add_count:
  1800. continue
  1801. # Add B --> .G to J
  1802. J.append(x.lr_next)
  1803. x.lr0_added = self._add_count
  1804. didadd = True
  1805. return J
  1806. # Compute the LR(0) goto function goto(I,X) where I is a set
  1807. # of LR(0) items and X is a grammar symbol. This function is written
  1808. # in a way that guarantees uniqueness of the generated goto sets
  1809. # (i.e. the same goto set will never be returned as two different Python
  1810. # objects). With uniqueness, we can later do fast set comparisons using
  1811. # id(obj) instead of element-wise comparison.
  1812. def lr0_goto(self, I, x):
  1813. # First we look for a previously cached entry
  1814. g = self.lr_goto_cache.get((id(I), x))
  1815. if g:
  1816. return g
  1817. # Now we generate the goto set in a way that guarantees uniqueness
  1818. # of the result
  1819. s = self.lr_goto_cache.get(x)
  1820. if not s:
  1821. s = {}
  1822. self.lr_goto_cache[x] = s
  1823. gs = []
  1824. for p in I:
  1825. n = p.lr_next
  1826. if n and n.lr_before == x:
  1827. s1 = s.get(id(n))
  1828. if not s1:
  1829. s1 = {}
  1830. s[id(n)] = s1
  1831. gs.append(n)
  1832. s = s1
  1833. g = s.get('$end')
  1834. if not g:
  1835. if gs:
  1836. g = self.lr0_closure(gs)
  1837. s['$end'] = g
  1838. else:
  1839. s['$end'] = gs
  1840. self.lr_goto_cache[(id(I), x)] = g
  1841. return g
  1842. # Compute the LR(0) sets of item function
  1843. def lr0_items(self):
  1844. C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
  1845. i = 0
  1846. for I in C:
  1847. self.lr0_cidhash[id(I)] = i
  1848. i += 1
  1849. # Loop over the items in C and each grammar symbols
  1850. i = 0
  1851. while i < len(C):
  1852. I = C[i]
  1853. i += 1
  1854. # Collect all of the symbols that could possibly be in the goto(I,X) sets
  1855. asyms = {}
  1856. for ii in I:
  1857. for s in ii.usyms:
  1858. asyms[s] = None
  1859. for x in asyms:
  1860. g = self.lr0_goto(I, x)
  1861. if not g or id(g) in self.lr0_cidhash:
  1862. continue
  1863. self.lr0_cidhash[id(g)] = len(C)
  1864. C.append(g)
  1865. return C
  1866. # -----------------------------------------------------------------------------
  1867. # ==== LALR(1) Parsing ====
  1868. #
  1869. # LALR(1) parsing is almost exactly the same as SLR except that instead of
  1870. # relying upon Follow() sets when performing reductions, a more selective
  1871. # lookahead set that incorporates the state of the LR(0) machine is utilized.
  1872. # Thus, we mainly just have to focus on calculating the lookahead sets.
  1873. #
  1874. # The method used here is due to DeRemer and Pennello (1982).
  1875. #
  1876. # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
  1877. # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
  1878. # Vol. 4, No. 4, Oct. 1982, pp. 615-649
  1879. #
  1880. # Further details can also be found in:
  1881. #
  1882. # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
  1883. # McGraw-Hill Book Company, (1985).
  1884. #
  1885. # -----------------------------------------------------------------------------
  1886. # -----------------------------------------------------------------------------
  1887. # compute_nullable_nonterminals()
  1888. #
  1889. # Creates a dictionary containing all of the non-terminals that might produce
  1890. # an empty production.
  1891. # -----------------------------------------------------------------------------
  1892. def compute_nullable_nonterminals(self):
  1893. nullable = set()
  1894. num_nullable = 0
  1895. while True:
  1896. for p in self.grammar.Productions[1:]:
  1897. if p.len == 0:
  1898. nullable.add(p.name)
  1899. continue
  1900. for t in p.prod:
  1901. if t not in nullable:
  1902. break
  1903. else:
  1904. nullable.add(p.name)
  1905. if len(nullable) == num_nullable:
  1906. break
  1907. num_nullable = len(nullable)
  1908. return nullable
  1909. # -----------------------------------------------------------------------------
  1910. # find_nonterminal_transitions(C)
  1911. #
  1912. # Given a set of LR(0) items, this function finds all of the non-terminal
  1913. # transitions. These are transitions in which a dot appears immediately before
  1914. # a non-terminal. Returns a list of tuples of the form (state,N) where state
  1915. # is the state number and N is the nonterminal symbol.
  1916. #
  1917. # The input C is the set of LR(0) items.
  1918. # -----------------------------------------------------------------------------
  1919. def find_nonterminal_transitions(self, C):
  1920. trans = []
  1921. for stateno, state in enumerate(C):
  1922. for p in state:
  1923. if p.lr_index < p.len - 1:
  1924. t = (stateno, p.prod[p.lr_index+1])
  1925. if t[1] in self.grammar.Nonterminals:
  1926. if t not in trans:
  1927. trans.append(t)
  1928. return trans
  1929. # -----------------------------------------------------------------------------
  1930. # dr_relation()
  1931. #
  1932. # Computes the DR(p,A) relationships for non-terminal transitions. The input
  1933. # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
  1934. #
  1935. # Returns a list of terminals.
  1936. # -----------------------------------------------------------------------------
  1937. def dr_relation(self, C, trans, nullable):
  1938. state, N = trans
  1939. terms = []
  1940. g = self.lr0_goto(C[state], N)
  1941. for p in g:
  1942. if p.lr_index < p.len - 1:
  1943. a = p.prod[p.lr_index+1]
  1944. if a in self.grammar.Terminals:
  1945. if a not in terms:
  1946. terms.append(a)
  1947. # This extra bit is to handle the start state
  1948. if state == 0 and N == self.grammar.Productions[0].prod[0]:
  1949. terms.append('$end')
  1950. return terms
  1951. # -----------------------------------------------------------------------------
  1952. # reads_relation()
  1953. #
  1954. # Computes the READS() relation (p,A) READS (t,C).
  1955. # -----------------------------------------------------------------------------
  1956. def reads_relation(self, C, trans, empty):
  1957. # Look for empty transitions
  1958. rel = []
  1959. state, N = trans
  1960. g = self.lr0_goto(C[state], N)
  1961. j = self.lr0_cidhash.get(id(g), -1)
  1962. for p in g:
  1963. if p.lr_index < p.len - 1:
  1964. a = p.prod[p.lr_index + 1]
  1965. if a in empty:
  1966. rel.append((j, a))
  1967. return rel
  1968. # -----------------------------------------------------------------------------
  1969. # compute_lookback_includes()
  1970. #
  1971. # Determines the lookback and includes relations
  1972. #
  1973. # LOOKBACK:
  1974. #
  1975. # This relation is determined by running the LR(0) state machine forward.
  1976. # For example, starting with a production "N : . A B C", we run it forward
  1977. # to obtain "N : A B C ." We then build a relationship between this final
  1978. # state and the starting state. These relationships are stored in a dictionary
  1979. # lookdict.
  1980. #
  1981. # INCLUDES:
  1982. #
  1983. # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
  1984. #
  1985. # This relation is used to determine non-terminal transitions that occur
  1986. # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
  1987. # if the following holds:
  1988. #
  1989. # B -> LAT, where T -> epsilon and p' -L-> p
  1990. #
  1991. # L is essentially a prefix (which may be empty), T is a suffix that must be
  1992. # able to derive an empty string. State p' must lead to state p with the string L.
  1993. #
  1994. # -----------------------------------------------------------------------------
  1995. def compute_lookback_includes(self, C, trans, nullable):
  1996. lookdict = {} # Dictionary of lookback relations
  1997. includedict = {} # Dictionary of include relations
  1998. # Make a dictionary of non-terminal transitions
  1999. dtrans = {}
  2000. for t in trans:
  2001. dtrans[t] = 1
  2002. # Loop over all transitions and compute lookbacks and includes
  2003. for state, N in trans:
  2004. lookb = []
  2005. includes = []
  2006. for p in C[state]:
  2007. if p.name != N:
  2008. continue
  2009. # Okay, we have a name match. We now follow the production all the way
  2010. # through the state machine until we get the . on the right hand side
  2011. lr_index = p.lr_index
  2012. j = state
  2013. while lr_index < p.len - 1:
  2014. lr_index = lr_index + 1
  2015. t = p.prod[lr_index]
  2016. # Check to see if this symbol and state are a non-terminal transition
  2017. if (j, t) in dtrans:
  2018. # Yes. Okay, there is some chance that this is an includes relation
  2019. # the only way to know for certain is whether the rest of the
  2020. # production derives empty
  2021. li = lr_index + 1
  2022. while li < p.len:
  2023. if p.prod[li] in self.grammar.Terminals:
  2024. break # No forget it
  2025. if p.prod[li] not in nullable:
  2026. break
  2027. li = li + 1
  2028. else:
  2029. # Appears to be a relation between (j,t) and (state,N)
  2030. includes.append((j, t))
  2031. g = self.lr0_goto(C[j], t) # Go to next set
  2032. j = self.lr0_cidhash.get(id(g), -1) # Go to next state
  2033. # When we get here, j is the final state, now we have to locate the production
  2034. for r in C[j]:
  2035. if r.name != p.name:
  2036. continue
  2037. if r.len != p.len:
  2038. continue
  2039. i = 0
  2040. # This look is comparing a production ". A B C" with "A B C ."
  2041. while i < r.lr_index:
  2042. if r.prod[i] != p.prod[i+1]:
  2043. break
  2044. i = i + 1
  2045. else:
  2046. lookb.append((j, r))
  2047. for i in includes:
  2048. if i not in includedict:
  2049. includedict[i] = []
  2050. includedict[i].append((state, N))
  2051. lookdict[(state, N)] = lookb
  2052. return lookdict, includedict
  2053. # -----------------------------------------------------------------------------
  2054. # compute_read_sets()
  2055. #
  2056. # Given a set of LR(0) items, this function computes the read sets.
  2057. #
  2058. # Inputs: C = Set of LR(0) items
  2059. # ntrans = Set of nonterminal transitions
  2060. # nullable = Set of empty transitions
  2061. #
  2062. # Returns a set containing the read sets
  2063. # -----------------------------------------------------------------------------
  2064. def compute_read_sets(self, C, ntrans, nullable):
  2065. FP = lambda x: self.dr_relation(C, x, nullable)
  2066. R = lambda x: self.reads_relation(C, x, nullable)
  2067. F = digraph(ntrans, R, FP)
  2068. return F
  2069. # -----------------------------------------------------------------------------
  2070. # compute_follow_sets()
  2071. #
  2072. # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
  2073. # and an include set, this function computes the follow sets
  2074. #
  2075. # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
  2076. #
  2077. # Inputs:
  2078. # ntrans = Set of nonterminal transitions
  2079. # readsets = Readset (previously computed)
  2080. # inclsets = Include sets (previously computed)
  2081. #
  2082. # Returns a set containing the follow sets
  2083. # -----------------------------------------------------------------------------
  2084. def compute_follow_sets(self, ntrans, readsets, inclsets):
  2085. FP = lambda x: readsets[x]
  2086. R = lambda x: inclsets.get(x, [])
  2087. F = digraph(ntrans, R, FP)
  2088. return F
  2089. # -----------------------------------------------------------------------------
  2090. # add_lookaheads()
  2091. #
  2092. # Attaches the lookahead symbols to grammar rules.
  2093. #
  2094. # Inputs: lookbacks - Set of lookback relations
  2095. # followset - Computed follow set
  2096. #
  2097. # This function directly attaches the lookaheads to productions contained
  2098. # in the lookbacks set
  2099. # -----------------------------------------------------------------------------
  2100. def add_lookaheads(self, lookbacks, followset):
  2101. for trans, lb in lookbacks.items():
  2102. # Loop over productions in lookback
  2103. for state, p in lb:
  2104. if state not in p.lookaheads:
  2105. p.lookaheads[state] = []
  2106. f = followset.get(trans, [])
  2107. for a in f:
  2108. if a not in p.lookaheads[state]:
  2109. p.lookaheads[state].append(a)
  2110. # -----------------------------------------------------------------------------
  2111. # add_lalr_lookaheads()
  2112. #
  2113. # This function does all of the work of adding lookahead information for use
  2114. # with LALR parsing
  2115. # -----------------------------------------------------------------------------
  2116. def add_lalr_lookaheads(self, C):
  2117. # Determine all of the nullable nonterminals
  2118. nullable = self.compute_nullable_nonterminals()
  2119. # Find all non-terminal transitions
  2120. trans = self.find_nonterminal_transitions(C)
  2121. # Compute read sets
  2122. readsets = self.compute_read_sets(C, trans, nullable)
  2123. # Compute lookback/includes relations
  2124. lookd, included = self.compute_lookback_includes(C, trans, nullable)
  2125. # Compute LALR FOLLOW sets
  2126. followsets = self.compute_follow_sets(trans, readsets, included)
  2127. # Add all of the lookaheads
  2128. self.add_lookaheads(lookd, followsets)
  2129. # -----------------------------------------------------------------------------
  2130. # lr_parse_table()
  2131. #
  2132. # This function constructs the parse tables for SLR or LALR
  2133. # -----------------------------------------------------------------------------
    def lr_parse_table(self):
        """Fill self.lr_action and self.lr_goto for every LR(0) state.

        Action entries are encoded as integers: a positive value j means
        "shift and go to state j", a negative value -n means "reduce using
        rule n", 0 is the accept action stored for '$end', and None marks a
        terminal disabled by a nonassoc precedence clash.

        Shift/reduce and reduce/reduce conflicts are resolved here and
        recorded in self.sr_conflicts / self.rr_conflicts; the decisions
        are reported through self.log.  Raises LALRError for a shift/shift
        conflict or any conflict that fits none of the known cases.
        """
        Productions = self.grammar.Productions
        Precedence = self.grammar.Precedence
        goto = self.lr_goto         # Goto array
        action = self.lr_action     # Action array
        log = self.log              # Logger for output

        actionp = {}                # Action production array (temporary)

        log.info('Parsing method: %s', self.lr_method)

        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
        # This determines the number of states
        C = self.lr0_items()

        if self.lr_method == 'LALR':
            self.add_lalr_lookaheads(C)

        # Build the parser table, state by state
        st = 0
        for I in C:
            # Loop over each production in I
            actlist = []            # List of actions
            st_action = {}          # terminal -> encoded action for this state
            st_actionp = {}         # terminal -> item that produced that action
            st_goto = {}
            log.info('')
            log.info('state %d', st)
            log.info('')
            for p in I:
                log.info(' (%d) %s', p.number, p)
            log.info('')

            for p in I:
                # The dot is the last element of the item: a complete rule.
                if p.len == p.lr_index + 1:
                    if p.name == "S'":
                        # Start symbol. Accept!
                        st_action['$end'] = 0
                        st_actionp['$end'] = p
                    else:
                        # We are at the end of a production. Reduce!
                        # LALR uses the per-state lookaheads, SLR the Follow set.
                        if self.lr_method == 'LALR':
                            laheads = p.lookaheads[st]
                        else:
                            laheads = self.grammar.Follow[p.name]
                        for a in laheads:
                            actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
                            r = st_action.get(a)
                            if r is not None:
                                # Whoa. Have a shift/reduce or reduce/reduce conflict
                                if r > 0:
                                    # Need to decide on shift or reduce here
                                    # By default we favor shifting. Need to add
                                    # some precedence rules here.

                                    # Shift precedence comes from the token
                                    sprec, slevel = Precedence.get(a, ('right', 0))

                                    # Reduce precedence comes from rule being reduced (p)
                                    rprec, rlevel = Productions[p.number].prec

                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                        # We really need to reduce here.
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        # Only an unresolved clash (no precedence on
                                        # either side) is recorded as a conflict.
                                        if not slevel and not rlevel:
                                            log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
                                            self.sr_conflicts.append((st, a, 'reduce'))
                                        Productions[p.number].reduced += 1
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        # Equal level and non-associative: no action at all.
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the shift
                                        if not rlevel:
                                            log.info(' ! shift/reduce conflict for %s resolved as shift', a)
                                            self.sr_conflicts.append((st, a, 'shift'))
                                elif r < 0:
                                    # Reduce/reduce conflict. In this case, we favor the rule
                                    # that was defined first in the grammar file
                                    oldp = Productions[-r]
                                    pp = Productions[p.number]
                                    if oldp.line > pp.line:
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        chosenp, rejectp = pp, oldp
                                        # Move the "reduced" credit to the winning rule.
                                        Productions[p.number].reduced += 1
                                        Productions[oldp.number].reduced -= 1
                                    else:
                                        chosenp, rejectp = oldp, pp
                                    self.rr_conflicts.append((st, chosenp, rejectp))
                                    log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)',
                                             a, st_actionp[a].number, st_actionp[a])
                                else:
                                    # r == 0: the existing entry is the accept action.
                                    raise LALRError('Unknown conflict in state %d' % st)
                            else:
                                st_action[a] = -p.number
                                st_actionp[a] = p
                                Productions[p.number].reduced += 1
                else:
                    i = p.lr_index
                    a = p.prod[i+1]     # Get symbol right after the "."
                    if a in self.grammar.Terminals:
                        g = self.lr0_goto(I, a)
                        j = self.lr0_cidhash.get(id(g), -1)
                        if j >= 0:
                            # We are in a shift state
                            actlist.append((a, p, 'shift and go to state %d' % j))
                            r = st_action.get(a)
                            if r is not None:
                                # Whoa have a shift/reduce or shift/shift conflict
                                if r > 0:
                                    # An identical shift is fine; a different target is not.
                                    if r != j:
                                        raise LALRError('Shift/shift conflict in state %d' % st)
                                elif r < 0:
                                    # Do a precedence check.
                                    # - if precedence of reduce rule is higher, we reduce.
                                    # - if precedence of reduce is same and left assoc, we reduce.
                                    # - otherwise we shift

                                    # Shift precedence comes from the token
                                    sprec, slevel = Precedence.get(a, ('right', 0))

                                    # Reduce precedence comes from the rule that could have been reduced
                                    rprec, rlevel = Productions[st_actionp[a].number].prec

                                    if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                        # We decide to shift here... highest precedence to shift
                                        Productions[st_actionp[a].number].reduced -= 1
                                        st_action[a] = j
                                        st_actionp[a] = p
                                        if not rlevel:
                                            log.info(' ! shift/reduce conflict for %s resolved as shift', a)
                                            self.sr_conflicts.append((st, a, 'shift'))
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the reduce
                                        if not slevel and not rlevel:
                                            log.info(' ! shift/reduce conflict for %s resolved as reduce', a)
                                            self.sr_conflicts.append((st, a, 'reduce'))

                                else:
                                    # r == 0: the existing entry is the accept action.
                                    raise LALRError('Unknown conflict in state %d' % st)
                            else:
                                st_action[a] = j
                                st_actionp[a] = p

            # Print the actions associated with each terminal
            _actprint = {}
            for a, p, m in actlist:
                if a in st_action:
                    if p is st_actionp[a]:
                        log.info(' %-15s %s', a, m)
                        _actprint[(a, m)] = 1
            log.info('')
            # Print the actions that were not used. (debugging)
            not_used = 0
            for a, p, m in actlist:
                if a in st_action:
                    if p is not st_actionp[a]:
                        if not (a, m) in _actprint:
                            log.debug(' ! %-15s [ %s ]', a, m)
                            not_used = 1
                            _actprint[(a, m)] = 1
            if not_used:
                log.debug('')

            # Construct the goto table for this state
            nkeys = {}
            for ii in I:
                for s in ii.usyms:
                    if s in self.grammar.Nonterminals:
                        nkeys[s] = None
            for n in nkeys:
                g = self.lr0_goto(I, n)
                j = self.lr0_cidhash.get(id(g), -1)
                if j >= 0:
                    st_goto[n] = j
                    log.info(' %-30s shift and go to state %d', n, j)

            # Publish this state's rows.
            action[st] = st_action
            actionp[st] = st_actionp
            goto[st] = st_goto
            st += 1
  2302. # -----------------------------------------------------------------------------
  2303. # write_table()
  2304. #
  2305. # This function writes the LR parsing tables to a file
  2306. # -----------------------------------------------------------------------------
  2307. def write_table(self, tabmodule, outputdir='', signature=''):
  2308. if isinstance(tabmodule, types.ModuleType):
  2309. raise IOError("Won't overwrite existing tabmodule")
  2310. basemodulename = tabmodule.split('.')[-1]
  2311. filename = os.path.join(outputdir, basemodulename) + '.py'
  2312. try:
  2313. f = open(filename, 'w')
  2314. f.write('''
  2315. # %s
  2316. # This file is automatically generated. Do not edit.
  2317. # pylint: disable=W,C,R
  2318. _tabversion = %r
  2319. _lr_method = %r
  2320. _lr_signature = %r
  2321. ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature))
  2322. # Change smaller to 0 to go back to original tables
  2323. smaller = 1
  2324. # Factor out names to try and make smaller
  2325. if smaller:
  2326. items = {}
  2327. for s, nd in self.lr_action.items():
  2328. for name, v in nd.items():
  2329. i = items.get(name)
  2330. if not i:
  2331. i = ([], [])
  2332. items[name] = i
  2333. i[0].append(s)
  2334. i[1].append(v)
  2335. f.write('\n_lr_action_items = {')
  2336. for k, v in items.items():
  2337. f.write('%r:([' % k)
  2338. for i in v[0]:
  2339. f.write('%r,' % i)
  2340. f.write('],[')
  2341. for i in v[1]:
  2342. f.write('%r,' % i)
  2343. f.write(']),')
  2344. f.write('}\n')
  2345. f.write('''
  2346. _lr_action = {}
  2347. for _k, _v in _lr_action_items.items():
  2348. for _x,_y in zip(_v[0],_v[1]):
  2349. if not _x in _lr_action: _lr_action[_x] = {}
  2350. _lr_action[_x][_k] = _y
  2351. del _lr_action_items
  2352. ''')
  2353. else:
  2354. f.write('\n_lr_action = { ')
  2355. for k, v in self.lr_action.items():
  2356. f.write('(%r,%r):%r,' % (k[0], k[1], v))
  2357. f.write('}\n')
  2358. if smaller:
  2359. # Factor out names to try and make smaller
  2360. items = {}
  2361. for s, nd in self.lr_goto.items():
  2362. for name, v in nd.items():
  2363. i = items.get(name)
  2364. if not i:
  2365. i = ([], [])
  2366. items[name] = i
  2367. i[0].append(s)
  2368. i[1].append(v)
  2369. f.write('\n_lr_goto_items = {')
  2370. for k, v in items.items():
  2371. f.write('%r:([' % k)
  2372. for i in v[0]:
  2373. f.write('%r,' % i)
  2374. f.write('],[')
  2375. for i in v[1]:
  2376. f.write('%r,' % i)
  2377. f.write(']),')
  2378. f.write('}\n')
  2379. f.write('''
  2380. _lr_goto = {}
  2381. for _k, _v in _lr_goto_items.items():
  2382. for _x, _y in zip(_v[0], _v[1]):
  2383. if not _x in _lr_goto: _lr_goto[_x] = {}
  2384. _lr_goto[_x][_k] = _y
  2385. del _lr_goto_items
  2386. ''')
  2387. else:
  2388. f.write('\n_lr_goto = { ')
  2389. for k, v in self.lr_goto.items():
  2390. f.write('(%r,%r):%r,' % (k[0], k[1], v))
  2391. f.write('}\n')
  2392. # Write production table
  2393. f.write('_lr_productions = [\n')
  2394. for p in self.lr_productions:
  2395. if p.func:
  2396. f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len,
  2397. p.func, os.path.basename(p.file), p.line))
  2398. else:
  2399. f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len))
  2400. f.write(']\n')
  2401. f.close()
  2402. except IOError as e:
  2403. raise
  2404. # -----------------------------------------------------------------------------
  2405. # pickle_table()
  2406. #
# This function pickles the LR parsing tables to the file named by filename
  2408. # -----------------------------------------------------------------------------
  2409. def pickle_table(self, filename, signature=''):
  2410. try:
  2411. import cPickle as pickle
  2412. except ImportError:
  2413. import pickle
  2414. with open(filename, 'wb') as outf:
  2415. pickle.dump(__tabversion__, outf, pickle_protocol)
  2416. pickle.dump(self.lr_method, outf, pickle_protocol)
  2417. pickle.dump(signature, outf, pickle_protocol)
  2418. pickle.dump(self.lr_action, outf, pickle_protocol)
  2419. pickle.dump(self.lr_goto, outf, pickle_protocol)
  2420. outp = []
  2421. for p in self.lr_productions:
  2422. if p.func:
  2423. outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line))
  2424. else:
  2425. outp.append((str(p), p.name, p.len, None, None, None))
  2426. pickle.dump(outp, outf, pickle_protocol)
  2427. # -----------------------------------------------------------------------------
  2428. # === INTROSPECTION ===
  2429. #
  2430. # The following functions and classes are used to implement the PLY
  2431. # introspection features followed by the yacc() function itself.
  2432. # -----------------------------------------------------------------------------
  2433. # -----------------------------------------------------------------------------
  2434. # get_caller_module_dict()
  2435. #
  2436. # This function returns a dictionary containing all of the symbols defined within
  2437. # a caller further down the call stack. This is used to get the environment
  2438. # associated with the yacc() call if none was provided.
  2439. # -----------------------------------------------------------------------------
  2440. def get_caller_module_dict(levels):
  2441. f = sys._getframe(levels)
  2442. ldict = f.f_globals.copy()
  2443. if f.f_globals != f.f_locals:
  2444. ldict.update(f.f_locals)
  2445. return ldict
  2446. # -----------------------------------------------------------------------------
  2447. # parse_grammar()
  2448. #
  2449. # This takes a raw grammar rule string and parses it into production data
  2450. # -----------------------------------------------------------------------------
  2451. def parse_grammar(doc, file, line):
  2452. grammar = []
  2453. # Split the doc string into lines
  2454. pstrings = doc.splitlines()
  2455. lastp = None
  2456. dline = line
  2457. for ps in pstrings:
  2458. dline += 1
  2459. p = ps.split()
  2460. if not p:
  2461. continue
  2462. try:
  2463. if p[0] == '|':
  2464. # This is a continuation of a previous rule
  2465. if not lastp:
  2466. raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
  2467. prodname = lastp
  2468. syms = p[1:]
  2469. else:
  2470. prodname = p[0]
  2471. lastp = prodname
  2472. syms = p[2:]
  2473. assign = p[1]
  2474. if assign != ':' and assign != '::=':
  2475. raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))
  2476. grammar.append((file, dline, prodname, syms))
  2477. except SyntaxError:
  2478. raise
  2479. except Exception:
  2480. raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip()))
  2481. return grammar
  2482. # -----------------------------------------------------------------------------
  2483. # ParserReflect()
  2484. #
  2485. # This class represents information extracted for building a parser including
  2486. # start symbol, error function, tokens, precedence list, action functions,
  2487. # etc.
  2488. # -----------------------------------------------------------------------------
  2489. class ParserReflect(object):
  2490. def __init__(self, pdict, log=None):
  2491. self.pdict = pdict
  2492. self.start = None
  2493. self.error_func = None
  2494. self.tokens = None
  2495. self.modules = set()
  2496. self.grammar = []
  2497. self.error = False
  2498. if log is None:
  2499. self.log = PlyLogger(sys.stderr)
  2500. else:
  2501. self.log = log
  2502. # Get all of the basic information
  2503. def get_all(self):
  2504. self.get_start()
  2505. self.get_error_func()
  2506. self.get_tokens()
  2507. self.get_precedence()
  2508. self.get_pfunctions()
  2509. # Validate all of the information
  2510. def validate_all(self):
  2511. self.validate_start()
  2512. self.validate_error_func()
  2513. self.validate_tokens()
  2514. self.validate_precedence()
  2515. self.validate_pfunctions()
  2516. self.validate_modules()
  2517. return self.error
  2518. # Compute a signature over the grammar
  2519. def signature(self):
  2520. parts = []
  2521. try:
  2522. if self.start:
  2523. parts.append(self.start)
  2524. if self.prec:
  2525. parts.append(''.join([''.join(p) for p in self.prec]))
  2526. if self.tokens:
  2527. parts.append(' '.join(self.tokens))
  2528. for f in self.pfuncs:
  2529. if f[3]:
  2530. parts.append(f[3])
  2531. except (TypeError, ValueError):
  2532. pass
  2533. return ''.join(parts)
  2534. # -----------------------------------------------------------------------------
  2535. # validate_modules()
  2536. #
  2537. # This method checks to see if there are duplicated p_rulename() functions
  2538. # in the parser module file. Without this function, it is really easy for
  2539. # users to make mistakes by cutting and pasting code fragments (and it's a real
  2540. # bugger to try and figure out why the resulting parser doesn't work). Therefore,
  2541. # we just do a little regular expression pattern matching of def statements
  2542. # to try and detect duplicates.
  2543. # -----------------------------------------------------------------------------
  2544. def validate_modules(self):
  2545. # Match def p_funcname(
  2546. fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
  2547. for module in self.modules:
  2548. try:
  2549. lines, linen = inspect.getsourcelines(module)
  2550. except IOError:
  2551. continue
  2552. counthash = {}
  2553. for linen, line in enumerate(lines):
  2554. linen += 1
  2555. m = fre.match(line)
  2556. if m:
  2557. name = m.group(1)
  2558. prev = counthash.get(name)
  2559. if not prev:
  2560. counthash[name] = linen
  2561. else:
  2562. filename = inspect.getsourcefile(module)
  2563. self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
  2564. filename, linen, name, prev)
  2565. # Get the start symbol
  2566. def get_start(self):
  2567. self.start = self.pdict.get('start')
  2568. # Validate the start symbol
  2569. def validate_start(self):
  2570. if self.start is not None:
  2571. if not isinstance(self.start, string_types):
  2572. self.log.error("'start' must be a string")
  2573. # Look for error handler
  2574. def get_error_func(self):
  2575. self.error_func = self.pdict.get('p_error')
  2576. # Validate the error function
  2577. def validate_error_func(self):
  2578. if self.error_func:
  2579. if isinstance(self.error_func, types.FunctionType):
  2580. ismethod = 0
  2581. elif isinstance(self.error_func, types.MethodType):
  2582. ismethod = 1
  2583. else:
  2584. self.log.error("'p_error' defined, but is not a function or method")
  2585. self.error = True
  2586. return
  2587. eline = self.error_func.__code__.co_firstlineno
  2588. efile = self.error_func.__code__.co_filename
  2589. module = inspect.getmodule(self.error_func)
  2590. self.modules.add(module)
  2591. argcount = self.error_func.__code__.co_argcount - ismethod
  2592. if argcount != 1:
  2593. self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
  2594. self.error = True
  2595. # Get the tokens map
  2596. def get_tokens(self):
  2597. tokens = self.pdict.get('tokens')
  2598. if not tokens:
  2599. self.log.error('No token list is defined')
  2600. self.error = True
  2601. return
  2602. if not isinstance(tokens, (list, tuple)):
  2603. self.log.error('tokens must be a list or tuple')
  2604. self.error = True
  2605. return
  2606. if not tokens:
  2607. self.log.error('tokens is empty')
  2608. self.error = True
  2609. return
  2610. self.tokens = sorted(tokens)
  2611. # Validate the tokens
  2612. def validate_tokens(self):
  2613. # Validate the tokens.
  2614. if 'error' in self.tokens:
  2615. self.log.error("Illegal token name 'error'. Is a reserved word")
  2616. self.error = True
  2617. return
  2618. terminals = set()
  2619. for n in self.tokens:
  2620. if n in terminals:
  2621. self.log.warning('Token %r multiply defined', n)
  2622. terminals.add(n)
  2623. # Get the precedence map (if any)
  2624. def get_precedence(self):
  2625. self.prec = self.pdict.get('precedence')
  2626. # Validate and parse the precedence map
  2627. def validate_precedence(self):
  2628. preclist = []
  2629. if self.prec:
  2630. if not isinstance(self.prec, (list, tuple)):
  2631. self.log.error('precedence must be a list or tuple')
  2632. self.error = True
  2633. return
  2634. for level, p in enumerate(self.prec):
  2635. if not isinstance(p, (list, tuple)):
  2636. self.log.error('Bad precedence table')
  2637. self.error = True
  2638. return
  2639. if len(p) < 2:
  2640. self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
  2641. self.error = True
  2642. return
  2643. assoc = p[0]
  2644. if not isinstance(assoc, string_types):
  2645. self.log.error('precedence associativity must be a string')
  2646. self.error = True
  2647. return
  2648. for term in p[1:]:
  2649. if not isinstance(term, string_types):
  2650. self.log.error('precedence items must be strings')
  2651. self.error = True
  2652. return
  2653. preclist.append((term, assoc, level+1))
  2654. self.preclist = preclist
  2655. # Get all p_functions from the grammar
  2656. def get_pfunctions(self):
  2657. p_functions = []
  2658. for name, item in self.pdict.items():
  2659. if not name.startswith('p_') or name == 'p_error':
  2660. continue
  2661. if isinstance(item, (types.FunctionType, types.MethodType)):
  2662. line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno)
  2663. module = inspect.getmodule(item)
  2664. p_functions.append((line, module, name, item.__doc__))
  2665. # Sort all of the actions by line number; make sure to stringify
  2666. # modules to make them sortable, since `line` may not uniquely sort all
  2667. # p functions
  2668. p_functions.sort(key=lambda p_function: (
  2669. p_function[0],
  2670. str(p_function[1]),
  2671. p_function[2],
  2672. p_function[3]))
  2673. self.pfuncs = p_functions
  2674. # Validate all of the p_functions
  2675. def validate_pfunctions(self):
  2676. grammar = []
  2677. # Check for non-empty symbols
  2678. if len(self.pfuncs) == 0:
  2679. self.log.error('no rules of the form p_rulename are defined')
  2680. self.error = True
  2681. return
  2682. for line, module, name, doc in self.pfuncs:
  2683. file = inspect.getsourcefile(module)
  2684. func = self.pdict[name]
  2685. if isinstance(func, types.MethodType):
  2686. reqargs = 2
  2687. else:
  2688. reqargs = 1
  2689. if func.__code__.co_argcount > reqargs:
  2690. self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
  2691. self.error = True
  2692. elif func.__code__.co_argcount < reqargs:
  2693. self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
  2694. self.error = True
  2695. elif not func.__doc__:
  2696. self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
  2697. file, line, func.__name__)
  2698. else:
  2699. try:
  2700. parsed_g = parse_grammar(doc, file, line)
  2701. for g in parsed_g:
  2702. grammar.append((name, g))
  2703. except SyntaxError as e:
  2704. self.log.error(str(e))
  2705. self.error = True
  2706. # Looks like a valid grammar rule
  2707. # Mark the file in which defined.
  2708. self.modules.add(module)
  2709. # Secondary validation step that looks for p_ definitions that are not functions
  2710. # or functions that look like they might be grammar rules.
  2711. for n, v in self.pdict.items():
  2712. if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
  2713. continue
  2714. if n.startswith('t_'):
  2715. continue
  2716. if n.startswith('p_') and n != 'p_error':
  2717. self.log.warning('%r not defined as a function', n)
  2718. if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
  2719. (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
  2720. if v.__doc__:
  2721. try:
  2722. doc = v.__doc__.split(' ')
  2723. if doc[1] == ':':
  2724. self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
  2725. v.__code__.co_filename, v.__code__.co_firstlineno, n)
  2726. except IndexError:
  2727. pass
  2728. self.grammar = grammar
  2729. # -----------------------------------------------------------------------------
  2730. # yacc(module)
  2731. #
  2732. # Build a parser
  2733. # -----------------------------------------------------------------------------
  2734. def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
  2735. check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file,
  2736. outputdir=None, debuglog=None, errorlog=None, picklefile=None):
  2737. if tabmodule is None:
  2738. tabmodule = tab_module
  2739. # Reference to the parsing method of the last built parser
  2740. global parse
  2741. # If pickling is enabled, table files are not created
  2742. if picklefile:
  2743. write_tables = 0
  2744. if errorlog is None:
  2745. errorlog = PlyLogger(sys.stderr)
  2746. # Get the module dictionary used for the parser
  2747. if module:
  2748. _items = [(k, getattr(module, k)) for k in dir(module)]
  2749. pdict = dict(_items)
  2750. # If no __file__ or __package__ attributes are available, try to obtain them
  2751. # from the __module__ instead
  2752. if '__file__' not in pdict:
  2753. pdict['__file__'] = sys.modules[pdict['__module__']].__file__
  2754. if '__package__' not in pdict and '__module__' in pdict:
  2755. if hasattr(sys.modules[pdict['__module__']], '__package__'):
  2756. pdict['__package__'] = sys.modules[pdict['__module__']].__package__
  2757. else:
  2758. pdict = get_caller_module_dict(2)
  2759. if outputdir is None:
  2760. # If no output directory is set, the location of the output files
  2761. # is determined according to the following rules:
  2762. # - If tabmodule specifies a package, files go into that package directory
  2763. # - Otherwise, files go in the same directory as the specifying module
  2764. if isinstance(tabmodule, types.ModuleType):
  2765. srcfile = tabmodule.__file__
  2766. else:
  2767. if '.' not in tabmodule:
  2768. srcfile = pdict['__file__']
  2769. else:
  2770. parts = tabmodule.split('.')
  2771. pkgname = '.'.join(parts[:-1])
  2772. exec('import %s' % pkgname)
  2773. srcfile = getattr(sys.modules[pkgname], '__file__', '')
  2774. outputdir = os.path.dirname(srcfile)
  2775. # Determine if the module is package of a package or not.
  2776. # If so, fix the tabmodule setting so that tables load correctly
  2777. pkg = pdict.get('__package__')
  2778. if pkg and isinstance(tabmodule, str):
  2779. if '.' not in tabmodule:
  2780. tabmodule = pkg + '.' + tabmodule
  2781. # Set start symbol if it's specified directly using an argument
  2782. if start is not None:
  2783. pdict['start'] = start
  2784. # Collect parser information from the dictionary
  2785. pinfo = ParserReflect(pdict, log=errorlog)
  2786. pinfo.get_all()
  2787. if pinfo.error:
  2788. raise YaccError('Unable to build parser')
  2789. # Check signature against table files (if any)
  2790. signature = pinfo.signature()
  2791. # Read the tables
  2792. try:
  2793. lr = LRTable()
  2794. if picklefile:
  2795. read_signature = lr.read_pickle(picklefile)
  2796. else:
  2797. read_signature = lr.read_table(tabmodule)
  2798. if optimize or (read_signature == signature):
  2799. try:
  2800. lr.bind_callables(pinfo.pdict)
  2801. parser = LRParser(lr, pinfo.error_func)
  2802. parse = parser.parse
  2803. return parser
  2804. except Exception as e:
  2805. errorlog.warning('There was a problem loading the table file: %r', e)
  2806. except VersionError as e:
  2807. errorlog.warning(str(e))
  2808. except ImportError:
  2809. pass
  2810. if debuglog is None:
  2811. if debug:
  2812. try:
  2813. debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
  2814. except IOError as e:
  2815. errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
  2816. debuglog = NullLogger()
  2817. else:
  2818. debuglog = NullLogger()
  2819. debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)
  2820. errors = False
  2821. # Validate the parser information
  2822. if pinfo.validate_all():
  2823. raise YaccError('Unable to build parser')
  2824. if not pinfo.error_func:
  2825. errorlog.warning('no p_error() function is defined')
  2826. # Create a grammar object
  2827. grammar = Grammar(pinfo.tokens)
  2828. # Set precedence level for terminals
  2829. for term, assoc, level in pinfo.preclist:
  2830. try:
  2831. grammar.set_precedence(term, assoc, level)
  2832. except GrammarError as e:
  2833. errorlog.warning('%s', e)
  2834. # Add productions to the grammar
  2835. for funcname, gram in pinfo.grammar:
  2836. file, line, prodname, syms = gram
  2837. try:
  2838. grammar.add_production(prodname, syms, funcname, file, line)
  2839. except GrammarError as e:
  2840. errorlog.error('%s', e)
  2841. errors = True
  2842. # Set the grammar start symbols
  2843. try:
  2844. if start is None:
  2845. grammar.set_start(pinfo.start)
  2846. else:
  2847. grammar.set_start(start)
  2848. except GrammarError as e:
  2849. errorlog.error(str(e))
  2850. errors = True
  2851. if errors:
  2852. raise YaccError('Unable to build parser')
  2853. # Verify the grammar structure
  2854. undefined_symbols = grammar.undefined_symbols()
  2855. for sym, prod in undefined_symbols:
  2856. errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
  2857. errors = True
  2858. unused_terminals = grammar.unused_terminals()
  2859. if unused_terminals:
  2860. debuglog.info('')
  2861. debuglog.info('Unused terminals:')
  2862. debuglog.info('')
  2863. for term in unused_terminals:
  2864. errorlog.warning('Token %r defined, but not used', term)
  2865. debuglog.info(' %s', term)
  2866. # Print out all productions to the debug log
  2867. if debug:
  2868. debuglog.info('')
  2869. debuglog.info('Grammar')
  2870. debuglog.info('')
  2871. for n, p in enumerate(grammar.Productions):
  2872. debuglog.info('Rule %-5d %s', n, p)
  2873. # Find unused non-terminals
  2874. unused_rules = grammar.unused_rules()
  2875. for prod in unused_rules:
  2876. errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)
  2877. if len(unused_terminals) == 1:
  2878. errorlog.warning('There is 1 unused token')
  2879. if len(unused_terminals) > 1:
  2880. errorlog.warning('There are %d unused tokens', len(unused_terminals))
  2881. if len(unused_rules) == 1:
  2882. errorlog.warning('There is 1 unused rule')
  2883. if len(unused_rules) > 1:
  2884. errorlog.warning('There are %d unused rules', len(unused_rules))
  2885. if debug:
  2886. debuglog.info('')
  2887. debuglog.info('Terminals, with rules where they appear')
  2888. debuglog.info('')
  2889. terms = list(grammar.Terminals)
  2890. terms.sort()
  2891. for term in terms:
  2892. debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))
  2893. debuglog.info('')
  2894. debuglog.info('Nonterminals, with rules where they appear')
  2895. debuglog.info('')
  2896. nonterms = list(grammar.Nonterminals)
  2897. nonterms.sort()
  2898. for nonterm in nonterms:
  2899. debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
  2900. debuglog.info('')
  2901. if check_recursion:
  2902. unreachable = grammar.find_unreachable()
  2903. for u in unreachable:
  2904. errorlog.warning('Symbol %r is unreachable', u)
  2905. infinite = grammar.infinite_cycles()
  2906. for inf in infinite:
  2907. errorlog.error('Infinite recursion detected for symbol %r', inf)
  2908. errors = True
  2909. unused_prec = grammar.unused_precedence()
  2910. for term, assoc in unused_prec:
  2911. errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
  2912. errors = True
  2913. if errors:
  2914. raise YaccError('Unable to build parser')
  2915. # Run the LRGeneratedTable on the grammar
  2916. if debug:
  2917. errorlog.debug('Generating %s tables', method)
  2918. lr = LRGeneratedTable(grammar, method, debuglog)
  2919. if debug:
  2920. num_sr = len(lr.sr_conflicts)
  2921. # Report shift/reduce and reduce/reduce conflicts
  2922. if num_sr == 1:
  2923. errorlog.warning('1 shift/reduce conflict')
  2924. elif num_sr > 1:
  2925. errorlog.warning('%d shift/reduce conflicts', num_sr)
  2926. num_rr = len(lr.rr_conflicts)
  2927. if num_rr == 1:
  2928. errorlog.warning('1 reduce/reduce conflict')
  2929. elif num_rr > 1:
  2930. errorlog.warning('%d reduce/reduce conflicts', num_rr)
  2931. # Write out conflicts to the output file
  2932. if debug and (lr.sr_conflicts or lr.rr_conflicts):
  2933. debuglog.warning('')
  2934. debuglog.warning('Conflicts:')
  2935. debuglog.warning('')
  2936. for state, tok, resolution in lr.sr_conflicts:
  2937. debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution)
  2938. already_reported = set()
  2939. for state, rule, rejected in lr.rr_conflicts:
  2940. if (state, id(rule), id(rejected)) in already_reported:
  2941. continue
  2942. debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
  2943. debuglog.warning('rejected rule (%s) in state %d', rejected, state)
  2944. errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
  2945. errorlog.warning('rejected rule (%s) in state %d', rejected, state)
  2946. already_reported.add((state, id(rule), id(rejected)))
  2947. warned_never = []
  2948. for state, rule, rejected in lr.rr_conflicts:
  2949. if not rejected.reduced and (rejected not in warned_never):
  2950. debuglog.warning('Rule (%s) is never reduced', rejected)
  2951. errorlog.warning('Rule (%s) is never reduced', rejected)
  2952. warned_never.append(rejected)
  2953. # Write the table file if requested
  2954. if write_tables:
  2955. try:
  2956. lr.write_table(tabmodule, outputdir, signature)
  2957. if tabmodule in sys.modules:
  2958. del sys.modules[tabmodule]
  2959. except IOError as e:
  2960. errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))
  2961. # Write a pickled version of the tables
  2962. if picklefile:
  2963. try:
  2964. lr.pickle_table(picklefile, signature)
  2965. except IOError as e:
  2966. errorlog.warning("Couldn't create %r. %s" % (picklefile, e))
  2967. # Build the parser
  2968. lr.bind_callables(pinfo.pdict)
  2969. parser = LRParser(lr, pinfo.error_func)
  2970. parse = parser.parse
  2971. return parser