# -----------------------------------------------------------------------------
# cpp.py
#
# Author: David Beazley (http://www.dabeaz.com)
# Copyright (C) 2007
# All rights reserved
#
# This module implements an ANSI-C style lexical preprocessor for PLY.
# -----------------------------------------------------------------------------
from __future__ import generators

import sys

# Some Python 3 compatibility shims
if sys.version_info.major < 3:
    STRING_TYPES = (str, unicode)
else:
    STRING_TYPES = str
    xrange = range

# -----------------------------------------------------------------------------
# Default preprocessor lexer definitions.  These tokens are enough to get
# a basic preprocessor working.  Other modules may import these if they want.
# -----------------------------------------------------------------------------
tokens = (
    'CPP_ID', 'CPP_INTEGER', 'CPP_FLOAT', 'CPP_STRING', 'CPP_CHAR', 'CPP_WS',
    'CPP_COMMENT1', 'CPP_COMMENT2', 'CPP_POUND', 'CPP_DPOUND'
)

literals = "+-*/%|&~^<>=!?()[]{}.,;:\\\'\""

# Whitespace
def t_CPP_WS(t):
    r'\s+'
    t.lexer.lineno += t.value.count("\n")
    return t

t_CPP_POUND = r'\#'
t_CPP_DPOUND = r'\#\#'

# Identifier
t_CPP_ID = r'[A-Za-z_][\w_]*'

# Integer literal
def CPP_INTEGER(t):
    r'(((((0x)|(0X))[0-9a-fA-F]+)|(\d+))([uU][lL]|[lL][uU]|[uU]|[lL])?)'
    return t

t_CPP_INTEGER = CPP_INTEGER

# Floating literal
t_CPP_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# String literal
def t_CPP_STRING(t):
    r'\"([^\\\n]|(\\(.|\n)))*?\"'
    t.lexer.lineno += t.value.count("\n")
    return t

# Character constant 'c' or L'c'
def t_CPP_CHAR(t):
    r'(L)?\'([^\\\n]|(\\(.|\n)))*?\''
    t.lexer.lineno += t.value.count("\n")
    return t

# Comment
def t_CPP_COMMENT1(t):
    r'(/\*(.|\n)*?\*/)'
    ncr = t.value.count("\n")
    t.lexer.lineno += ncr
    # replace with one space or a number of '\n'
    t.type = 'CPP_WS'; t.value = '\n' * ncr if ncr else ' '
    return t

# Line comment
def t_CPP_COMMENT2(t):
    r'(//.*?(\n|$))'
    # replace with '\n'
    t.type = 'CPP_WS'; t.value = '\n'
    return t

def t_error(t):
    t.type = t.value[0]
    t.value = t.value[0]
    t.lexer.skip(1)
    return t

import re
import copy
import time
import os.path

# -----------------------------------------------------------------------------
# trigraph()
#
# Given an input string, this function replaces all trigraph sequences.
# The following mapping is used:
#
#     ??=    #
#     ??/    \
#     ??'    ^
#     ??(    [
#     ??)    ]
#     ??!    |
#     ??<    {
#     ??>    }
#     ??-    ~
# -----------------------------------------------------------------------------
_trigraph_pat = re.compile(r'''\?\?[=/\'\(\)\!<>\-]''')
_trigraph_rep = {
    '=':'#',
    '/':'\\',
    "'":'^',
    '(':'[',
    ')':']',
    '!':'|',
    '<':'{',
    '>':'}',
    '-':'~'
}

def trigraph(input):
    return _trigraph_pat.sub(lambda g: _trigraph_rep[g.group()[-1]], input)
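
# Illustrative example (comment only, not from the original module): each
# two-question-mark sequence is rewritten in place, so for instance
#     trigraph("??=define ARR(x) x??(0??)")  ->  "#define ARR(x) x[0]"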

# ------------------------------------------------------------------
# Macro object
#
# This object holds information about preprocessor macros
#
#    .name      - Macro name (string)
#    .value     - Macro value (a list of tokens)
#    .arglist   - List of argument names
#    .variadic  - Boolean indicating whether or not variadic macro
#    .vararg    - Name of the variadic parameter
#
# When a macro is created, the macro replacement token sequence is
# pre-scanned and used to create patch lists that are later used
# during macro expansion
# ------------------------------------------------------------------
class Macro(object):
    def __init__(self, name, value, arglist=None, variadic=False):
        self.name = name
        self.value = value
        self.arglist = arglist
        self.variadic = variadic
        if variadic:
            self.vararg = arglist[-1]
        self.source = None
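
# Illustrative example (comment only): for "#define MAX(a,b) ((a)>(b)?(a):(b))"
# the resulting object is roughly
#     Macro('MAX', <tokens for '((a)>(b)?(a):(b))'>, arglist=['a', 'b'], variadic=False)
# whereas an object-like macro such as "#define N 10" is created with arglist=None.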

# ------------------------------------------------------------------
# Preprocessor object
#
# Object representing a preprocessor.  Contains macro definitions,
# include directories, and other information
# ------------------------------------------------------------------
class Preprocessor(object):
    def __init__(self, lexer=None):
        if lexer is None:
            lexer = lex.lexer
        self.lexer = lexer
        self.macros = { }
        self.path = []
        self.temp_path = []

        # Probe the lexer for selected tokens
        self.lexprobe()

        tm = time.localtime()
        self.define("__DATE__ \"%s\"" % time.strftime("%b %d %Y", tm))
        self.define("__TIME__ \"%s\"" % time.strftime("%H:%M:%S", tm))
        self.parser = None

    # -----------------------------------------------------------------------------
    # tokenize()
    #
    # Utility function. Given a string of text, tokenize into a list of tokens
    # -----------------------------------------------------------------------------

    def tokenize(self, text):
        tokens = []
        self.lexer.input(text)
        while True:
            tok = self.lexer.token()
            if not tok: break
            tokens.append(tok)
        return tokens
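
    # Example usage (hypothetical, assuming a PLY lexer built from the token
    # definitions at the top of this module):
    #     p = Preprocessor(lexer)
    #     [t.value for t in p.tokenize("x + 1")]  ->  ['x', ' ', '+', ' ', '1']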

    # ---------------------------------------------------------------------
    # error()
    #
    # Report a preprocessor error/warning of some kind
    # ----------------------------------------------------------------------

    def error(self, file, line, msg):
        print("%s:%d %s" % (file, line, msg))

    # ----------------------------------------------------------------------
    # lexprobe()
    #
    # This method probes the preprocessor lexer object to discover
    # the token types of symbols that are important to the preprocessor.
    # If this works right, the preprocessor will simply "work"
    # with any suitable lexer regardless of how tokens have been named.
    # ----------------------------------------------------------------------

    def lexprobe(self):

        # Determine the token type for identifiers
        self.lexer.input("identifier")
        tok = self.lexer.token()
        if not tok or tok.value != "identifier":
            print("Couldn't determine identifier type")
        else:
            self.t_ID = tok.type

        # Determine the token type for integers
        self.lexer.input("12345")
        tok = self.lexer.token()
        if not tok or int(tok.value) != 12345:
            print("Couldn't determine integer type")
        else:
            self.t_INTEGER = tok.type
            self.t_INTEGER_TYPE = type(tok.value)

        # Determine the token type for strings enclosed in double quotes
        self.lexer.input("\"filename\"")
        tok = self.lexer.token()
        if not tok or tok.value != "\"filename\"":
            print("Couldn't determine string type")
        else:
            self.t_STRING = tok.type

        # Determine the token type for whitespace--if any
        self.lexer.input(" ")
        tok = self.lexer.token()
        if not tok or tok.value != " ":
            self.t_SPACE = None
        else:
            self.t_SPACE = tok.type

        # Determine the token type for newlines
        self.lexer.input("\n")
        tok = self.lexer.token()
        if not tok or tok.value != "\n":
            self.t_NEWLINE = None
            print("Couldn't determine token for newlines")
        else:
            self.t_NEWLINE = tok.type

        self.t_WS = (self.t_SPACE, self.t_NEWLINE)

        # Check for other characters used by the preprocessor
        chars = ['<', '>', '#', '##', '\\', '(', ')', ',', '.']
        for c in chars:
            self.lexer.input(c)
            tok = self.lexer.token()
            if not tok or tok.value != c:
                print("Unable to lex '%s' required for preprocessor" % c)

    # ----------------------------------------------------------------------
    # add_path()
    #
    # Adds a search path to the preprocessor.
    # ----------------------------------------------------------------------

    def add_path(self, path):
        self.path.append(path)

    # ----------------------------------------------------------------------
    # group_lines()
    #
    # Given an input string, this function splits it into lines.  Trailing whitespace
    # is removed.  Any line ending with \ is grouped with the next line.  This
    # function forms the lowest level of the preprocessor---grouping text into
    # a line-by-line format.
    # ----------------------------------------------------------------------

    def group_lines(self, input):
        lex = self.lexer.clone()
        lines = [x.rstrip() for x in input.splitlines()]
        for i in xrange(len(lines)):
            j = i+1
            while lines[i].endswith('\\') and (j < len(lines)):
                lines[i] = lines[i][:-1]+lines[j]
                lines[j] = ""
                j += 1

        input = "\n".join(lines)
        lex.input(input)
        lex.lineno = 1

        current_line = []
        while True:
            tok = lex.token()
            if not tok:
                break
            current_line.append(tok)
            if tok.type in self.t_WS and '\n' in tok.value:
                yield current_line
                current_line = []

        if current_line:
            yield current_line
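
    # Illustrative sketch (comment only): given the two physical lines
    #     #define TWO \
    #     2
    # the backslash continuation is folded away first, so group_lines() yields
    # a single token list corresponding to the logical line "#define TWO 2".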

    # ----------------------------------------------------------------------
    # tokenstrip()
    #
    # Remove leading/trailing whitespace tokens from a token list
    # ----------------------------------------------------------------------

    def tokenstrip(self, tokens):
        i = 0
        while i < len(tokens) and tokens[i].type in self.t_WS:
            i += 1
        del tokens[:i]
        i = len(tokens)-1
        while i >= 0 and tokens[i].type in self.t_WS:
            i -= 1
        del tokens[i+1:]
        return tokens

    # ----------------------------------------------------------------------
    # collect_args()
    #
    # Collects comma separated arguments from a list of tokens.  The arguments
    # must be enclosed in parentheses.  Returns a tuple (tokencount,args,positions)
    # where tokencount is the number of tokens consumed, args is a list of arguments,
    # and positions is a list of integers containing the starting index of each
    # argument.  Each argument is represented by a list of tokens.
    #
    # When collecting arguments, leading and trailing whitespace is removed
    # from each argument.
    #
    # This function properly handles nested parentheses and commas---these do not
    # define new arguments.
    # ----------------------------------------------------------------------

    def collect_args(self, tokenlist):
        args = []
        positions = []
        current_arg = []
        nesting = 1
        tokenlen = len(tokenlist)

        # Search for the opening '('.
        i = 0
        while (i < tokenlen) and (tokenlist[i].type in self.t_WS):
            i += 1

        if (i < tokenlen) and (tokenlist[i].value == '('):
            positions.append(i+1)
        else:
            self.error(self.source, tokenlist[0].lineno, "Missing '(' in macro arguments")
            return 0, [], []

        i += 1

        while i < tokenlen:
            t = tokenlist[i]
            if t.value == '(':
                current_arg.append(t)
                nesting += 1
            elif t.value == ')':
                nesting -= 1
                if nesting == 0:
                    if current_arg:
                        args.append(self.tokenstrip(current_arg))
                        positions.append(i)
                    return i+1, args, positions
                current_arg.append(t)
            elif t.value == ',' and nesting == 1:
                args.append(self.tokenstrip(current_arg))
                positions.append(i+1)
                current_arg = []
            else:
                current_arg.append(t)
            i += 1

        # Missing end argument
        self.error(self.source, tokenlist[-1].lineno, "Missing ')' in macro arguments")
        return 0, [], []
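
    # Illustrative sketch (comment only): given tokens for "(x, f(a, b), y)",
    # collect_args() returns the number of tokens consumed up to and including
    # the closing ')', args == [tokens of 'x', tokens of 'f(a, b)', tokens of 'y']
    # with surrounding whitespace stripped from each argument, and positions
    # holding the token index at which each argument starts.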

    # ----------------------------------------------------------------------
    # macro_prescan()
    #
    # Examine the macro value (token sequence) and identify patch points
    # This is used to speed up macro expansion later on---we'll know
    # right away where to apply patches to the value to form the expansion
    # ----------------------------------------------------------------------

    def macro_prescan(self, macro):
        macro.patch = []                 # Standard macro arguments
        macro.str_patch = []             # String conversion expansion
        macro.var_comma_patch = []       # Variadic macro comma patch
        i = 0
        while i < len(macro.value):
            if macro.value[i].type == self.t_ID and macro.value[i].value in macro.arglist:
                argnum = macro.arglist.index(macro.value[i].value)
                # Conversion of argument to a string
                if i > 0 and macro.value[i-1].value == '#':
                    macro.value[i] = copy.copy(macro.value[i])
                    macro.value[i].type = self.t_STRING
                    del macro.value[i-1]
                    macro.str_patch.append((argnum, i-1))
                    continue
                # Concatenation
                elif (i > 0 and macro.value[i-1].value == '##'):
                    macro.patch.append(('c', argnum, i-1))
                    del macro.value[i-1]
                    i -= 1
                    continue
                elif ((i+1) < len(macro.value) and macro.value[i+1].value == '##'):
                    macro.patch.append(('c', argnum, i))
                    del macro.value[i + 1]
                    continue
                # Standard expansion
                else:
                    macro.patch.append(('e', argnum, i))
            elif macro.value[i].value == '##':
                if macro.variadic and (i > 0) and (macro.value[i-1].value == ',') and \
                        ((i+1) < len(macro.value)) and (macro.value[i+1].type == self.t_ID) and \
                        (macro.value[i+1].value == macro.vararg):
                    macro.var_comma_patch.append(i-1)
            i += 1
        macro.patch.sort(key=lambda x: x[2], reverse=True)

    # ----------------------------------------------------------------------
    # macro_expand_args()
    #
    # Given a Macro and list of arguments (each a token list), this method
    # returns an expanded version of a macro.  The return value is a token sequence
    # representing the replacement macro tokens
    # ----------------------------------------------------------------------

    def macro_expand_args(self, macro, args):
        # Make a copy of the macro token sequence
        rep = [copy.copy(_x) for _x in macro.value]

        # Make string expansion patches.  These do not alter the length of the replacement sequence
        str_expansion = {}
        for argnum, i in macro.str_patch:
            if argnum not in str_expansion:
                str_expansion[argnum] = ('"%s"' % "".join([x.value for x in args[argnum]])).replace("\\", "\\\\")
            rep[i] = copy.copy(rep[i])
            rep[i].value = str_expansion[argnum]

        # Make the variadic macro comma patch.  If the variadic macro argument is empty,
        # we get rid of the comma that precedes it in the replacement text.
        comma_patch = False
        if macro.variadic and not args[-1]:
            for i in macro.var_comma_patch:
                rep[i] = None
                comma_patch = True

        # Make all other patches.  The order of these matters.  It is assumed that the patch list
        # has been sorted in reverse order of patch location since replacements will cause the
        # size of the replacement sequence to expand from the patch point.
        expanded = { }
        for ptype, argnum, i in macro.patch:
            # Concatenation.  Argument is left unexpanded
            if ptype == 'c':
                rep[i:i+1] = args[argnum]
            # Normal expansion.  Argument is macro expanded first
            elif ptype == 'e':
                if argnum not in expanded:
                    expanded[argnum] = self.expand_macros(args[argnum])
                rep[i:i+1] = expanded[argnum]

        # Get rid of removed comma if necessary
        if comma_patch:
            rep = [_i for _i in rep if _i]

        return rep

    # ----------------------------------------------------------------------
    # expand_macros()
    #
    # Given a list of tokens, this function performs macro expansion.
    # The expanded argument is a dictionary that contains macros already
    # expanded.  This is used to prevent infinite recursion.
    # ----------------------------------------------------------------------

    def expand_macros(self, tokens, expanded=None):
        if expanded is None:
            expanded = {}
        i = 0
        while i < len(tokens):
            t = tokens[i]
            if t.type == self.t_ID:
                if t.value in self.macros and t.value not in expanded:
                    # Yes, we found a macro match
                    expanded[t.value] = True
                    m = self.macros[t.value]
                    if not m.arglist:
                        # A simple macro
                        ex = self.expand_macros([copy.copy(_x) for _x in m.value], expanded)
                        for e in ex:
                            e.lineno = t.lineno
                        tokens[i:i+1] = ex
                        i += len(ex)
                    else:
                        # A macro with arguments
                        j = i + 1
                        while j < len(tokens) and tokens[j].type in self.t_WS:
                            j += 1
                        if j < len(tokens) and tokens[j].value == '(':
                            tokcount, args, positions = self.collect_args(tokens[j:])
                            if not m.variadic and len(args) != len(m.arglist):
                                self.error(self.source, t.lineno, "Macro %s requires %d arguments" % (t.value, len(m.arglist)))
                                i = j + tokcount
                            elif m.variadic and len(args) < len(m.arglist)-1:
                                if len(m.arglist) > 2:
                                    self.error(self.source, t.lineno, "Macro %s must have at least %d arguments" % (t.value, len(m.arglist)-1))
                                else:
                                    self.error(self.source, t.lineno, "Macro %s must have at least %d argument" % (t.value, len(m.arglist)-1))
                                i = j + tokcount
                            else:
                                if m.variadic:
                                    if len(args) == len(m.arglist)-1:
                                        args.append([])
                                    else:
                                        args[len(m.arglist)-1] = tokens[j+positions[len(m.arglist)-1]:j+tokcount-1]
                                        del args[len(m.arglist):]

                                # Get macro replacement text
                                rep = self.macro_expand_args(m, args)
                                rep = self.expand_macros(rep, expanded)
                                for r in rep:
                                    r.lineno = t.lineno
                                tokens[i:j+tokcount] = rep
                                i += len(rep)
                        else:
                            # This is not a macro use.  It is just an identifier that
                            # happens to have the same name as a macro.  Move on to
                            # the next token.
                            i += 1
                    del expanded[t.value]
                    continue
                elif t.value == '__LINE__':
                    t.type = self.t_INTEGER
                    t.value = self.t_INTEGER_TYPE(t.lineno)

            i += 1
        return tokens
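
    # Illustrative sketch (comment only), assuming the macros
    #     #define N 10
    #     #define SQ(x) ((x)*(x))
    # have been defined: expanding the tokens for "SQ(N)+1" yields the tokens
    # for "((10)*(10))+1".  The 'expanded' dictionary is what keeps a
    # self-referential macro such as "#define X X" from recursing forever.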

    # ----------------------------------------------------------------------
    # evalexpr()
    #
    # Evaluate an expression token sequence for the purposes of evaluating
    # integral expressions.
    # ----------------------------------------------------------------------

    def evalexpr(self, tokens):
        # tokens = tokenize(line)
        # Search for defined macros
        i = 0
        while i < len(tokens):
            if tokens[i].type == self.t_ID and tokens[i].value == 'defined':
                j = i + 1
                needparen = False
                result = "0L"
                while j < len(tokens):
                    if tokens[j].type in self.t_WS:
                        j += 1
                        continue
                    elif tokens[j].type == self.t_ID:
                        if tokens[j].value in self.macros:
                            result = "1L"
                        else:
                            result = "0L"
                        if not needparen: break
                    elif tokens[j].value == '(':
                        needparen = True
                    elif tokens[j].value == ')':
                        break
                    else:
                        self.error(self.source, tokens[i].lineno, "Malformed defined()")
                    j += 1
                tokens[i].type = self.t_INTEGER
                tokens[i].value = self.t_INTEGER_TYPE(result)
                del tokens[i+1:j+1]
            i += 1
        tokens = self.expand_macros(tokens)
        for i, t in enumerate(tokens):
            if t.type == self.t_ID:
                tokens[i] = copy.copy(t)
                tokens[i].type = self.t_INTEGER
                tokens[i].value = self.t_INTEGER_TYPE("0L")
            elif t.type == self.t_INTEGER:
                tokens[i] = copy.copy(t)
                # Strip off any trailing suffixes
                tokens[i].value = str(tokens[i].value)
                while tokens[i].value[-1] not in "0123456789abcdefABCDEF":
                    tokens[i].value = tokens[i].value[:-1]

        expr = "".join([str(x.value) for x in tokens])
        expr = expr.replace("&&", " and ")
        expr = expr.replace("||", " or ")
        expr = expr.replace("!", " not ")
        try:
            result = eval(expr)
        except Exception:
            self.error(self.source, tokens[0].lineno, "Couldn't evaluate expression")
            result = 0
        return result
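
    # Illustrative sketch (comment only): for a directive such as
    #     #if defined(FOO) && BAR > 1
    # each defined(...) test is folded to "1L"/"0L", identifiers that survive
    # macro expansion become 0, trailing integer suffixes (L/U) are stripped,
    # and the C operators are rewritten (&& -> and, || -> or, ! -> not) so the
    # string can be handed to Python's eval().  With FOO defined and BAR
    # undefined this amounts to evaluating roughly "1 and 0 > 1", i.e. false.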

    # ----------------------------------------------------------------------
    # parsegen()
    #
    # Parse an input string.
    # ----------------------------------------------------------------------

    def parsegen(self, input, source=None):

        # Replace trigraph sequences
        t = trigraph(input)
        lines = self.group_lines(t)

        if not source:
            source = ""

        self.define("__FILE__ \"%s\"" % source)

        self.source = source
        chunk = []
        enable = True
        iftrigger = False
        ifstack = []

        for x in lines:
            for i, tok in enumerate(x):
                if tok.type not in self.t_WS: break
            if tok.value == '#':
                # Preprocessor directive

                # insert necessary whitespace in place of the consumed tokens
                for tok in x:
                    if tok.type in self.t_WS and '\n' in tok.value:
                        chunk.append(tok)

                dirtokens = self.tokenstrip(x[i+1:])
                if dirtokens:
                    name = dirtokens[0].value
                    args = self.tokenstrip(dirtokens[1:])
                else:
                    name = ""
                    args = []

                if name == 'define':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.define(args)
                elif name == 'include':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        oldfile = self.macros['__FILE__']
                        for tok in self.include(args):
                            yield tok
                        self.macros['__FILE__'] = oldfile
                        self.source = source
                elif name == 'undef':
                    if enable:
                        for tok in self.expand_macros(chunk):
                            yield tok
                        chunk = []
                        self.undef(args)
                elif name == 'ifdef':
                    ifstack.append((enable, iftrigger))
                    if enable:
                        if not args[0].value in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'ifndef':
                    ifstack.append((enable, iftrigger))
                    if enable:
                        if args[0].value in self.macros:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'if':
                    ifstack.append((enable, iftrigger))
                    if enable:
                        result = self.evalexpr(args)
                        if not result:
                            enable = False
                            iftrigger = False
                        else:
                            iftrigger = True
                elif name == 'elif':
                    if ifstack:
                        if ifstack[-1][0]:          # We only pay attention if outer "if" allows this
                            if enable:              # If already true, we flip enable False
                                enable = False
                            elif not iftrigger:     # If False, but not triggered yet, we'll check expression
                                result = self.evalexpr(args)
                                if result:
                                    enable = True
                                    iftrigger = True
                    else:
                        self.error(self.source, dirtokens[0].lineno, "Misplaced #elif")
                elif name == 'else':
                    if ifstack:
                        if ifstack[-1][0]:
                            if enable:
                                enable = False
                            elif not iftrigger:
                                enable = True
                                iftrigger = True
                    else:
                        self.error(self.source, dirtokens[0].lineno, "Misplaced #else")
                elif name == 'endif':
                    if ifstack:
                        enable, iftrigger = ifstack.pop()
                    else:
                        self.error(self.source, dirtokens[0].lineno, "Misplaced #endif")
                else:
                    # Unknown preprocessor directive
                    pass

            else:
                # Normal text
                if enable:
                    chunk.extend(x)

        for tok in self.expand_macros(chunk):
            yield tok
        chunk = []

    # ----------------------------------------------------------------------
    # include()
    #
    # Implementation of file-inclusion
    # ----------------------------------------------------------------------

    def include(self, tokens):
        # Try to extract the filename and then process an include file
        if not tokens:
            return
        if tokens:
            if tokens[0].value != '<' and tokens[0].type != self.t_STRING:
                tokens = self.expand_macros(tokens)

            if tokens[0].value == '<':
                # Include <...>
                i = 1
                while i < len(tokens):
                    if tokens[i].value == '>':
                        break
                    i += 1
                else:
                    print("Malformed #include <...>")
                    return
                filename = "".join([x.value for x in tokens[1:i]])
                path = self.path + [""] + self.temp_path
            elif tokens[0].type == self.t_STRING:
                filename = tokens[0].value[1:-1]
                path = self.temp_path + [""] + self.path
            else:
                print("Malformed #include statement")
                return
        for p in path:
            iname = os.path.join(p, filename)
            try:
                data = open(iname, "r").read()
                dname = os.path.dirname(iname)
                if dname:
                    self.temp_path.insert(0, dname)
                for tok in self.parsegen(data, filename):
                    yield tok
                if dname:
                    del self.temp_path[0]
                break
            except IOError:
                pass
        else:
            print("Couldn't find '%s'" % filename)

    # ----------------------------------------------------------------------
    # define()
    #
    # Define a new macro
    # ----------------------------------------------------------------------

    def define(self, tokens):
        if isinstance(tokens, STRING_TYPES):
            tokens = self.tokenize(tokens)

        linetok = tokens
        try:
            name = linetok[0]
            if len(linetok) > 1:
                mtype = linetok[1]
            else:
                mtype = None
            if not mtype:
                m = Macro(name.value, [])
                self.macros[name.value] = m
            elif mtype.type in self.t_WS:
                # A normal macro
                m = Macro(name.value, self.tokenstrip(linetok[2:]))
                self.macros[name.value] = m
            elif mtype.value == '(':
                # A macro with arguments
                tokcount, args, positions = self.collect_args(linetok[1:])
                variadic = False
                for a in args:
                    if variadic:
                        print("No more arguments may follow a variadic argument")
                        break
                    astr = "".join([str(_i.value) for _i in a])
                    if astr == "...":
                        variadic = True
                        a[0].type = self.t_ID
                        a[0].value = '__VA_ARGS__'
                        del a[1:]
                        continue
                    elif astr[-3:] == "..." and a[0].type == self.t_ID:
                        variadic = True
                        del a[1:]
                        # If, for some reason, "." is part of the identifier, strip off the name
                        # for the purposes of macro expansion
                        if a[0].value[-3:] == '...':
                            a[0].value = a[0].value[:-3]
                        continue
                    if len(a) > 1 or a[0].type != self.t_ID:
                        print("Invalid macro argument")
                        break
                else:
                    mvalue = self.tokenstrip(linetok[1+tokcount:])
                    i = 0
                    while i < len(mvalue):
                        if i+1 < len(mvalue):
                            if mvalue[i].type in self.t_WS and mvalue[i+1].value == '##':
                                del mvalue[i]
                                continue
                            elif mvalue[i].value == '##' and mvalue[i+1].type in self.t_WS:
                                del mvalue[i+1]
                        i += 1
                    m = Macro(name.value, mvalue, [x[0].value for x in args], variadic)
                    self.macro_prescan(m)
                    self.macros[name.value] = m
            else:
                print("Bad macro definition")
        except LookupError:
            print("Bad macro definition")
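
    # Illustrative examples (comment only): define() accepts either a raw string
    # or the token list of a #define directive, e.g.
    #     p.define("DEBUG")                        # object-like, empty value
    #     p.define("PI 3.14159")                   # object-like with a value
    #     p.define("MIN(a,b) ((a)<(b)?(a):(b))")   # function-like macro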

    # ----------------------------------------------------------------------
    # undef()
    #
    # Undefine a macro
    # ----------------------------------------------------------------------

    def undef(self, tokens):
        id = tokens[0].value
        try:
            del self.macros[id]
        except LookupError:
            pass

    # ----------------------------------------------------------------------
    # parse()
    #
    # Parse input text.
    # ----------------------------------------------------------------------

    def parse(self, input, source=None, ignore={}):
        self.ignore = ignore
        self.parser = self.parsegen(input, source)

    # ----------------------------------------------------------------------
    # token()
    #
    # Method to return individual tokens
    # ----------------------------------------------------------------------

    def token(self):
        try:
            while True:
                tok = next(self.parser)
                if tok.type not in self.ignore: return tok
        except StopIteration:
            self.parser = None
        return None

if __name__ == '__main__':
    import ply.lex as lex
    lexer = lex.lex()

    # Run a preprocessor
    import sys
    f = open(sys.argv[1])
    input = f.read()

    p = Preprocessor(lexer)
    p.parse(input, sys.argv[1])
    while True:
        tok = p.token()
        if not tok: break
        print(p.source, tok)