You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

127 lines
3.1 KiB

4 years ago
  1. # ----------------------------------------------------------------------
  2. # ctokens.py
  3. #
  4. # Token specifications for symbols in ANSI C and C++. This file is
  5. # meant to be used as a library in other tokenizers.
  6. # ----------------------------------------------------------------------
  7. # Reserved words
  8. tokens = [
  9. # Literals (identifier, integer constant, float constant, string constant, char const)
  10. 'ID', 'TYPEID', 'INTEGER', 'FLOAT', 'STRING', 'CHARACTER',
  11. # Operators (+,-,*,/,%,|,&,~,^,<<,>>, ||, &&, !, <, <=, >, >=, ==, !=)
  12. 'PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MODULO',
  13. 'OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFT',
  14. 'LOR', 'LAND', 'LNOT',
  15. 'LT', 'LE', 'GT', 'GE', 'EQ', 'NE',
  16. # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, &=, ^=, |=)
  17. 'EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL', 'MINUSEQUAL',
  18. 'LSHIFTEQUAL','RSHIFTEQUAL', 'ANDEQUAL', 'XOREQUAL', 'OREQUAL',
  19. # Increment/decrement (++,--)
  20. 'INCREMENT', 'DECREMENT',
  21. # Structure dereference (->)
  22. 'ARROW',
  23. # Ternary operator (?)
  24. 'TERNARY',
  25. # Delimeters ( ) [ ] { } , . ; :
  26. 'LPAREN', 'RPAREN',
  27. 'LBRACKET', 'RBRACKET',
  28. 'LBRACE', 'RBRACE',
  29. 'COMMA', 'PERIOD', 'SEMI', 'COLON',
  30. # Ellipsis (...)
  31. 'ELLIPSIS',
  32. ]
  33. # Operators
  34. t_PLUS = r'\+'
  35. t_MINUS = r'-'
  36. t_TIMES = r'\*'
  37. t_DIVIDE = r'/'
  38. t_MODULO = r'%'
  39. t_OR = r'\|'
  40. t_AND = r'&'
  41. t_NOT = r'~'
  42. t_XOR = r'\^'
  43. t_LSHIFT = r'<<'
  44. t_RSHIFT = r'>>'
  45. t_LOR = r'\|\|'
  46. t_LAND = r'&&'
  47. t_LNOT = r'!'
  48. t_LT = r'<'
  49. t_GT = r'>'
  50. t_LE = r'<='
  51. t_GE = r'>='
  52. t_EQ = r'=='
  53. t_NE = r'!='
  54. # Assignment operators
  55. t_EQUALS = r'='
  56. t_TIMESEQUAL = r'\*='
  57. t_DIVEQUAL = r'/='
  58. t_MODEQUAL = r'%='
  59. t_PLUSEQUAL = r'\+='
  60. t_MINUSEQUAL = r'-='
  61. t_LSHIFTEQUAL = r'<<='
  62. t_RSHIFTEQUAL = r'>>='
  63. t_ANDEQUAL = r'&='
  64. t_OREQUAL = r'\|='
  65. t_XOREQUAL = r'\^='
  66. # Increment/decrement
  67. t_INCREMENT = r'\+\+'
  68. t_DECREMENT = r'--'
  69. # ->
  70. t_ARROW = r'->'
  71. # ?
  72. t_TERNARY = r'\?'
  73. # Delimeters
  74. t_LPAREN = r'\('
  75. t_RPAREN = r'\)'
  76. t_LBRACKET = r'\['
  77. t_RBRACKET = r'\]'
  78. t_LBRACE = r'\{'
  79. t_RBRACE = r'\}'
  80. t_COMMA = r','
  81. t_PERIOD = r'\.'
  82. t_SEMI = r';'
  83. t_COLON = r':'
  84. t_ELLIPSIS = r'\.\.\.'
  85. # Identifiers
  86. t_ID = r'[A-Za-z_][A-Za-z0-9_]*'
  87. # Integer literal
  88. t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'
  89. # Floating literal
  90. t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
  91. # String literal
  92. t_STRING = r'\"([^\\\n]|(\\.))*?\"'
  93. # Character constant 'c' or L'c'
  94. t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\''
  95. # Comment (C-Style)
  96. def t_COMMENT(t):
  97. r'/\*(.|\n)*?\*/'
  98. t.lexer.lineno += t.value.count('\n')
  99. return t
  100. # Comment (C++-Style)
  101. def t_CPPCOMMENT(t):
  102. r'//.*\n'
  103. t.lexer.lineno += 1
  104. return t