|
|
- # ----------------------------------------------------------------------
- # ctokens.py
- #
- # Token specifications for symbols in ANSI C and C++. This file is
- # meant to be used as a library in other tokenizers.
- # ----------------------------------------------------------------------
-
# Names of every token kind exported by this module.  Kept as a plain list,
# in the same order as before, one entry per line.
tokens = [
    # Identifier and literal constants (integer, float, string, character).
    # No pattern for TYPEID is defined in this part of the file.
    'ID',
    'TYPEID',
    'INTEGER',
    'FLOAT',
    'STRING',
    'CHARACTER',

    # Arithmetic operators:  +  -  *  /  %
    'PLUS',
    'MINUS',
    'TIMES',
    'DIVIDE',
    'MODULO',

    # Bitwise operators:  |  &  ~  ^  <<  >>
    'OR',
    'AND',
    'NOT',
    'XOR',
    'LSHIFT',
    'RSHIFT',

    # Logical operators:  ||  &&  !
    'LOR',
    'LAND',
    'LNOT',

    # Comparison operators:  <  <=  >  >=  ==  !=
    'LT',
    'LE',
    'GT',
    'GE',
    'EQ',
    'NE',

    # Assignment operators:  =  *=  /=  %=  +=  -=  <<=  >>=  &=  ^=  |=
    'EQUALS',
    'TIMESEQUAL',
    'DIVEQUAL',
    'MODEQUAL',
    'PLUSEQUAL',
    'MINUSEQUAL',
    'LSHIFTEQUAL',
    'RSHIFTEQUAL',
    'ANDEQUAL',
    'XOREQUAL',
    'OREQUAL',

    # Increment / decrement:  ++  --
    'INCREMENT',
    'DECREMENT',

    # Structure dereference:  ->
    'ARROW',

    # Ternary operator:  ?
    'TERNARY',

    # Delimiters:  ( )  [ ]  { }  ,  .  ;  :
    'LPAREN',
    'RPAREN',
    'LBRACKET',
    'RBRACKET',
    'LBRACE',
    'RBRACE',
    'COMMA',
    'PERIOD',
    'SEMI',
    'COLON',

    # Ellipsis:  ...
    'ELLIPSIS',
]
-
# Patterns for the operator tokens.  Each t_<NAME> string holds the regular
# expression for the <NAME> entry of the token list.

# Arithmetic:  +  -  *  /  %
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MODULO = r'%'

# Bitwise:  &  |  ^  ~  <<  >>
t_AND = r'&'
t_OR = r'\|'
t_XOR = r'\^'
t_NOT = r'~'
t_LSHIFT = r'<<'
t_RSHIFT = r'>>'

# Logical:  &&  ||  !
t_LAND = r'&&'
t_LOR = r'\|\|'
t_LNOT = r'!'

# Relational:  <  <=  >  >=
t_LT = r'<'
t_LE = r'<='
t_GT = r'>'
t_GE = r'>='

# Equality:  ==  !=
t_EQ = r'=='
t_NE = r'!='
-
# Patterns for plain and compound assignment.

# Plain assignment:  =
t_EQUALS = r'='

# Arithmetic compound assignment:  +=  -=  *=  /=  %=
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='

# Shift compound assignment:  <<=  >>=
t_LSHIFTEQUAL = r'<<='
t_RSHIFTEQUAL = r'>>='

# Bitwise compound assignment:  &=  ^=  |=
t_ANDEQUAL = r'&='
t_XOREQUAL = r'\^='
t_OREQUAL = r'\|='
-
# Patterns for the remaining multi-purpose operators.

# Structure dereference:  ->
t_ARROW = r'->'

# Ternary operator:  ?
t_TERNARY = r'\?'

# Decrement and increment:  --  ++
t_DECREMENT = r'--'
t_INCREMENT = r'\+\+'
-
# Patterns for the delimiter tokens.

# Grouping pairs:  ( )  [ ]  { }
t_LPAREN, t_RPAREN = r'\(', r'\)'
t_LBRACKET, t_RBRACKET = r'\[', r'\]'
t_LBRACE, t_RBRACE = r'\{', r'\}'

# Separators:  ,  ;  :
t_COMMA = r','
t_SEMI = r';'
t_COLON = r':'

# Single period and the three-period ellipsis:  .  ...
t_PERIOD = r'\.'
t_ELLIPSIS = r'\.\.\.'
-
# Patterns for identifiers and literal constants.

# Identifier: a letter or underscore, then any run of letters, digits and
# underscores.
t_ID = r'[A-Za-z_][A-Za-z0-9_]*'

# Character constant in single quotes, optionally prefixed with L
# ('c' or L'c').  Backslash escapes are accepted; a raw newline is not.
t_CHARACTER = r'(L)?\'([^\\\n]|(\\.))*?\''

# String literal in double quotes.  Backslash escapes are accepted; a raw
# newline is not.
t_STRING = r'\"([^\\\n]|(\\.))*?\"'

# Integer constant: decimal digits with an optional u / l / ul / lu suffix
# in either case.
t_INTEGER = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'

# Floating constant: digits with a fractional part and optional exponent,
# or digits with an exponent; optional l / f suffix in either case.
# NOTE(review): the blanks around '|' are part of the pattern text, so this
# presumably relies on the pattern being compiled in verbose mode - confirm
# against the lexer that consumes it.
t_FLOAT = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
-
# C-style block comment:  /* ... */  (may span several lines).
#
# t is the token object: t.value is the matched text and t.lexer.lineno is
# the running line counter.  The same token object is returned.
# NOTE(review): 'COMMENT' does not appear in the `tokens` list in this file;
# presumably the including lexer has to declare it - confirm.
def t_COMMENT(t):
    r'/\*(.|\n)*?\*/'
    # The docstring above is the rule's pattern text, so it has to remain the
    # first statement of the function, unchanged.
    newline_count = t.value.count('\n')
    lexer = t.lexer
    # Advance the line counter past every newline inside the comment.
    lexer.lineno = lexer.lineno + newline_count
    return t
-
# C++-style line comment:  // up to the end of the line.
#
# t is the token object: t.value is the matched text and t.lexer.lineno is
# the running line counter.  The same token object is returned.
# NOTE(review): 'CPPCOMMENT' does not appear in the `tokens` list in this
# file; presumably the including lexer has to declare it - confirm.
def t_CPPCOMMENT(t):
    r'//.*\n?'
    # The docstring above is the rule's pattern text, so it has to remain the
    # first statement of the function.  The trailing newline is optional so
    # that a comment on the last line of input, with no terminating newline,
    # is still matched.  When the newline is present it stays part of
    # t.value, exactly as before.
    #
    # Count the newline actually consumed (0 or 1) instead of assuming one,
    # so the line counter stays correct for a comment that ends the input.
    t.lexer.lineno += t.value.count('\n')
    return t
|