import re
import ply.lex as lex

states = (
    ('instring', 'exclusive'),
)
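# Note: 'instring' is an exclusive state; while scanning a "(...)" string
# literal, none of the INITIAL-state rules defined below apply.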

tokens = (
    'COMMENT', 'HEXSTRING', 'INT', 'FLOAT', 'LITERAL', 'KEYWORD', 'STRING', 'OPERATOR'
)

delimiter = r'\(\)\<\>\[\]\{\}\/\%\s'
delimiter_end = r'(?=[%s]|$)' % delimiter
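# delimiter_end is a zero-width lookahead: a token is complete only when the
# next character is a delimiter or whitespace, or at end of input.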

def t_COMMENT(t):
    # r'^%!.+\n'
    r'%.*\n'
    pass

RE_SPC = re.compile(r'\s')
RE_HEX_PAIR = re.compile(r'[0-9a-fA-F]{2}|.')
@lex.TOKEN(r'<[0-9A-Fa-f\s]*>')
def t_HEXSTRING(t):
    cleaned = RE_SPC.sub('', t.value[1:-1])
    pairs = RE_HEX_PAIR.findall(cleaned)
    token_bytes = bytes([int(pair, 16) for pair in pairs])
    try:
        t.value = token_bytes.decode('ascii')
    except UnicodeDecodeError:
        # should be kept as bytes
        t.value = token_bytes
    return t
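# For example, '<48656C6C6F>' decodes to the ASCII string 'Hello'; a hex string
# containing bytes above 0x7f is returned as a bytes object instead.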

@lex.TOKEN(r'(\-|\+)?[0-9]+' + delimiter_end)
def t_INT(t):
    t.value = int(t.value)
    return t

@lex.TOKEN(r'(\-|\+)?([0-9]+\.|[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)((e|E)[0-9]+)?' + delimiter_end)
def t_FLOAT(t):
    t.value = float(t.value)
    return t

RE_LITERAL_HEX = re.compile(r'#[0-9A-Fa-f]{2}')
@lex.TOKEN(r'/.+?' + delimiter_end)
def t_LITERAL(t):
    newvalue = t.value[1:]
    # If there are '#' chars in the literal, we must de-hex them
    def re_sub(m):
        # convert the hex pair (without the '#' char) back to its character
        return bytes.fromhex(m.group(0)[1:]).decode('latin-1')
    newvalue = RE_LITERAL_HEX.sub(re_sub, newvalue)
    # If there are any lone '#' chars left, remove them
    newvalue = newvalue.replace('#', '')
    t.value = newvalue
    return t
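# For example, '/Name#20With#20Spaces' becomes a LITERAL token with the value
# 'Name With Spaces'.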

def t_OPERATOR(t):
    r'{|}|<<|>>|\[|\]'
    return t

t_KEYWORD = r'.+?' + delimiter_end

def t_instring(t):
    r'\('
    t.lexer.value_buffer = []
    t.lexer.string_startpos = t.lexpos
    t.lexer.level = 1
    t.lexer.begin('instring')

# The parens situation: it's complicated. We can have both escaped and unescaped
# parens. If they're escaped, there's nothing special: we unescape them and add
# them to the string. If they're not escaped, we have to count them so we know
# when an rparen actually ends the string. A single regular expression for this
# gets messy, so instead, when we hit a paren, we check whether the previous
# buffer chunk ends with a backslash. If it does, the paren is escaped and we
# skip the paren balancing.
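# For example, lexing '(Foo (bar) \( baz)' yields a single STRING token with
# value 'Foo (bar) ( baz': the nested unescaped parens are balanced, while the
# escaped one is unescaped and kept literally.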

def t_instring_lparen(t):
    r'\('
    is_escaped = t.lexer.value_buffer and t.lexer.value_buffer[-1].endswith('\\')
    if is_escaped:
        t.lexer.value_buffer[-1] = t.lexer.value_buffer[-1][:-1]
    else:
        t.lexer.level += 1
    t.lexer.value_buffer.append('(')

def t_instring_rparen(t):
    r'\)'
    is_escaped = t.lexer.value_buffer and t.lexer.value_buffer[-1].endswith('\\')
    if is_escaped:
        t.lexer.value_buffer[-1] = t.lexer.value_buffer[-1][:-1]
    else:
        t.lexer.level -= 1

    if t.lexer.level == 0:
        t.value = ''.join(t.lexer.value_buffer)
        if any(ord(c) > 0x7f for c in t.value):
            t.value = t.value.encode('latin-1')
        t.type = "STRING"
        t.lexpos = t.lexer.string_startpos
        t.lexer.begin('INITIAL')
        return t
    else:
        t.lexer.value_buffer.append(')')

RE_STRING_ESCAPE = re.compile(r'\\[btnfr\\]')
RE_STRING_OCTAL = re.compile(r'\\[0-7]{1,3}')
RE_STRING_LINE_CONT = re.compile(r'\\\r\n|\\\r|\\\n')  # longest alternative first
ESC_STRING = { 'b': '\b', 't': '\t', 'n': '\n', 'f': '\f', 'r': '\r', '\\': '\\' }

def repl_string_escape(m):
    return ESC_STRING[m.group(0)[1]]

def repl_string_octal(m):
    i = int(m.group(0)[1:], 8)
    if i <= 0xff: # values above 0xff can't be encoded in latin-1
        return chr(i)
    else:
        return m.group(0)

def t_instring_contents(t):
    r'[^()]+'
    s = t.value
    s = RE_STRING_ESCAPE.sub(repl_string_escape, s)
    s = RE_STRING_OCTAL.sub(repl_string_octal, s)
    s = RE_STRING_LINE_CONT.sub('', s)
    t.lexer.value_buffer.append(s)

t_instring_ignore = ''
t_ignore = ' \t\r\n'

# Error handling rule
def t_error(t):
    print("Illegal character %r" % t.value[0])
    t.lexer.skip(1)
t_instring_error = t_error

lexer = lex.lex()
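
# A minimal usage sketch (not part of the original module). The input below is a
# made-up PDF-style snippet, used only to show the resulting token stream.
if __name__ == '__main__':
    lexer.input('<< /Type /Page /MediaBox [0 0 612 792] >> (Hello \\(PDF\\)) % done\n')
    for tok in lexer:
        # Prints OPERATOR/LITERAL/INT tokens for the dictionary, then
        # STRING 'Hello (PDF)'; the trailing comment is discarded.
        print(tok.type, repr(tok.value))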