# Lua tokenizer and parser (listing metadata: 244 lines, 7.2 KiB, Python).
import re

# --- Tokenizer ---
# Token specifications as (kind, pattern) pairs.  They are joined into a single
# alternation below, so at every input position the FIRST pattern that matches
# wins -- the order of this list is significant.
TOKEN_REGEX = [
    ('KEYWORD', r'\b(and|break|do|else|elseif|end|false|for|function|goto|if|in|local|nil|not|or|repeat|return|then|true|until|while)\b'),
    ('NAME', r'[A-Za-z_][A-Za-z0-9_]*'),
    ('NUMBER', r'\d+(\.\d+)?'),
    ('STRING', r'"([^"\\]|\\.)*"|\'([^\'\\]|\\.)*\''),
    # COMMENT has to be tried before SYMBOL: SYMBOL accepts a single '-', so
    # with the opposite order '--' is consumed as two minus signs and a comment
    # can never match.
    #   * Long form: --[[ ... ]] / --[==[ ... ]==]; the closing bracket must
    #     carry the same number of '=' as the opening one (named backreference).
    #   * Short form: runs to the end of the line.  '[^\n]*' is used instead of
    #     '.*' because the pattern is compiled with re.DOTALL, under which '.*'
    #     would swallow the rest of the input.
    ('COMMENT', r'--\[(?P<LONG_LEVEL>=*)\[.*?\](?P=LONG_LEVEL)\]|--[^\n]*'),
    ('SYMBOL', r'==|~=|<=|>=|\.{2}|[+\-*/%^#=<>;:,.\[\](){}]'),
    ('SKIP', r'[ \t\r\n]+'),
    # Catch-all so that every character belongs to some token kind.
    ('MISMATCH', r'.'),
]

# One named group per token kind; Match.lastgroup then reports which kind
# matched (the outermost group closes last, so nested groups do not interfere).
# DOTALL lets long comments span several lines.
TOKEN_RE = re.compile('|'.join(f'(?P<{name}>{pattern})' for name, pattern in TOKEN_REGEX), re.DOTALL)
|
||
|
|
||
|
def tokenize(code):
    """Lazily split *code* into ``(kind, value)`` token pairs.

    Every match of ``TOKEN_RE`` is classified by the name of the group that
    matched.  ``SKIP`` and ``COMMENT`` matches are dropped; everything else is
    yielded together with its matched text.

    Raises:
        SyntaxError: when a match is classified as ``MISMATCH``.  Because this
            is a generator, the error surfaces while iterating, not at call
            time.
    """
    ignored = ('SKIP', 'COMMENT')
    for found in TOKEN_RE.finditer(code):
        token_kind, lexeme = found.lastgroup, found.group()
        if token_kind == 'MISMATCH':
            raise SyntaxError(f'Unexpected token: {lexeme}')
        if token_kind not in ignored:
            yield token_kind, lexeme


# --- Parser ---
class LuaParser:
    """Recursive-descent parser for a small subset of Lua.

    The parser consumes ``(kind, value)`` token pairs and produces a
    tuple-based AST.  Node shapes:

    * ``('block', [statement, ...])``
    * ``('local', name, expr_or_None)``
    * ``('function', name, [param_name, ...], block)``
    * ``('if', cond, then_block, [(cond, block), ...], else_block_or_None)``
    * ``('while', cond, block)``
    * ``('return', expr_or_None)``
    * ``('assign', name, expr)``
    * ``('call', name, [arg_expr, ...])``
    * ``('binop', op, left, right)`` and ``('unop', op, operand)``
    * ``('number', float)``, ``('string', lexeme)``, ``('name', str)``,
      ``('nil',)``, ``('bool', bool)``, ``('table', [expr, ...])``

    String nodes carry the token text exactly as it was supplied.

    Not supported: ``for``/``repeat``/``do`` statements, multiple assignment,
    field/index access, keyed table fields and anonymous functions.
    """

    # Keywords that terminate the block currently being parsed.
    _BLOCK_END = frozenset(('end', 'elseif', 'else', 'until'))

    # Binary operator precedence; a higher number binds tighter.  Level 7 is
    # reserved for unary operators, which sit between '*' and '^' in Lua.
    _BINARY_PRECEDENCE = {
        'or': 1,
        'and': 2,
        '<': 3, '>': 3, '<=': 3, '>=': 3, '==': 3, '~=': 3,
        '..': 4,
        '+': 5, '-': 5,
        '*': 6, '/': 6, '%': 6,
        '^': 8,
    }
    _UNARY_PRECEDENCE = 7
    _UNARY_OPERATORS = frozenset(('not', '-', '#'))
    # Lua defines concatenation and exponentiation as right-associative.
    _RIGHT_ASSOCIATIVE = frozenset(('..', '^'))

    def __init__(self, tokens):
        """Store *tokens* (any iterable of ``(kind, value)`` pairs)."""
        # Materialise the iterable so the parser can index and look ahead.
        self.tokens = list(tokens)
        self.pos = 0

    def peek(self):
        """Return the current token without consuming it.

        Returns ``(None, None)`` once the input is exhausted.
        """
        return self.tokens[self.pos] if self.pos < len(self.tokens) else (None, None)

    def eat(self, kind=None, value=None):
        """Consume and return the current token.

        Args:
            kind: if given, the token kind that is required.
            value: if given, the token text that is required.

        Raises:
            SyntaxError: if the input is exhausted or the current token does
                not satisfy *kind* / *value*.
        """
        token = self.peek()
        if token[0] is None:
            # Never advance past the end of the input, even for eat().
            expected = value or kind or 'a token'
            raise SyntaxError(f'Expected {expected}, got end of input')
        if kind and token[0] != kind:
            raise SyntaxError(f'Expected {kind}, got {token[0]}')
        if value and token[1] != value:
            raise SyntaxError(f'Expected {value}, got {token[1]}')
        self.pos += 1
        return token

    def parse(self):
        """Parse the whole token stream and return its ``('block', ...)`` node.

        Raises:
            SyntaxError: on malformed input, including a block terminator
                (for example a stray ``end``) at top level.
        """
        block = self.parse_block()
        # parse_block stops in front of block terminators; at top level nothing
        # may follow, otherwise the rest of the program would be dropped.
        if self.pos < len(self.tokens):
            raise SyntaxError(f'Unexpected token: {self.peek()[1]}')
        return block

    def parse_block(self):
        """Parse statements until end of input or a block-terminating keyword.

        The terminating keyword itself is left for the caller to consume.
        """
        statements = []
        while self.pos < len(self.tokens):
            kind, value = self.peek()
            if value in self._BLOCK_END:
                break
            if kind == 'SYMBOL' and value == ';':
                # ';' is an empty statement / separator in Lua.
                self.eat('SYMBOL', ';')
                continue
            statements.append(self.parse_statement())
        return ('block', statements)

    def parse_statement(self):
        """Dispatch on the current token and parse one statement.

        Raises:
            SyntaxError: if the token cannot start a supported statement.
        """
        kind, value = self.peek()
        if value == 'local':
            return self.parse_local()
        if value == 'function':
            return self.parse_function()
        if value == 'if':
            return self.parse_if()
        if value == 'while':
            return self.parse_while()
        if value == 'return':
            return self.parse_return()
        if kind == 'NAME':
            return self.parse_assignment_or_call()
        raise SyntaxError(f'Unexpected statement: {value}')

    def parse_local(self):
        """Parse ``local NAME [= expr]`` into ``('local', name, expr_or_None)``."""
        self.eat('KEYWORD', 'local')
        name = self.eat('NAME')[1]
        if self.peek()[1] == '=':
            self.eat('SYMBOL', '=')
            return ('local', name, self.parse_expression())
        return ('local', name, None)

    def parse_function(self):
        """Parse ``function NAME(params) block end``.

        Returns ``('function', name, [param_name, ...], block)``.
        """
        self.eat('KEYWORD', 'function')
        name = self.eat('NAME')[1]
        self.eat('SYMBOL', '(')
        params = []
        while self.peek()[1] != ')':
            if params:
                self.eat('SYMBOL', ',')
            params.append(self.eat('NAME')[1])
        self.eat('SYMBOL', ')')
        body = self.parse_block()
        self.eat('KEYWORD', 'end')
        return ('function', name, params, body)

    def parse_if(self):
        """Parse an ``if`` statement with optional ``elseif``/``else`` parts.

        Returns ``('if', cond, then_block, [(cond, block), ...], else_block)``
        where ``else_block`` is ``None`` when there is no ``else`` branch.
        """
        self.eat('KEYWORD', 'if')
        cond = self.parse_expression()
        self.eat('KEYWORD', 'then')
        then_block = self.parse_block()
        elseif_blocks = []
        while self.peek()[1] == 'elseif':
            self.eat('KEYWORD', 'elseif')
            elseif_cond = self.parse_expression()
            self.eat('KEYWORD', 'then')
            elseif_blocks.append((elseif_cond, self.parse_block()))
        else_block = None
        if self.peek()[1] == 'else':
            self.eat('KEYWORD', 'else')
            else_block = self.parse_block()
        self.eat('KEYWORD', 'end')
        return ('if', cond, then_block, elseif_blocks, else_block)

    def parse_while(self):
        """Parse ``while cond do block end`` into ``('while', cond, block)``."""
        self.eat('KEYWORD', 'while')
        cond = self.parse_expression()
        self.eat('KEYWORD', 'do')
        body = self.parse_block()
        self.eat('KEYWORD', 'end')
        return ('while', cond, body)

    def parse_return(self):
        """Parse ``return [expr]`` into ``('return', expr_or_None)``.

        A bare ``return`` (followed by end of input, a block terminator or
        ``;``) yields ``('return', None)``.
        """
        self.eat('KEYWORD', 'return')
        kind, value = self.peek()
        if kind is None or value in self._BLOCK_END or (kind == 'SYMBOL' and value == ';'):
            return ('return', None)
        return ('return', self.parse_expression())

    def _parse_call_args(self):
        """Parse ``( [expr {, expr}] )`` and return the list of argument nodes."""
        self.eat('SYMBOL', '(')
        args = []
        while self.peek()[1] != ')':
            if args:
                self.eat('SYMBOL', ',')
            args.append(self.parse_expression())
        self.eat('SYMBOL', ')')
        return args

    def parse_assignment_or_call(self):
        """Parse a statement that starts with a name.

        Returns ``('assign', name, expr)`` for ``NAME = expr`` or
        ``('call', name, args)`` for ``NAME(args)``.

        Raises:
            SyntaxError: if the name is followed by anything else.
        """
        name = self.eat('NAME')[1]
        follower = self.peek()[1]
        if follower == '=':
            self.eat('SYMBOL', '=')
            return ('assign', name, self.parse_expression())
        if follower == '(':
            return ('call', name, self._parse_call_args())
        raise SyntaxError(f'Unexpected token after identifier: {self.peek()}')

    def parse_expression(self, precedence=0):
        """Parse an expression using precedence climbing.

        Args:
            precedence: minimum binary-operator precedence that this call is
                allowed to consume.

        Returns:
            The operand node itself, or nested ``('binop', op, left, right)``
            nodes.  Operators are left-associative except ``..`` and ``^``.
        """
        expr = self.parse_primary()
        while True:
            kind, op = self.peek()
            if kind not in ('SYMBOL', 'KEYWORD'):
                break
            prec = self.get_precedence(op)
            if prec < precedence:
                break
            self.eat()
            # Left-associative operators require a strictly tighter right-hand
            # side; right-associative ones may recurse at the same level.
            next_min = prec if op in self._RIGHT_ASSOCIATIVE else prec + 1
            right = self.parse_expression(next_min)
            expr = ('binop', op, expr, right)
        return expr

    def parse_primary(self):
        """Parse an operand: literal, name, call, unary op, group or table.

        Raises:
            SyntaxError: if the current token cannot start an expression.
        """
        kind, value = self.peek()
        if kind == 'NUMBER':
            return ('number', float(self.eat()[1]))
        if kind == 'STRING':
            return ('string', self.eat()[1])
        if kind == 'NAME':
            name = self.eat()[1]
            if self.peek()[1] == '(':
                # Function call used as a value, e.g. ``local y = f(1)``.
                return ('call', name, self._parse_call_args())
            return ('name', name)
        if kind in ('SYMBOL', 'KEYWORD') and value in self._UNARY_OPERATORS:
            self.eat()
            # The operand may only absorb operators binding tighter than the
            # unary operator itself, i.e. '^'.
            operand = self.parse_expression(self._UNARY_PRECEDENCE)
            return ('unop', value, operand)
        if value == '(':
            self.eat('SYMBOL', '(')
            expr = self.parse_expression()
            self.eat('SYMBOL', ')')
            return expr
        if value == '{':
            return self.parse_table()
        if value == 'nil':
            self.eat('KEYWORD', 'nil')
            return ('nil',)
        if value == 'true':
            self.eat('KEYWORD', 'true')
            return ('bool', True)
        if value == 'false':
            self.eat('KEYWORD', 'false')
            return ('bool', False)
        raise SyntaxError(f'Unexpected token in expression: {value}')

    def parse_table(self):
        """Parse ``{ expr, expr, ... }`` into ``('table', [expr, ...])``.

        Only positional, comma-separated fields are handled.
        """
        self.eat('SYMBOL', '{')
        fields = []
        while self.peek()[1] != '}':
            if fields:
                self.eat('SYMBOL', ',')
            fields.append(self.parse_expression())
        self.eat('SYMBOL', '}')
        return ('table', fields)

    def get_precedence(self, op):
        """Return the binary precedence of *op*, or ``-1`` if it is not a binary operator."""
        return self._BINARY_PRECEDENCE.get(op, -1)


# --- Test ---
if __name__ == '__main__':
    # Demo: tokenize and parse a small Lua program, then pretty-print its AST.
    from pprint import pprint

    code = """
    local x = 1
    local y = x + 2 * 3
    function hello(a, b)
        if a > b then
            return a
        elseif b > a then
            return b
        else
            return 0
        end
    end
    while x < 10 do
        x = x + 1
        print(x)
    end
    """

    # tokenize() is lazy; LuaParser materialises the token stream itself.
    pprint(LuaParser(tokenize(code)).parse())
|
||
|
|