small-projects/pylua/main.py

244 lines
7.2 KiB
Python
Raw Normal View History

2025-04-16 22:27:55 +00:00
import re
# --- Tokenizer ---
# Token specification. Alternatives are tried in list order at each position
# and the first one that matches wins, so ordering is significant:
#   * COMMENT must come before SYMBOL, otherwise the leading "-" of "--" is
#     lexed as a minus sign and comment text leaks into the token stream.
#   * KEYWORD must come before NAME so reserved words are not lexed as names.
#   * MISMATCH is the catch-all and must stay last.
TOKEN_REGEX = [
    # Long comment --[==[ ... ]==] (the closing bracket must carry the same
    # number of "=" as the opening one), or a line comment. The line comment
    # uses [^\n]* rather than .* because TOKEN_RE is compiled with re.DOTALL,
    # under which ".*" would run to the end of the input.
    ('COMMENT', r'--\[(?P<COMMENT_LEVEL>=*)\[.*?\](?P=COMMENT_LEVEL)\]|--[^\n]*'),
    ('KEYWORD', r'\b(and|break|do|else|elseif|end|false|for|function|goto|if|in|local|nil|not|or|repeat|return|then|true|until|while)\b'),
    ('NAME', r'[A-Za-z_][A-Za-z0-9_]*'),
    # Decimal integer or float with an optional exponent (e.g. 3, 3.14, 1e10).
    ('NUMBER', r'\d+(\.\d+)?([eE][+-]?\d+)?'),
    # Single- or double-quoted string with backslash escapes; quotes are kept.
    ('STRING', r'"([^"\\]|\\.)*"|\'([^\'\\]|\\.)*\''),
    # Multi-character operators are listed first so they win over their prefixes.
    ('SYMBOL', r'==|~=|<=|>=|\.{2}|[+\-*/%^#=<>;:,.\[\](){}]'),
    ('SKIP', r'[ \t\r\n]+'),
    ('MISMATCH', r'.'),
]

# One combined pattern with a named group per token kind; ``match.lastgroup``
# then reports which kind matched. DOTALL lets long comments span lines.
TOKEN_RE = re.compile(
    '|'.join(f'(?P<{name}>{pattern})' for name, pattern in TOKEN_REGEX),
    re.DOTALL,
)
def tokenize(code):
    """Lex *code* into a lazy stream of ``(kind, value)`` token pairs.

    Matches classified as ``SKIP`` or ``COMMENT`` are dropped. Because this
    is a generator, errors surface while iterating, not at call time.

    Raises:
        SyntaxError: when a character matches no token rule (``MISMATCH``).
    """
    for found in TOKEN_RE.finditer(code):
        kind, text = found.lastgroup, found.group()
        if kind == 'MISMATCH':
            raise SyntaxError(f'Unexpected token: {text}')
        if kind not in ('SKIP', 'COMMENT'):
            yield kind, text
# --- Parser ---
class LuaParser:
    """Recursive-descent parser for a small subset of Lua.

    Consumes ``(kind, value)`` token pairs and produces a tuple-based AST.
    Node shapes:

        ('block', [stmt, ...])
        ('local', name, expr | None)
        ('assign', name, expr)
        ('function', name, [param, ...], block)
        ('if', cond, then_block, [(cond, block), ...], else_block | None)
        ('while', cond, block)
        ('return', expr | None)
        ('call', name, [arg, ...])          # statement or expression
        ('binop', op, left, right)
        ('unop', op, operand)
        ('number', float)  ('string', raw_text_with_quotes)  ('name', str)
        ('bool', bool)  ('nil',)  ('table', [expr, ...])

    Not supported: ``for``/``repeat``/``do`` statements, field or index
    access, method calls, anonymous functions, multiple assignment.
    """

    # Keywords that close the enclosing block; parse_block stops in front of them.
    _BLOCK_END = ('end', 'elseif', 'else', 'until')

    # Binary operator precedences, lowest to highest, following the Lua manual.
    # Level 7 is reserved for the unary operators.
    _PRECEDENCE = {
        'or': 1,
        'and': 2,
        '<': 3, '>': 3, '<=': 3, '>=': 3, '==': 3, '~=': 3,
        '..': 4,
        '+': 5, '-': 5,
        '*': 6, '/': 6, '%': 6,
        '^': 8,
    }
    # Lua defines concatenation and exponentiation as right-associative.
    _RIGHT_ASSOC = ('..', '^')
    _UNARY_OPS = ('-', '#', 'not')
    _UNARY_PRECEDENCE = 7

    def __init__(self, tokens):
        """Store *tokens* (any iterable of ``(kind, value)`` pairs) as a list."""
        self.tokens = list(tokens)
        self.pos = 0

    # --- Token helpers ---

    def peek(self):
        """Return the current token without consuming it, or ``(None, None)`` at EOF."""
        return self.tokens[self.pos] if self.pos < len(self.tokens) else (None, None)

    def eat(self, kind=None, value=None):
        """Consume and return the current token.

        When *kind* and/or *value* are given, the token must match them.

        Raises:
            SyntaxError: if the current token does not match the expectation.
        """
        token = self.peek()
        if kind and token[0] != kind:
            raise SyntaxError(f'Expected {kind}, got {token[0]}')
        if value and token[1] != value:
            raise SyntaxError(f'Expected {value}, got {token[1]}')
        self.pos += 1
        return token

    def _at_block_end(self):
        """Return True at end of input or in front of a block-closing keyword."""
        return self.pos >= len(self.tokens) or self.peek()[1] in self._BLOCK_END

    def _parse_call_args(self):
        """Parse ``( expr, expr, ... )`` and return the list of argument nodes."""
        self.eat('SYMBOL', '(')
        args = []
        while self.peek()[1] != ')':
            if args:
                self.eat('SYMBOL', ',')
            args.append(self.parse_expression())
        self.eat('SYMBOL', ')')
        return args

    # --- Statements ---

    def parse(self):
        """Parse the whole token stream and return the top-level block node.

        Raises:
            SyntaxError: on malformed input, including a block-closing keyword
                that has no block to close (previously such input was
                silently truncated at that keyword).
        """
        block = self.parse_block()
        if self.pos < len(self.tokens):
            # parse_block only stops early on a block terminator.
            raise SyntaxError(f'Unexpected block terminator: {self.peek()[1]}')
        return block

    def parse_block(self):
        """Parse statements until a block-closing keyword or end of input.

        The closing keyword itself is left for the caller to consume.
        """
        block = []
        while not self._at_block_end():
            block.append(self.parse_statement())
        return ('block', block)

    def parse_statement(self):
        """Dispatch on the current token to the matching statement parser."""
        kind, value = self.peek()
        if value == 'local':
            return self.parse_local()
        if value == 'function':
            return self.parse_function()
        if value == 'if':
            return self.parse_if()
        if value == 'while':
            return self.parse_while()
        if value == 'return':
            return self.parse_return()
        if kind == 'NAME':
            return self.parse_assignment_or_call()
        raise SyntaxError(f'Unexpected statement: {value}')

    def parse_local(self):
        """Parse ``local NAME [= expr]``."""
        self.eat('KEYWORD', 'local')
        name = self.eat('NAME')[1]
        if self.peek()[1] == '=':
            self.eat('SYMBOL', '=')
            return ('local', name, self.parse_expression())
        return ('local', name, None)

    def parse_function(self):
        """Parse ``function NAME(params) block end``."""
        self.eat('KEYWORD', 'function')
        name = self.eat('NAME')[1]
        self.eat('SYMBOL', '(')
        args = []
        while self.peek()[1] != ')':
            if args:
                self.eat('SYMBOL', ',')
            args.append(self.eat('NAME')[1])
        self.eat('SYMBOL', ')')
        body = self.parse_block()
        self.eat('KEYWORD', 'end')
        return ('function', name, args, body)

    def parse_if(self):
        """Parse ``if ... then ... {elseif ... then ...} [else ...] end``."""
        self.eat('KEYWORD', 'if')
        cond = self.parse_expression()
        self.eat('KEYWORD', 'then')
        then_block = self.parse_block()
        elseif_blocks = []
        while self.peek()[1] == 'elseif':
            self.eat('KEYWORD', 'elseif')
            elseif_cond = self.parse_expression()
            self.eat('KEYWORD', 'then')
            elseif_blocks.append((elseif_cond, self.parse_block()))
        else_block = None
        if self.peek()[1] == 'else':
            self.eat('KEYWORD', 'else')
            else_block = self.parse_block()
        self.eat('KEYWORD', 'end')
        return ('if', cond, then_block, elseif_blocks, else_block)

    def parse_while(self):
        """Parse ``while cond do block end``."""
        self.eat('KEYWORD', 'while')
        cond = self.parse_expression()
        self.eat('KEYWORD', 'do')
        body = self.parse_block()
        self.eat('KEYWORD', 'end')
        return ('while', cond, body)

    def parse_return(self):
        """Parse ``return [expr]``.

        Lua allows a bare ``return``; in that case the value slot is ``None``.
        """
        self.eat('KEYWORD', 'return')
        if self._at_block_end():
            return ('return', None)
        return ('return', self.parse_expression())

    def parse_assignment_or_call(self):
        """Parse ``NAME = expr`` or the call statement ``NAME(args)``."""
        name = self.eat('NAME')[1]
        if self.peek()[1] == '=':
            self.eat('SYMBOL', '=')
            return ('assign', name, self.parse_expression())
        if self.peek()[1] == '(':
            return ('call', name, self._parse_call_args())
        raise SyntaxError(f'Unexpected token after identifier: {self.peek()}')

    # --- Expressions ---

    def parse_expression(self, precedence=0):
        """Parse an expression by precedence climbing.

        Only binary operators whose precedence is at least *precedence* are
        consumed; anything else ends the expression and is left in the stream.
        """
        expr = self.parse_primary()
        while True:
            kind, op = self.peek()
            if kind not in ('SYMBOL', 'KEYWORD'):
                break
            prec = self.get_precedence(op)
            if prec < precedence:
                break
            self.eat()
            # Left-associative operators require a strictly tighter right-hand
            # side; right-associative ones may chain at the same level.
            next_min = prec if op in self._RIGHT_ASSOC else prec + 1
            right = self.parse_expression(next_min)
            expr = ('binop', op, expr, right)
        return expr

    def parse_primary(self):
        """Parse a literal, name, call, unary operation, parenthesised
        expression or table constructor.

        Raises:
            SyntaxError: if the current token cannot start an expression.
        """
        kind, value = self.peek()
        if kind == 'NUMBER':
            return ('number', float(self.eat()[1]))
        if kind == 'STRING':
            return ('string', self.eat()[1])
        if kind == 'NAME':
            name = self.eat()[1]
            if self.peek()[1] == '(':
                # Same node shape as a call statement.
                return ('call', name, self._parse_call_args())
            return ('name', name)
        if kind in ('SYMBOL', 'KEYWORD') and value in self._UNARY_OPS:
            self.eat()
            # Unary operators bind tighter than * / % but looser than ^.
            operand = self.parse_expression(self._UNARY_PRECEDENCE)
            return ('unop', value, operand)
        if value == '(':
            self.eat('SYMBOL', '(')
            expr = self.parse_expression()
            self.eat('SYMBOL', ')')
            return expr
        if value == '{':
            return self.parse_table()
        if value == 'nil':
            self.eat('KEYWORD', 'nil')
            return ('nil',)
        if value == 'true':
            self.eat('KEYWORD', 'true')
            return ('bool', True)
        if value == 'false':
            self.eat('KEYWORD', 'false')
            return ('bool', False)
        raise SyntaxError(f'Unexpected token in expression: {value}')

    def parse_table(self):
        """Parse a positional table constructor ``{expr, expr, ...}``.

        A trailing comma before ``}`` is accepted, as in Lua.
        """
        self.eat('SYMBOL', '{')
        fields = []
        while self.peek()[1] != '}':
            if fields:
                self.eat('SYMBOL', ',')
                if self.peek()[1] == '}':
                    break  # trailing separator
            fields.append(self.parse_expression())
        self.eat('SYMBOL', '}')
        return ('table', fields)

    def get_precedence(self, op):
        """Return the binary precedence of *op*, or -1 if it is not a binary
        operator (which makes the expression loop stop)."""
        return self._PRECEDENCE.get(op, -1)
# --- Test ---
if __name__ == '__main__':
    from pprint import pprint

    # Demo: run a small Lua sample (locals, a function with if/elseif/else,
    # and a while loop containing a call) through the tokenizer and parser,
    # then pretty-print the resulting AST.
    sample_source = """
local x = 1
local y = x + 2 * 3
function hello(a, b)
if a > b then
return a
elseif b > a then
return b
else
return 0
end
end
while x < 10 do
x = x + 1
print(x)
end
"""
    pprint(LuaParser(tokenize(sample_source)).parse())