function gettoken(tokenlist) local token = table.remove(tokenlist, 1) return token end function peektoken(tokenlist, n) if not n then n = 1 end local token = tokenlist[n] return token end function expecttoken(token, expected) if not token then error("unexpected EOF", 1) end for k,v in pairs(expected) do if token.type == v then return token end end error("unexpected token "..token.type,1) end function parser_global(tokenlist, parsetree) -- GLOBAL = (VARIABLE | FUNCTION) local token = peektoken(tokenlist) while token do if token.type == "int" then local subtoken expecttoken(peektoken(tokenlist, 2), {"identifier"}) subtoken = peektoken(tokenlist, 3) if not subtoken then error("unexpected EOF") end if subtoken.type == "(" then table.insert(parsetree, parser_function(tokenlist)) elseif subtoken.type == ";" or subtoken.type == "=" then table.insert(parsetree, parser_variable(tokenlist)) else error("unexpected token "..subtoken.type.." following 'int '") end else error("unexpected token "..token.type) end token = peektoken(tokenlist) end end function parser_function(tokenlist) -- FUNCTION = TYPE IDENTIFIER ARGSLIST STMTLIST local myfunction = {} myfunction.type = "function" myfunction.returntype = parser_type(tokenlist) local token = expecttoken(gettoken(tokenlist), {"identifier"}) myfunction.name = token.value myfunction.args = parser_argslist(tokenlist) myfunction.body = parser_stmtlist(tokenlist) return myfunction end function parser_type(tokenlist) -- TYPE = INT return expecttoken(gettoken(tokenlist), { "int" }).type end function parser_istype(tokenlist) if peektoken(tokenlist).type == "int" then return true end return false end function parser_argslist(tokenlist) -- ARGSLIST = ( [TYPE IDENTIFIER [, TYPE IDENTIFIER]*] ) local token local argslist = {} local firstarg = true expecttoken(gettoken(tokenlist), { "(" }) while true do local type, ident token = peektoken(tokenlist) if token.type == ")" then gettoken(tokenlist) return argslist end if not firstarg then expecttoken(gettoken(tokenlist), { "," }) token = peektoken(tokenlist) end firstarg = false type = parser_type(tokenlist) token = expecttoken(gettoken(tokenlist), { "identifier" }) ident = token.value table.insert(argslist, { type = type, name = ident }) end end function parser_stmtlist(tokenlist) -- STMTLIST = { [( VARIABLE | STATEMENT | STMTLIST)]* } local token local stmtlist = {} stmtlist.type = "stmtlist" stmtlist.body = {} expecttoken(gettoken(tokenlist), {"{"}) while true do token = peektoken(tokenlist) if token.type == "}" then gettoken(tokenlist) return stmtlist end if parser_istype(tokenlist) then table.insert(stmtlist.body, parser_variable(tokenlist)) elseif token.type == "{" then table.insert(stmtlist.body, parser_stmtlist(tokenlist)) else table.insert(stmtlist.body, parser_statement(tokenlist)) end end end function parser_variable(tokenlist) -- VARIABLE = TYPE IDENTIFIER [ = TYPEINITIALIZER ] ; local variable = {} variable.type = "variable" variable.vtype = parser_type(tokenlist) variable.name = expecttoken(gettoken(tokenlist), { "identifier" }).value expecttoken(peektoken(tokenlist), { ";", "=" }) local token = gettoken(tokenlist) if token.type == ";" then return variable end if token.type == '=' then if variable.vtype == "int" then variable.initializer = parser_getinitializer_int(tokenlist) else error("internal error") end expecttoken(gettoken(tokenlist), { ";" }) return variable end error("internal error") end function parser_getinitializer_int(tokenlist) return expecttoken(gettoken(tokenlist), { "number" }).value end function parser_statement(tokenlist) -- STATEMENT = ; | EXPRESSION ; | (WHILE | IF) local token = peektoken(tokenlist) if token.type == ";" then return nil elseif token.type == "while" then return parser_while(tokenlist) elseif token.type == "if" then return parser_if(tokenlist) elseif token.type == "return" then return parser_return(tokenlist) else local result = parser_expression(tokenlist) expecttoken(gettoken(tokenlist), { ";" }) return result end end function parser_while(tokenlist) -- WHILE = WHILE ( EXPRESSION ) CHUNK local result = {} result.type = "while" expecttoken(gettoken(tokenlist), { "while" }) expecttoken(gettoken(tokenlist), { "(" }) result.expression = parser_expression(tokenlist) expecttoken(gettoken(tokenlist), { ")" }) result.chunk = parser_chunk(tokenlist) return result end function parser_if(tokenlist) -- IF = IF ( EXPRESSION ) CHUNK local result = {} result.type = "if" expecttoken(gettoken(tokenlist), { "if" }) expecttoken(gettoken(tokenlist), { "(" }) result.expression = parser_expression(tokenlist) expecttoken(gettoken(tokenlist), { ")" }) result.chunk = parser_chunk(tokenlist) return result end function parser_return(tokenlist) -- RETURN [EXPRESSION] ; local result = {} result.type = "return" expecttoken(gettoken(tokenlist), { "return" }) if peektoken(tokenlist).type == ";" then gettoken(tokenlist) return result end result.expression = parser_expression(tokenlist) expecttoken(gettoken(tokenlist), { ";" }) return result end function parser_chunk(tokenlist) local token = peektoken(tokenlist) if token.type == "{" then return parser_stmtlist(tokenlist) else return parser_statement(tokenlist) end end function parser_expression(tokenlist) -- we support: -- ( ) + - < > == = -- of those, ( ) has precedence 0 -- + - has precedence 3 -- < > has precedence 5 -- == has precedence 6 -- = has precedence 13 local operators = { ["*"] = { precedence = 2, associative = "left" }, ["/"] = { precedence = 2, associative = "left" }, ["+"] = { precedence = 3, associative = "left" }, ["-"] = { precedence = 3, associative = "left" }, ["<"] = { precedence = 5, associative = "left" }, [">"] = { precedence = 5, associative = "left" }, ["=="] ={ precedence = 6, associative = "left" }, ["="] = { precedence = 13, associative = "left" } } -- Shunting yard algorithm local outstack = {} local opstack = {} local addopertooutstack = function(oper) assert(table.getn(outstack) >= 2, "attempted to add operator "..oper.value.." to stack containing "..table.getn(outstack).." elements") oper.right = table.remove(outstack, table.getn(outstack)) oper.left = table.remove(outstack, table.getn(outstack)) table.insert(outstack, oper) end while true do local token = peektoken(tokenlist) if token.type == "identifier" or token.type == "number" then gettoken(tokenlist) if peektoken(tokenlist).type == "(" then -- function call guyz! gettoken(tokenlist) local outp = { type = "funccall", name = token.value, args = {} } local token = peektoken(tokenlist) while token.type ~= ")" do table.insert(outp.args, parser_expression(tokenlist)) token = expecttoken(peektoken(tokenlist), { ")", "," }) if token.type == "," then gettoken(tokenlist) end end gettoken(tokenlist) table.insert(outstack, outp) else table.insert(outstack, { type = token.type, value = token.value }) end elseif token.type == "(" then gettoken(tokenlist) local outp = parser_expression(tokenlist) expecttoken(gettoken(tokenlist), { ")" }) table.insert(outstack, outp) elseif operators[token.type] then gettoken(tokenlist) while table.getn(opstack) > 0 and ((operators[token.type].associative == "left" and operators[token.type].precedence >= operators[opstack[table.getn(opstack)].value].precedence) or (operators[token.type].associative == "right" and operators[token.type].precedence > operators[opstack[table.getn(opstack)].value].precedence)) do addopertooutstack(table.remove(opstack, table.getn(opstack))) end table.insert(opstack, { type = "operator", value = token.type }) elseif token.type == ")" or token.type == ";" or token.type == "," then break end end while table.getn(opstack) > 0 do addopertooutstack(table.remove(opstack, table.getn(opstack))) end assert(table.getn(outstack) == 1, "wrong number of objects ("..table.getn(outstack)..") on output stack after expression parse") return { type = "expression", value = outstack[1] } end function parse(tokenlist) local parsetree = {} parser_global(tokenlist, parsetree) return parsetree end