]> Joshua Wise's Git repositories - jwcc.git/blobdiff - lib/parser/oldstyle.lua
update grammar a bit
[jwcc.git] / lib / parser / oldstyle.lua
diff --git a/lib/parser/oldstyle.lua b/lib/parser/oldstyle.lua
new file mode 100644 (file)
index 0000000..66a380a
--- /dev/null
@@ -0,0 +1,303 @@
+function gettoken(tokenlist)
+       local token = table.remove(tokenlist, 1)
+       return token
+end
+
+function peektoken(tokenlist, n)
+       if not n then
+               n = 1
+       end
+       local token = tokenlist[n]
+       return token
+end
+
+function expecttoken(token, expected)
+       if not token then
+               error("unexpected EOF", 1)
+       end
+       for k,v in pairs(expected) do
+               if token.type == v then
+                       return token
+               end
+       end
+       error("unexpected token "..token.type,1)
+end
+
+function parser_global(tokenlist, parsetree)
+       -- GLOBAL = (VARIABLE | FUNCTION)
+       local token = peektoken(tokenlist)
+       while token do
+               if token.type == "int" then
+                       local subtoken
+                       expecttoken(peektoken(tokenlist, 2), {"identifier"})
+                       subtoken = peektoken(tokenlist, 3)
+                       if not subtoken then
+                               error("unexpected EOF")
+                       end
+                       if subtoken.type == "(" then
+                               table.insert(parsetree, parser_function(tokenlist))
+                       elseif subtoken.type == ";" or subtoken.type == "=" then
+                               table.insert(parsetree, parser_variable(tokenlist))
+                       else
+                               error("unexpected token "..subtoken.type.." following 'int <identifier>'")
+                       end
+               else
+                       error("unexpected token "..token.type)
+               end
+               token = peektoken(tokenlist)
+       end
+end
+
+function parser_function(tokenlist)
+       -- FUNCTION = TYPE IDENTIFIER ARGSLIST STMTLIST
+       local myfunction = {}
+       
+       myfunction.type = "function"
+       myfunction.returntype = parser_type(tokenlist)
+       local token = expecttoken(gettoken(tokenlist), {"identifier"})
+       myfunction.name = token.value
+       myfunction.args = parser_argslist(tokenlist)
+       myfunction.body = parser_stmtlist(tokenlist)
+       return myfunction
+end
+
+function parser_type(tokenlist)
+       -- TYPE = INT
+       return expecttoken(gettoken(tokenlist), { "int" }).type
+end
+
+function parser_istype(tokenlist)
+       if peektoken(tokenlist).type == "int" then
+               return true
+       end
+       return false
+end
+
+function parser_argslist(tokenlist)
+       -- ARGSLIST = ( [TYPE IDENTIFIER [, TYPE IDENTIFIER]*] )
+       local token
+       local argslist = {}
+       local firstarg = true
+       expecttoken(gettoken(tokenlist), { "(" })
+       while true do
+               local type, ident
+               token = peektoken(tokenlist)
+               if token.type == ")" then
+                       gettoken(tokenlist)
+                       return argslist
+               end
+               if not firstarg then
+                       expecttoken(gettoken(tokenlist), { "," })
+                       token = peektoken(tokenlist)
+               end
+               firstarg = false
+               type = parser_type(tokenlist)
+               token = expecttoken(gettoken(tokenlist), { "identifier" })
+               ident = token.value
+               table.insert(argslist, { type = type, name = ident })
+       end
+end
+
+function parser_stmtlist(tokenlist)
+       -- STMTLIST = { [( VARIABLE | STATEMENT | STMTLIST)]* }
+       local token
+       local stmtlist = {}
+       
+       stmtlist.type = "stmtlist"
+       stmtlist.body = {}
+       expecttoken(gettoken(tokenlist), {"{"})
+       while true do
+               token = peektoken(tokenlist)
+               if token.type == "}" then
+                       gettoken(tokenlist)
+                       return stmtlist
+               end
+               if parser_istype(tokenlist) then
+                       table.insert(stmtlist.body, parser_variable(tokenlist))
+               elseif token.type == "{" then
+                       table.insert(stmtlist.body, parser_stmtlist(tokenlist))
+               else
+                       table.insert(stmtlist.body, parser_statement(tokenlist))
+               end
+       end
+end
+
+function parser_variable(tokenlist)
+       -- VARIABLE = TYPE IDENTIFIER [ = TYPEINITIALIZER ] ;
+       local variable = {}
+       variable.type = "variable"
+       variable.vtype = parser_type(tokenlist)
+       variable.name = expecttoken(gettoken(tokenlist), { "identifier" }).value
+       expecttoken(peektoken(tokenlist), { ";", "=" })
+       local token = gettoken(tokenlist)
+       if token.type == ";" then
+               return variable
+       end
+       if token.type == '=' then
+               if variable.vtype == "int" then
+                       variable.initializer = parser_getinitializer_int(tokenlist)
+               else
+                       error("internal error")
+               end
+               expecttoken(gettoken(tokenlist), { ";" })
+               return variable
+       end
+       error("internal error")
+end
+
+function parser_getinitializer_int(tokenlist)
+       return expecttoken(gettoken(tokenlist), { "number" }).value
+end
+
+function parser_statement(tokenlist)
+       -- STATEMENT = ; | EXPRESSION ; | (WHILE | IF)
+       local token = peektoken(tokenlist)
+       if token.type == ";" then
+               return nil
+       elseif token.type == "while" then
+               return parser_while(tokenlist)
+       elseif token.type == "if" then
+               return parser_if(tokenlist)
+       elseif token.type == "return" then
+               return parser_return(tokenlist)
+       else
+               local result = parser_expression(tokenlist)
+               expecttoken(gettoken(tokenlist), { ";" })
+               return result
+       end
+end
+
+function parser_while(tokenlist)
+       -- WHILE = WHILE ( EXPRESSION ) CHUNK
+       local result = {}
+       result.type = "while"
+       expecttoken(gettoken(tokenlist), { "while" })
+       expecttoken(gettoken(tokenlist), { "(" })
+       result.expression = parser_expression(tokenlist)
+       expecttoken(gettoken(tokenlist), { ")" })
+       result.chunk = parser_chunk(tokenlist)
+       return result
+end
+
+function parser_if(tokenlist)
+       -- IF = IF ( EXPRESSION ) CHUNK
+       local result = {}
+       result.type = "if"
+       expecttoken(gettoken(tokenlist), { "if" })
+       expecttoken(gettoken(tokenlist), { "(" })
+       result.expression = parser_expression(tokenlist)
+       expecttoken(gettoken(tokenlist), { ")" })
+       result.chunk = parser_chunk(tokenlist)
+       return result
+end
+
+function parser_return(tokenlist)
+       -- RETURN [EXPRESSION] ;
+       local result = {}
+       result.type = "return"
+       expecttoken(gettoken(tokenlist), { "return" })
+       if peektoken(tokenlist).type == ";" then
+               gettoken(tokenlist)
+               return result
+       end
+       result.expression = parser_expression(tokenlist)
+       expecttoken(gettoken(tokenlist), { ";" })
+       return result
+end
+
+function parser_chunk(tokenlist)
+       local token = peektoken(tokenlist)
+       if token.type == "{" then
+               return parser_stmtlist(tokenlist)
+       else
+               return parser_statement(tokenlist)
+       end
+end
+
+function parser_expression(tokenlist)
+       -- we support:
+       -- ( ) + - < > == =
+       -- of those, ( ) has precedence 0
+       -- + - has precedence 3
+       -- < > has precedence 5
+       -- == has precedence 6
+       -- = has precedence 13
+       
+       local operators = {
+               ["*"] = { precedence = 2, associative = "left" }, 
+               ["/"] = { precedence = 2, associative = "left" }, 
+               ["+"] = { precedence = 3, associative = "left" },
+               ["-"] = { precedence = 3, associative = "left" }, 
+               ["<"] = { precedence = 5, associative = "left" },
+               [">"] = { precedence = 5, associative = "left" },
+               ["=="] ={ precedence = 6, associative = "left" },
+               ["="] = { precedence = 13, associative = "left" }
+       }
+       
+       -- Shunting yard algorithm
+       local outstack = {}
+       local opstack = {}
+       
+       local addopertooutstack = function(oper)
+               assert(table.getn(outstack) >= 2, "attempted to add operator "..oper.value.." to stack containing "..table.getn(outstack).." elements")
+               oper.right = table.remove(outstack, table.getn(outstack))
+               oper.left = table.remove(outstack, table.getn(outstack))
+               table.insert(outstack, oper)
+       end
+       
+       while true do
+               local token = peektoken(tokenlist)
+               if token.type == "identifier" or token.type == "number" then
+                       gettoken(tokenlist)
+                       
+                       if peektoken(tokenlist).type == "(" then        -- function call guyz!
+                               gettoken(tokenlist)
+                               local outp = { type = "funccall", name = token.value, args = {} }
+                               local token = peektoken(tokenlist)
+                               while token.type ~= ")" do
+                                       table.insert(outp.args, parser_expression(tokenlist))
+                                       token = expecttoken(peektoken(tokenlist), { ")", "," })
+                                       if token.type == "," then
+                                               gettoken(tokenlist)
+                                       end
+                               end
+                               gettoken(tokenlist)
+                               
+                               table.insert(outstack, outp)
+                       else
+                               table.insert(outstack, { type = token.type, value = token.value })
+                       end
+               elseif token.type == "(" then
+                       gettoken(tokenlist)
+                       
+                       local outp = parser_expression(tokenlist)
+                       expecttoken(gettoken(tokenlist), { ")" })
+                       table.insert(outstack, outp)
+               elseif operators[token.type] then
+                       gettoken(tokenlist)
+                       
+                       while table.getn(opstack) > 0 and
+                                       ((operators[token.type].associative == "left" and
+                                         operators[token.type].precedence >= operators[opstack[table.getn(opstack)].value].precedence) or
+                                        (operators[token.type].associative == "right" and
+                                         operators[token.type].precedence > operators[opstack[table.getn(opstack)].value].precedence)) do
+                               addopertooutstack(table.remove(opstack, table.getn(opstack)))
+                       end
+                       table.insert(opstack, { type = "operator", value = token.type })
+               elseif token.type == ")" or token.type == ";" or token.type == "," then
+                       break
+               end
+       end
+       
+       while table.getn(opstack) > 0 do
+               addopertooutstack(table.remove(opstack, table.getn(opstack)))
+       end
+       assert(table.getn(outstack) == 1, "wrong number of objects ("..table.getn(outstack)..") on output stack after expression parse")
+       return { type = "expression", value = outstack[1] }
+end
+
+function parse(tokenlist)
+       local parsetree = {}
+       parser_global(tokenlist, parsetree)
+       return parsetree
+end
This page took 0.030399 seconds and 4 git commands to generate.