Initial commit
[jwcc.git] / lib / parser / parser.lua
1 function gettoken(tokenlist)
2         local token = table.remove(tokenlist, 1)
3         return token
4 end
5
6 function peektoken(tokenlist, n)
7         if not n then
8                 n = 1
9         end
10         local token = tokenlist[n]
11         return token
12 end
13
14 function expecttoken(token, expected)
15         if not token then
16                 error("unexpected EOF", 1)
17         end
18         for k,v in pairs(expected) do
19                 if token.type == v then
20                         return token
21                 end
22         end
23         error("unexpected token "..token.type,1)
24 end
25
26 function parser_global(tokenlist, parsetree)
27         -- GLOBAL = (VARIABLE | FUNCTION)
28         local token = peektoken(tokenlist)
29         while token do
30                 if token.type == "int" then
31                         local subtoken
32                         expecttoken(peektoken(tokenlist, 2), {"identifier"})
33                         subtoken = peektoken(tokenlist, 3)
34                         if not subtoken then
35                                 error("unexpected EOF")
36                         end
37                         if subtoken.type == "(" then
38                                 table.insert(parsetree, parser_function(tokenlist))
39                         elseif subtoken.type == ";" or subtoken.type == "=" then
40                                 table.insert(parsetree, parser_variable(tokenlist))
41                         else
42                                 error("unexpected token "..subtoken.type.." following 'int <identifier>'")
43                         end
44                 else
45                         error("unexpected token "..token.type)
46                 end
47                 token = peektoken(tokenlist)
48         end
49 end
50
51 function parser_function(tokenlist)
52         -- FUNCTION = TYPE IDENTIFIER ARGSLIST STMTLIST
53         local myfunction = {}
54         
55         myfunction.type = "function"
56         myfunction.returntype = parser_type(tokenlist)
57         local token = expecttoken(gettoken(tokenlist), {"identifier"})
58         myfunction.name = token.value
59         myfunction.args = parser_argslist(tokenlist)
60         myfunction.body = parser_stmtlist(tokenlist)
61         return myfunction
62 end
63
64 function parser_type(tokenlist)
65         -- TYPE = INT
66         return expecttoken(gettoken(tokenlist), { "int" }).type
67 end
68
69 function parser_istype(tokenlist)
70         if peektoken(tokenlist).type == "int" then
71                 return true
72         end
73         return false
74 end
75
76 function parser_argslist(tokenlist)
77         -- ARGSLIST = ( [TYPE IDENTIFIER [, TYPE IDENTIFIER]*] )
78         local token
79         local argslist = {}
80         local firstarg = true
81         expecttoken(gettoken(tokenlist), { "(" })
82         while true do
83                 local type, ident
84                 token = peektoken(tokenlist)
85                 if token.type == ")" then
86                         gettoken(tokenlist)
87                         return argslist
88                 end
89                 if not firstarg then
90                         expecttoken(gettoken(tokenlist), { "," })
91                         token = peektoken(tokenlist)
92                 end
93                 firstarg = false
94                 type = parser_type(tokenlist)
95                 token = expecttoken(gettoken(tokenlist), { "identifier" })
96                 ident = token.value
97                 table.insert(argslist, { type = type, name = ident })
98         end
99 end
100
101 function parser_stmtlist(tokenlist)
102         -- STMTLIST = { [( VARIABLE | STATEMENT | STMTLIST)]* }
103         local token
104         local stmtlist = {}
105         
106         stmtlist.type = "stmtlist"
107         stmtlist.body = {}
108         expecttoken(gettoken(tokenlist), {"{"})
109         while true do
110                 token = peektoken(tokenlist)
111                 if token.type == "}" then
112                         gettoken(tokenlist)
113                         return stmtlist
114                 end
115                 if parser_istype(tokenlist) then
116                         table.insert(stmtlist.body, parser_variable(tokenlist))
117                 elseif token.type == "{" then
118                         table.insert(stmtlist.body, parser_stmtlist(tokenlist))
119                 else
120                         table.insert(stmtlist.body, parser_statement(tokenlist))
121                 end
122         end
123 end
124
125 function parser_variable(tokenlist)
126         -- VARIABLE = TYPE IDENTIFIER [ = TYPEINITIALIZER ] ;
127         local variable = {}
128         variable.type = "variable"
129         variable.vtype = parser_type(tokenlist)
130         variable.name = expecttoken(gettoken(tokenlist), { "identifier" }).value
131         expecttoken(peektoken(tokenlist), { ";", "=" })
132         local token = gettoken(tokenlist)
133         if token.type == ";" then
134                 return variable
135         end
136         if token.type == '=' then
137                 if variable.vtype == "int" then
138                         variable.initializer = parser_getinitializer_int(tokenlist)
139                 else
140                         error("internal error")
141                 end
142                 expecttoken(gettoken(tokenlist), { ";" })
143                 return variable
144         end
145         error("internal error")
146 end
147
148 function parser_getinitializer_int(tokenlist)
149         return expecttoken(gettoken(tokenlist), { "number" }).value
150 end
151
152 function parser_statement(tokenlist)
153         -- STATEMENT = ; | EXPRESSION ; | (WHILE | IF)
154         local token = peektoken(tokenlist)
155         if token.type == ";" then
156                 return nil
157         elseif token.type == "while" then
158                 return parser_while(tokenlist)
159         elseif token.type == "if" then
160                 return parser_if(tokenlist)
161         elseif token.type == "return" then
162                 return parser_return(tokenlist)
163         else
164                 local result = parser_expression(tokenlist)
165                 expecttoken(gettoken(tokenlist), { ";" })
166                 return result
167         end
168 end
169
170 function parser_while(tokenlist)
171         -- WHILE = WHILE ( EXPRESSION ) CHUNK
172         local result = {}
173         result.type = "while"
174         expecttoken(gettoken(tokenlist), { "while" })
175         expecttoken(gettoken(tokenlist), { "(" })
176         result.expression = parser_expression(tokenlist)
177         expecttoken(gettoken(tokenlist), { ")" })
178         result.chunk = parser_chunk(tokenlist)
179         return result
180 end
181
182 function parser_if(tokenlist)
183         -- IF = IF ( EXPRESSION ) CHUNK
184         local result = {}
185         result.type = "if"
186         expecttoken(gettoken(tokenlist), { "if" })
187         expecttoken(gettoken(tokenlist), { "(" })
188         result.expression = parser_expression(tokenlist)
189         expecttoken(gettoken(tokenlist), { ")" })
190         result.chunk = parser_chunk(tokenlist)
191         return result
192 end
193
194 function parser_return(tokenlist)
195         -- RETURN [EXPRESSION] ;
196         local result = {}
197         result.type = "return"
198         expecttoken(gettoken(tokenlist), { "return" })
199         if peektoken(tokenlist).type == ";" then
200                 gettoken(tokenlist)
201                 return result
202         end
203         result.expression = parser_expression(tokenlist)
204         expecttoken(gettoken(tokenlist), { ";" })
205         return result
206 end
207
208 function parser_chunk(tokenlist)
209         local token = peektoken(tokenlist)
210         if token.type == "{" then
211                 return parser_stmtlist(tokenlist)
212         else
213                 return parser_statement(tokenlist)
214         end
215 end
216
217 function parser_expression(tokenlist)
218         -- we support:
219         -- ( ) + - < > == =
220         -- of those, ( ) has precedence 0
221         -- + - has precedence 3
222         -- < > has precedence 5
223         -- == has precedence 6
224         -- = has precedence 13
225         
226         local operators = {
227                 ["*"] = { precedence = 2, associative = "left" }, 
228                 ["/"] = { precedence = 2, associative = "left" }, 
229                 ["+"] = { precedence = 3, associative = "left" },
230                 ["-"] = { precedence = 3, associative = "left" }, 
231                 ["<"] = { precedence = 5, associative = "left" },
232                 [">"] = { precedence = 5, associative = "left" },
233                 ["=="] ={ precedence = 6, associative = "left" },
234                 ["="] = { precedence = 13, associative = "left" }
235         }
236         
237         -- Shunting yard algorithm
238         local outstack = {}
239         local opstack = {}
240         
241         local addopertooutstack = function(oper)
242                 assert(table.getn(outstack) >= 2, "attempted to add operator "..oper.value.." to stack containing "..table.getn(outstack).." elements")
243                 oper.right = table.remove(outstack, table.getn(outstack))
244                 oper.left = table.remove(outstack, table.getn(outstack))
245                 table.insert(outstack, oper)
246         end
247         
248         while true do
249                 local token = peektoken(tokenlist)
250                 if token.type == "identifier" or token.type == "number" then
251                         gettoken(tokenlist)
252                         
253                         if peektoken(tokenlist).type == "(" then        -- function call guyz!
254                                 gettoken(tokenlist)
255                                 local outp = { type = "funccall", name = token.value, args = {} }
256                                 local token = peektoken(tokenlist)
257                                 while token.type ~= ")" do
258                                         table.insert(outp.args, parser_expression(tokenlist))
259                                         token = expecttoken(peektoken(tokenlist), { ")", "," })
260                                         if token.type == "," then
261                                                 gettoken(tokenlist)
262                                         end
263                                 end
264                                 gettoken(tokenlist)
265                                 
266                                 table.insert(outstack, outp)
267                         else
268                                 table.insert(outstack, { type = token.type, value = token.value })
269                         end
270                 elseif token.type == "(" then
271                         gettoken(tokenlist)
272                         
273                         local outp = parser_expression(tokenlist)
274                         expecttoken(gettoken(tokenlist), { ")" })
275                         table.insert(outstack, outp)
276                 elseif operators[token.type] then
277                         gettoken(tokenlist)
278                         
279                         while table.getn(opstack) > 0 and
280                                         ((operators[token.type].associative == "left" and
281                                           operators[token.type].precedence >= operators[opstack[table.getn(opstack)].value].precedence) or
282                                          (operators[token.type].associative == "right" and
283                                           operators[token.type].precedence > operators[opstack[table.getn(opstack)].value].precedence)) do
284                                 addopertooutstack(table.remove(opstack, table.getn(opstack)))
285                         end
286                         table.insert(opstack, { type = "operator", value = token.type })
287                 elseif token.type == ")" or token.type == ";" or token.type == "," then
288                         break
289                 end
290         end
291         
292         while table.getn(opstack) > 0 do
293                 addopertooutstack(table.remove(opstack, table.getn(opstack)))
294         end
295         assert(table.getn(outstack) == 1, "wrong number of objects ("..table.getn(outstack)..") on output stack after expression parse")
296         return { type = "expression", value = outstack[1] }
297 end
298
299 function parse(tokenlist)
300         local parsetree = {}
301         parser_global(tokenlist, parsetree)
302         return parsetree
303 end
This page took 0.034297 seconds and 4 git commands to generate.