]>
Commit | Line | Data |
---|---|---|
933e60e3 JW |
-- Decimal digit characters mapped to their numeric values.
-- Built once at load time instead of being re-created on every call.
local DIGIT_VALUES = {
  ["0"] = 0, ["1"] = 1, ["2"] = 2, ["3"] = 3, ["4"] = 4,
  ["5"] = 5, ["6"] = 6, ["7"] = 7, ["8"] = 8, ["9"] = 9,
}

-- Convert a single decimal digit character to its numeric value.
--   char: the value to look up (normally a one-character string)
-- Returns the number 0..9 when char is exactly one of "0".."9",
-- and nil for anything else (including "" and multi-character strings).
function chartonumber(char)
  return DIGIT_VALUES[char]
end
6 | ||
-- Set of strings treated as whitespace. The empty string is a member, so a
-- one-character slice taken past the end of a string (which yields "")
-- also counts as whitespace.
-- Built once at load time instead of being re-created on every call.
local WHITESPACE_CHARS = {
  [" "] = true, ["\r"] = true, ["\n"] = true, ["\t"] = true, [""] = true,
}

-- Test whether char is a whitespace character.
--   char: the value to test (normally a one-character string, or "")
-- Returns true for space, carriage return, newline, tab and the empty
-- string; returns nil (falsy) for anything else.
function iswhitespace(char)
  return WHITESPACE_CHARS[char]
end
11 | ||
-- Byte values bounding the ASCII ranges accepted in identifiers.
local BYTE_UPPER_A, BYTE_UPPER_Z = ("A"):byte(1), ("Z"):byte(1)
local BYTE_LOWER_A, BYTE_LOWER_Z = ("a"):byte(1), ("z"):byte(1)
local BYTE_0, BYTE_9 = ("0"):byte(1), ("9"):byte(1)

-- Test whether char may appear in an identifier: an ASCII letter, a decimal
-- digit, or an underscore.
--   char: a string, normally exactly one character long
-- Returns true or false.
--
-- The empty string yields false. Previously it raised an error, because
-- ("")::byte(1) returns no value and nil was then compared with a number;
-- that happened whenever a scan reached the end of its input.
--
-- Letters are recognised from the first byte of char; a digit or an
-- underscore must be the entire string.
function isidentifierchar(char)
  local code = char:byte(1)
  if code == nil then
    -- Empty string: there is no character to classify.
    return false
  end

  if char == "_" then
    return true
  end
  if #char == 1 and code >= BYTE_0 and code <= BYTE_9 then
    return true
  end
  if code >= BYTE_UPPER_A and code <= BYTE_UPPER_Z then
    return true
  end
  if code >= BYTE_LOWER_A and code <= BYTE_LOWER_Z then
    return true
  end
  return false
end
23 | ||
-- Reserved words. A word is only a keyword when it is the WHOLE identifier,
-- so "int" is a keyword but "integer" is an ordinary identifier.
local WORD_KEYWORDS = {
  ["if"] = true, ["int"] = true, ["while"] = true, ["return"] = true,
}

-- Punctuation and operator tokens, tried in this order. "==" must be tried
-- before "=" so that the longer operator wins.
local SYMBOL_TOKENS = {
  "(", ")", "{", "}", ";", ",", "+", "-", "*", "/", "<", ">", "==", "=",
}

-- Characters that may directly follow a number literal (end of input is
-- also accepted). Anything else, e.g. a letter as in "12abc", is an error.
local NUMBER_FOLLOWERS = {
  [" "] = true, ["\r"] = true, ["\n"] = true, ["\t"] = true,
  [")"] = true, ["}"] = true, [";"] = true, [","] = true,
  ["+"] = true, ["-"] = true, ["*"] = true, ["/"] = true,
  ["<"] = true, [">"] = true, ["="] = true,
}

-- Read one token from the front of input.
--   input: source text (string)
-- Returns two values: the remaining input after the token, and the token.
--   * keyword / symbol:  { type = <the keyword or symbol text> }
--   * number:            { type = "number", value = <number> }
--   * identifier:        { type = "identifier", value = <name> }
-- When input holds nothing but whitespace (or is empty) it returns "", nil.
-- Raises an error (string) when a number is followed by an unexpected
-- character, or when no token can start at the next character.
function readToken(input)
  -- Skip leading whitespace in one step by locating the first
  -- non-whitespace character.
  local start = input:find("[^ \t\r\n]")
  if not start then
    return "", nil
  end

  -- Words: take the whole identifier first, then decide whether it is a
  -- reserved word. This avoids splitting names that merely begin with a
  -- keyword, and it is safe when the word runs to the end of the input.
  local word = input:match("^[A-Za-z_][A-Za-z0-9_]*", start)
  if word then
    local rest = input:sub(start + #word)
    if WORD_KEYWORDS[word] then
      return rest, { type = word }
    end
    return rest, { type = "identifier", value = word }
  end

  -- Number literals: a run of decimal digits.
  local digits = input:match("^[0-9]+", start)
  if digits then
    local after = start + #digits
    local follower = input:sub(after, after) -- "" at end of input
    if follower ~= "" and not NUMBER_FOLLOWERS[follower] then
      error("expected one of whitespace, ), }, ;, ,, +, -, *, /, <, >, = after number; got "..follower)
    end
    return input:sub(after), { type = "number", value = tonumber(digits) }
  end

  -- Punctuation and operators. ipairs guarantees the listed order.
  for _, symbol in ipairs(SYMBOL_TOKENS) do
    if input:sub(start, start + #symbol - 1) == symbol then
      return input:sub(start + #symbol), { type = symbol }
    end
  end

  local first = input:sub(start, start)
  error("invalid character to start token: "..first.." ("..first:byte(1)..")")
end
78 | ||
-- Split input into a list of tokens by calling readToken repeatedly until
-- no input remains.
--   input: source text (string)
-- Returns an array of the token tables produced by readToken, in order.
-- Any error raised by readToken propagates to the caller.
function tokenize(input)
  local tokens = {}
  local remaining = input
  while remaining:len() ~= 0 do
    local nexttoken
    remaining, nexttoken = readToken(remaining)
    -- readToken yields a nil token when only whitespace was left;
    -- there is nothing to append in that case.
    if nexttoken ~= nil then
      tokens[#tokens + 1] = nexttoken
    end
  end
  return tokens
end