]> Joshua Wise's Git repositories - jwcc.git/blame - lib/tokenizer/tokenizer.lua
update grammar a bit
[jwcc.git] / lib / tokenizer / tokenizer.lua
CommitLineData
933e60e3
JW
--- Convert a single decimal digit character to its numeric value.
-- @param char  value to inspect (normally a one-character string)
-- @return the number 0-9 when char is exactly one of "0".."9", otherwise nil
function chartonumber(char)
  -- Anything that is not a one-character string cannot be a digit.
  if type(char) ~= "string" or #char ~= 1 then
    return nil
  end

  local zero = ("0"):byte(1)
  local code = char:byte(1)
  if code >= zero and code <= zero + 9 then
    return code - zero
  end
  return nil
end
6
--- Report whether a character counts as whitespace for the tokenizer.
-- The empty string is deliberately treated as whitespace as well.
-- @param char  value to inspect (normally a one-character string)
-- @return true for " ", "\r", "\n", "\t" and "", otherwise nil
function iswhitespace(char)
  if char == " " or char == "\r" or char == "\n" or char == "\t" or char == "" then
    return true
  end
  return nil
end
11
--- Report whether a character may appear inside an identifier.
-- Accepted characters are the ASCII letters, the decimal digits and "_".
-- @param char  string to inspect, normally one character long; the empty
--              string is accepted and is never an identifier character
-- @return true or false
function isidentifierchar(char)
  -- ("")​:byte(1) produces no value, and comparing nil with a number raises
  -- an error, so the empty string has to be rejected before any byte test.
  if char == "" then
    return false
  end
  if char == "_" then
    return true
  end

  local code = char:byte(1)

  -- A digit only counts when char is exactly one character long.
  if #char == 1 and code >= ("0"):byte(1) and code <= ("9"):byte(1) then
    return true
  end

  -- For letters only the first byte is examined.
  return (code >= ("A"):byte(1) and code <= ("Z"):byte(1))
      or (code >= ("a"):byte(1) and code <= ("z"):byte(1))
end
23
-- Lookup tables for readToken, kept in a private scope so they are built once
-- instead of on every call.
do
  -- Reserved words. They are recognised only as whole words, so an identifier
  -- such as "integer" is not split into "int" followed by "eger".
  local reserved = { ["if"] = true, ["int"] = true, ["while"] = true, ["return"] = true }

  -- Punctuation and operators. This is scanned in order with ipairs, and "=="
  -- is listed before "=" so that the longer operator wins.
  local symbols = { "==", "(", ")", "{", "}", ";", ",", "+", "-", "*", "/", "<", ">", "=" }

  -- Characters allowed directly after a number. "" stands for end of input.
  local numberfollowers = {
    [""] = true, [" "] = true, ["\r"] = true, ["\n"] = true, ["\t"] = true,
    [")"] = true, ["}"] = true, [";"] = true, [","] = true,
    ["+"] = true, ["-"] = true, ["*"] = true, ["/"] = true,
    ["<"] = true, [">"] = true, ["="] = true,
  }

  --- Read one token from the front of the input.
  -- Leading whitespace (space, tab, CR, LF) is skipped first.
  -- @param input  string holding the remaining source text
  -- @return the input left over after the token, and the token table:
  --         { type = <keyword or symbol> } for reserved words and punctuation,
  --         { type = "number", value = <number> } for decimal literals,
  --         { type = "identifier", value = <string> } for identifiers.
  --         Returns "", nil when only whitespace (or nothing) remains.
  -- Raises an error when a number is followed by a character that cannot
  -- legally follow it, or when no token can start at the current character.
  function readToken(input)
    -- The pattern can match the empty string, so find always succeeds.
    local _, lastspace = input:find("^[ \t\r\n]*")
    input = input:sub(lastspace + 1)

    if input == "" then
      return "", nil
    end

    -- Reserved word or identifier: take the whole word, then classify it.
    local word = input:match("^[A-Za-z_][A-Za-z0-9_]*")
    if word then
      local rest = input:sub(#word + 1)
      if reserved[word] then
        return rest, { type = word }
      end
      return rest, { type = "identifier", value = word }
    end

    -- Decimal number.
    local digits = input:match("^[0-9]+")
    if digits then
      local rest = input:sub(#digits + 1)
      local following = rest:sub(1, 1)
      if not numberfollowers[following] then
        error("expected whitespace or one of ) } ; , + - * / < > = after number; got "..following)
      end
      return rest, { type = "number", value = tonumber(digits) }
    end

    -- Punctuation and operators.
    for _, symbol in ipairs(symbols) do
      if input:sub(1, #symbol) == symbol then
        return input:sub(#symbol + 1), { type = symbol }
      end
    end

    error("invalid character to start token: "..input:sub(1,1).." ("..input:byte(1)..")")
  end
end
78
--- Split a whole input string into a list of tokens.
-- Calls readToken repeatedly until no input is left.
-- @param input  string holding the source text
-- @return array of the token tables produced by readToken, in source order
function tokenize(input)
  local tokens = {}
  local remaining = input

  while #remaining > 0 do
    local tok
    remaining, tok = readToken(remaining)
    -- readToken yields a nil token when only whitespace was left; storing nil
    -- at the next index appends nothing.
    tokens[#tokens + 1] = tok
  end

  return tokens
end
This page took 0.034896 seconds and 4 git commands to generate.