]>
Commit | Line | Data |
---|---|---|
5c79bb68 | 1 | (* L5 Compiler |
12aa4087 JW |
2 | * Lexer |
3 | * Author: Kaustuv Chaudhuri <kaustuv+@cs.cmu.edu> | |
4 | * Modified: Frank Pfenning <fp@cs.cmu.edu> | |
0a24e44d JW |
5 | * Modified: Chris Lu <czl@andrew.cmu.edu> |
6 | * Modified: Joshua Wise <jwise@andrew.cmu.edu> | |
12aa4087 JW |
7 | *) |
8 | ||
9 | structure A = Ast | |
10 | structure S = Symbol | |
11 | ||
12 | type pos = int | |
13 | type svalue = Tokens.svalue | |
14 | type ('a,'b) token = ('a,'b) Tokens.token | |
15 | type lexresult = (svalue,pos) Tokens.token | |
16 | ||
17 | local | |
18 | val commentLevel = ref 0 | |
19 | val commentPos = ref 0 | |
2ab9671f JW |
20 | val inString = ref false |
21 | val stringPos = ref 0 | |
22 | val stringAcc : string list ref = ref [] (* :( *) | |
12aa4087 JW |
23 | in |
24 | fun enterComment yypos = (* open one more nesting level of block comment *) | |
25 | ( commentLevel := !commentLevel + 1 ; | |
26 | commentPos := yypos ) (* remember the most recent opener; eof reads commentPos for its error position *) | |
27 | ||
28 | fun exitComment () = (* close one nesting level; the result is true when no comment remains open *) | |
29 | ( commentLevel := !commentLevel - 1 ; | |
30 | !commentLevel = 0 ) | |
31 | ||
32 | fun number (yyt, yyp) = (* build an INTNUM token from the decimal lexeme yyt that starts at position yyp *) | |
33 | let | |
34 | val ext = ParseState.ext (yyp, yyp + size yyt) (* span of the lexeme, passed to ErrorMsg.error below *) | |
35 | val numOpt = Word32Signed.fromString yyt | |
36 | handle Overflow => | |
37 | ( ErrorMsg.error ext | |
38 | ("integral constant `" ^ yyt ^ "' too large") ; | |
39 | NONE ) | |
40 | in | |
41 | case numOpt | |
42 | of NONE => ( ErrorMsg.error ext (* NOTE(review): the Overflow handler above also yields NONE, so an oversized constant appears to be reported twice - confirm *) | |
43 | ("cannot parse integral constant `" ^ yyt ^ "'"); | |
44 | Tokens.INTNUM (Word32Signed.ZERO, yyp, yyp + size yyt) ) (* after the error, still hand back a token carrying ZERO *) | |
45 | | SOME n => Tokens.INTNUM (n,yyp,yyp + size yyt) | |
46 | end | |
5c79bb68 JW |
47 | fun hexnumber (yyt, yyp) = (* build an INTNUM token from the hexadecimal lexeme yyt that starts at position yyp *) |
48 | let | |
49 | val t = String.extract (yyt, 2, NONE) (* drop the first two characters, i.e. the 0x prefix required by the hexnum pattern *) | |
50 | val ext = ParseState.ext (yyp, yyp + size yyt) (* span of the full lexeme, prefix included, for error reports *) | |
51 | val numOpt = StringCvt.scanString (Word32.scan StringCvt.HEX) t | |
52 | handle Overflow => | |
53 | ( ErrorMsg.error ext | |
54 | ("integral constant `" ^ yyt ^ "' too large") ; | |
55 | NONE ) | |
56 | in | |
57 | case numOpt | |
58 | of NONE => ( ErrorMsg.error ext (* NOTE(review): the Overflow handler above also yields NONE, so an oversized constant appears to be reported twice - confirm *) | |
59 | ("cannot parse integral constant `" ^ yyt ^ "'"); | |
60 | Tokens.INTNUM (Word32Signed.ZERO, yyp, yyp + size yyt) ) (* after the error, still hand back a token carrying ZERO *) | |
61 | | SOME n => Tokens.INTNUM (n,yyp,yyp + size yyt) | |
62 | end | |
12aa4087 JW |
63 | |
64 | fun eof () = (* report any comment or string still open, then produce the EOF token *) | |
65 | ( if (!commentLevel > 0) | |
66 | then (ErrorMsg.error (ParseState.ext (!commentPos,!commentPos)) "unterminated comment") (* zero-width span at the last recorded comment opener *) | |
67 | else (); | |
2ab9671f JW |
68 | if (!inString) |
69 | then (ErrorMsg.error (ParseState.ext (!stringPos,!stringPos)) "unterminated string") (* zero-width span at the recorded string start *) | |
70 | else (); | |
12aa4087 JW |
71 | Tokens.EOF (0,0) ) (* bogus position information; unused *) |
72 | ||
2ab9671f JW |
73 | fun newString yyp = ( inString := true; stringPos := yyp; stringAcc := [] ) (* begin a literal at yyp: mark it open, record its start, empty the accumulator *) |
74 | fun endString yyp = ( inString := false; Tokens.STRING (concat (rev (!stringAcc)), !stringPos, yyp+1) ) (* the literal is closed here, so clear inString; otherwise eof reports a terminated string as unterminated *) | |
75 | fun addString yyt = ( stringAcc := yyt :: (!stringAcc) ) (* chunks are consed newest-first and reversed by endString; inString must stay set until the closing quote so eof can detect an unterminated literal *) | |
12aa4087 JW |
76 | end |
77 | ||
78 | %% | |
5c79bb68 | 79 | %header (functor L5LexFn(structure Tokens : L5_TOKENS)); |
12aa4087 | 80 | %full |
2ab9671f | 81 | %s COMMENT COMMENT_LINE STRING; |
12aa4087 JW |
82 | |
83 | id = [A-Za-z_][A-Za-z0-9_]*; | |
84 | decnum = [0-9][0-9]*; | |
5c79bb68 | 85 | hexnum = 0x[0-9a-fA-F][0-9a-fA-F]*; |
12aa4087 JW |
86 | |
87 | ws = [\ \t\012]; | |
88 | ||
89 | %% | |
90 | ||
91 | <INITIAL> {ws}+ => (lex ()); | |
92 | <INITIAL> \n => (ParseState.newline(yypos); lex()); | |
93 | ||
94 | <INITIAL> "{" => (Tokens.LBRACE (yypos, yypos + size yytext)); | |
95 | <INITIAL> "}" => (Tokens.RBRACE (yypos, yypos + size yytext)); | |
96 | <INITIAL> "(" => (Tokens.LPAREN (yypos, yypos + size yytext)); | |
97 | <INITIAL> ")" => (Tokens.RPAREN (yypos, yypos + size yytext)); | |
98 | ||
99 | <INITIAL> ";" => (Tokens.SEMI (yypos, yypos + size yytext)); | |
100 | ||
101 | <INITIAL> "=" => (Tokens.ASSIGN (yypos, yypos + size yytext)); | |
102 | <INITIAL> "+=" => (Tokens.PLUSEQ (yypos, yypos + size yytext)); | |
103 | <INITIAL> "-=" => (Tokens.MINUSEQ (yypos, yypos + size yytext)); | |
104 | <INITIAL> "*=" => (Tokens.STAREQ (yypos, yypos + size yytext)); | |
105 | <INITIAL> "/=" => (Tokens.SLASHEQ (yypos, yypos + size yytext)); | |
106 | <INITIAL> "%=" => (Tokens.PERCENTEQ (yypos, yypos + size yytext)); | |
0a24e44d JW |
107 | <INITIAL> "<<=" => (Tokens.LSHEQ (yypos, yypos + size yytext)); |
108 | <INITIAL> ">>=" => (Tokens.RSHEQ (yypos, yypos + size yytext)); | |
109 | <INITIAL> "&=" => (Tokens.BITANDEQ (yypos, yypos + size yytext)); | |
110 | <INITIAL> "^=" => (Tokens.BITXOREQ (yypos, yypos + size yytext)); | |
111 | <INITIAL> "|=" => (Tokens.BITOREQ (yypos, yypos + size yytext)); | |
12aa4087 | 112 | |
5c79bb68 JW |
113 | <INITIAL> "++" => (Tokens.PLUSPLUS (yypos, yypos + size yytext)); |
114 | <INITIAL> "--" => (Tokens.MINUSMINUS (yypos, yypos + size yytext)); | |
115 | ||
12aa4087 JW |
116 | <INITIAL> "+" => (Tokens.PLUS (yypos, yypos + size yytext)); |
117 | <INITIAL> "-" => (Tokens.MINUS (yypos, yypos + size yytext)); | |
0a24e44d | 118 | <INITIAL> "!" => (Tokens.BANG (yypos, yypos + size yytext)); |
12aa4087 JW |
119 | <INITIAL> "*" => (Tokens.STAR (yypos, yypos + size yytext)); |
120 | <INITIAL> "/" => (Tokens.SLASH (yypos, yypos + size yytext)); | |
121 | <INITIAL> "%" => (Tokens.PERCENT (yypos, yypos + size yytext)); | |
0a24e44d JW |
122 | <INITIAL> "<<" => (Tokens.LSH (yypos, yypos + size yytext)); |
123 | <INITIAL> ">>" => (Tokens.RSH (yypos, yypos + size yytext)); | |
124 | <INITIAL> "||" => (Tokens.LOGOR (yypos, yypos + size yytext)); | |
125 | <INITIAL> "&&" => (Tokens.LOGAND (yypos, yypos + size yytext)); | |
126 | <INITIAL> "&" => (Tokens.BITAND (yypos, yypos + size yytext)); | |
127 | <INITIAL> "^" => (Tokens.BITXOR (yypos, yypos + size yytext)); | |
128 | <INITIAL> "|" => (Tokens.BITOR (yypos, yypos + size yytext)); | |
129 | <INITIAL> "~" => (Tokens.BITNOT (yypos, yypos + size yytext)); | |
130 | <INITIAL> "==" => (Tokens.EQ (yypos, yypos + size yytext)); | |
131 | <INITIAL> "!=" => (Tokens.NEQ (yypos, yypos + size yytext)); | |
132 | <INITIAL> "<" => (Tokens.LT (yypos, yypos + size yytext)); | |
133 | <INITIAL> "<=" => (Tokens.LE (yypos, yypos + size yytext)); | |
134 | <INITIAL> ">=" => (Tokens.GE (yypos, yypos + size yytext)); | |
135 | <INITIAL> ">" => (Tokens.GT (yypos, yypos + size yytext)); | |
12aa4087 | 136 | |
5c79bb68 | 137 | <INITIAL> "?" => (Tokens.QUESTION (yypos, yypos + size yytext)); |
6ade8b0a JW |
138 | <INITIAL> ":" => (Tokens.COLON (yypos, yypos + size yytext)); |
139 | <INITIAL> "," => (Tokens.COMMA (yypos, yypos + size yytext)); | |
140 | ||
1144856b JW |
141 | <INITIAL> "[" => (Tokens.LBRACKET (yypos, yypos + size yytext)); |
142 | <INITIAL> "]" => (Tokens.RBRACKET (yypos, yypos + size yytext)); | |
143 | <INITIAL> "->" => (Tokens.ARROW (yypos, yypos + size yytext)); | |
144 | <INITIAL> "." => (Tokens.DOT (yypos, yypos + size yytext)); | |
145 | ||
12aa4087 | 146 | <INITIAL> "return" => (Tokens.RETURN (yypos, yypos + size yytext)); |
0a24e44d JW |
147 | <INITIAL> "if" => (Tokens.IF (yypos, yypos + size yytext)); |
148 | <INITIAL> "while" => (Tokens.WHILE (yypos, yypos + size yytext)); | |
149 | <INITIAL> "for" => (Tokens.FOR (yypos, yypos + size yytext)); | |
150 | <INITIAL> "continue" => (Tokens.CONTINUE (yypos, yypos + size yytext)); | |
151 | <INITIAL> "break" => (Tokens.BREAK (yypos, yypos + size yytext)); | |
152 | <INITIAL> "else" => (Tokens.ELSE (yypos, yypos + size yytext)); | |
6ade8b0a JW |
153 | <INITIAL> "var" => (Tokens.VAR (yypos, yypos + size yytext)); |
154 | <INITIAL> "int" => (Tokens.INT (yypos, yypos + size yytext)); | |
2ab9671f | 155 | <INITIAL> "string" => (Tokens.TSTRING (yypos, yypos + size yytext)); |
6ade8b0a | 156 | <INITIAL> "extern" => (Tokens.EXTERN (yypos, yypos + size yytext)); |
1144856b JW |
157 | <INITIAL> "struct" => (Tokens.STRUCT (yypos, yypos + size yytext)); |
158 | <INITIAL> "NULL" => (Tokens.NULL (yypos, yypos + size yytext)); | |
159 | <INITIAL> "new" => (Tokens.NEW (yypos, yypos + size yytext)); | |
6ade8b0a | 160 | |
12aa4087 JW |
161 | |
162 | <INITIAL> {decnum} => (number (yytext, yypos)); | |
5c79bb68 | 163 | <INITIAL> {hexnum} => (hexnumber (yytext, yypos)); |
12aa4087 JW |
164 | |
165 | <INITIAL> {id} => (let | |
166 | val id = Symbol.symbol yytext | |
167 | in | |
168 | Tokens.IDENT (id, yypos, yypos + size yytext) | |
169 | end); | |
170 | ||
171 | <INITIAL> "/*" => (YYBEGIN COMMENT; enterComment yypos; lex()); | |
172 | <INITIAL> "*/" => (ErrorMsg.error (ParseState.ext (yypos, yypos)) "unbalanced comments"; | |
173 | lex()); | |
174 | ||
175 | <INITIAL> "//" => (YYBEGIN COMMENT_LINE; lex()); | |
176 | <INITIAL> "#" => (YYBEGIN COMMENT_LINE; lex()); | |
2ab9671f | 177 | <INITIAL> "\"" => (YYBEGIN STRING; newString yypos ; lex () ); |
12aa4087 JW |
178 | <INITIAL> . => (ErrorMsg.error (ParseState.ext (yypos,yypos)) |
179 | ("illegal character: \"" ^ yytext ^ "\""); | |
180 | lex ()); | |
181 | ||
2ab9671f | 182 | |
12aa4087 JW |
183 | <COMMENT> "/*" => (enterComment yypos; lex()); |
184 | <COMMENT> "*/" => (if exitComment () then YYBEGIN INITIAL else (); lex()); | |
185 | <COMMENT> \n => (ParseState.newline yypos; lex ()); | |
186 | <COMMENT> . => (lex()); | |
187 | ||
188 | <COMMENT_LINE> \n => (ParseState.newline yypos; YYBEGIN INITIAL; lex()); | |
189 | <COMMENT_LINE> . => (lex()); | |
2ab9671f JW |
190 | |
191 | <STRING> [^\"\\]* => (addString yytext ; lex() ); | |
192 | <STRING> "\"" => (YYBEGIN INITIAL; endString yypos ); |