]> Joshua Wise's Git repositories - snipe.git/blame - parse/l5.lex
Add carriage return to string lexer.
[snipe.git] / parse / l5.lex
CommitLineData
5c79bb68 1(* L5 Compiler
12aa4087
JW
2 * Lexer
3 * Author: Kaustuv Chaudhuri <kaustuv+@cs.cmu.edu>
4 * Modified: Frank Pfenning <fp@cs.cmu.edu>
0a24e44d
JW
5 * Modified: Chris Lu <czl@andrew.cmu.edu>
6 * Modified: Joshua Wise <jwise@andrew.cmu.edu>
12aa4087
JW
7 *)
8
9structure A = Ast
10structure S = Symbol
11
12type pos = int
13type svalue = Tokens.svalue
14type ('a,'b) token = ('a,'b) Tokens.token
15type lexresult = (svalue,pos) Tokens.token
16
17local
18 val commentLevel = ref 0
19 val commentPos = ref 0
2ab9671f
JW
20 val inString = ref false
21 val stringPos = ref 0
22 val stringAcc : string list ref = ref [] (* :( *)
12aa4087
JW
23in
24 fun enterComment yypos =
25 ( commentLevel := !commentLevel + 1 ;
26 commentPos := yypos )
27
28 fun exitComment () =
29 ( commentLevel := !commentLevel - 1 ;
30 !commentLevel = 0 )
31
32 fun number (yyt, yyp) =
33 let
34 val ext = ParseState.ext (yyp, yyp + size yyt)
35 val numOpt = Word32Signed.fromString yyt
36 handle Overflow =>
37 ( ErrorMsg.error ext
38 ("integral constant `" ^ yyt ^ "' too large") ;
39 NONE )
40 in
41 case numOpt
42 of NONE => ( ErrorMsg.error ext
43 ("cannot parse integral constant `" ^ yyt ^ "'");
44 Tokens.INTNUM (Word32Signed.ZERO, yyp, yyp + size yyt) )
45 | SOME n => Tokens.INTNUM (n,yyp,yyp + size yyt)
46 end
5c79bb68
JW
47 fun hexnumber (yyt, yyp) =
48 let
49 val t = String.extract (yyt, 2, NONE)
50 val ext = ParseState.ext (yyp, yyp + size yyt)
51 val numOpt = StringCvt.scanString (Word32.scan StringCvt.HEX) t
52 handle Overflow =>
53 ( ErrorMsg.error ext
54 ("integral constant `" ^ yyt ^ "' too large") ;
55 NONE )
56 in
57 case numOpt
58 of NONE => ( ErrorMsg.error ext
59 ("cannot parse integral constant `" ^ yyt ^ "'");
60 Tokens.INTNUM (Word32Signed.ZERO, yyp, yyp + size yyt) )
61 | SOME n => Tokens.INTNUM (n,yyp,yyp + size yyt)
62 end
12aa4087
JW
63
64 fun eof () =
65 ( if (!commentLevel > 0)
66 then (ErrorMsg.error (ParseState.ext (!commentPos,!commentPos)) "unterminated comment")
67 else ();
2ab9671f
JW
68 if (!inString)
69 then (ErrorMsg.error (ParseState.ext (!stringPos,!stringPos)) "unterminated string")
70 else ();
12aa4087
JW
71 Tokens.EOF (0,0) ) (* bogus position information; unused *)
72
2ab9671f
JW
73 fun newString yyp = ( inString := true; stringPos := yyp; stringAcc := [] )
74 fun endString yyp = ( Tokens.STRING (concat (rev (!stringAcc)), !stringPos, yyp+1) )
75 fun addString yyt = ( inString := false; stringAcc := yyt :: (!stringAcc) )
12aa4087
JW
76end
77
78%%
5c79bb68 79%header (functor L5LexFn(structure Tokens : L5_TOKENS));
12aa4087 80%full
2ab9671f 81%s COMMENT COMMENT_LINE STRING;
12aa4087
JW
82
83id = [A-Za-z_][A-Za-z0-9_]*;
84decnum = [0-9][0-9]*;
5c79bb68 85hexnum = 0x[0-9a-fA-F][0-9a-fA-F]*;
12aa4087
JW
86
87ws = [\ \t\012];
c2d3f412 88quote = [\"];
12aa4087
JW
89
90%%
91
92<INITIAL> {ws}+ => (lex ());
93<INITIAL> \n => (ParseState.newline(yypos); lex());
94
95<INITIAL> "{" => (Tokens.LBRACE (yypos, yypos + size yytext));
96<INITIAL> "}" => (Tokens.RBRACE (yypos, yypos + size yytext));
97<INITIAL> "(" => (Tokens.LPAREN (yypos, yypos + size yytext));
98<INITIAL> ")" => (Tokens.RPAREN (yypos, yypos + size yytext));
99
100<INITIAL> ";" => (Tokens.SEMI (yypos, yypos + size yytext));
101
102<INITIAL> "=" => (Tokens.ASSIGN (yypos, yypos + size yytext));
103<INITIAL> "+=" => (Tokens.PLUSEQ (yypos, yypos + size yytext));
104<INITIAL> "-=" => (Tokens.MINUSEQ (yypos, yypos + size yytext));
105<INITIAL> "*=" => (Tokens.STAREQ (yypos, yypos + size yytext));
106<INITIAL> "/=" => (Tokens.SLASHEQ (yypos, yypos + size yytext));
107<INITIAL> "%=" => (Tokens.PERCENTEQ (yypos, yypos + size yytext));
0a24e44d
JW
108<INITIAL> "<<=" => (Tokens.LSHEQ (yypos, yypos + size yytext));
109<INITIAL> ">>=" => (Tokens.RSHEQ (yypos, yypos + size yytext));
110<INITIAL> "&=" => (Tokens.BITANDEQ (yypos, yypos + size yytext));
111<INITIAL> "^=" => (Tokens.BITXOREQ (yypos, yypos + size yytext));
112<INITIAL> "|=" => (Tokens.BITOREQ (yypos, yypos + size yytext));
12aa4087 113
5c79bb68
JW
114<INITIAL> "++" => (Tokens.PLUSPLUS (yypos, yypos + size yytext));
115<INITIAL> "--" => (Tokens.MINUSMINUS (yypos, yypos + size yytext));
116
12aa4087
JW
117<INITIAL> "+" => (Tokens.PLUS (yypos, yypos + size yytext));
118<INITIAL> "-" => (Tokens.MINUS (yypos, yypos + size yytext));
0a24e44d 119<INITIAL> "!" => (Tokens.BANG (yypos, yypos + size yytext));
12aa4087
JW
120<INITIAL> "*" => (Tokens.STAR (yypos, yypos + size yytext));
121<INITIAL> "/" => (Tokens.SLASH (yypos, yypos + size yytext));
122<INITIAL> "%" => (Tokens.PERCENT (yypos, yypos + size yytext));
0a24e44d
JW
123<INITIAL> "<<" => (Tokens.LSH (yypos, yypos + size yytext));
124<INITIAL> ">>" => (Tokens.RSH (yypos, yypos + size yytext));
125<INITIAL> "||" => (Tokens.LOGOR (yypos, yypos + size yytext));
126<INITIAL> "&&" => (Tokens.LOGAND (yypos, yypos + size yytext));
127<INITIAL> "&" => (Tokens.BITAND (yypos, yypos + size yytext));
128<INITIAL> "^" => (Tokens.BITXOR (yypos, yypos + size yytext));
129<INITIAL> "|" => (Tokens.BITOR (yypos, yypos + size yytext));
130<INITIAL> "~" => (Tokens.BITNOT (yypos, yypos + size yytext));
131<INITIAL> "==" => (Tokens.EQ (yypos, yypos + size yytext));
132<INITIAL> "!=" => (Tokens.NEQ (yypos, yypos + size yytext));
133<INITIAL> "<" => (Tokens.LT (yypos, yypos + size yytext));
134<INITIAL> "<=" => (Tokens.LE (yypos, yypos + size yytext));
135<INITIAL> ">=" => (Tokens.GE (yypos, yypos + size yytext));
136<INITIAL> ">" => (Tokens.GT (yypos, yypos + size yytext));
12aa4087 137
5c79bb68 138<INITIAL> "?" => (Tokens.QUESTION (yypos, yypos + size yytext));
6ade8b0a
JW
139<INITIAL> ":" => (Tokens.COLON (yypos, yypos + size yytext));
140<INITIAL> "," => (Tokens.COMMA (yypos, yypos + size yytext));
141
1144856b
JW
142<INITIAL> "[" => (Tokens.LBRACKET (yypos, yypos + size yytext));
143<INITIAL> "]" => (Tokens.RBRACKET (yypos, yypos + size yytext));
144<INITIAL> "->" => (Tokens.ARROW (yypos, yypos + size yytext));
145<INITIAL> "." => (Tokens.DOT (yypos, yypos + size yytext));
146
12aa4087 147<INITIAL> "return" => (Tokens.RETURN (yypos, yypos + size yytext));
0a24e44d
JW
148<INITIAL> "if" => (Tokens.IF (yypos, yypos + size yytext));
149<INITIAL> "while" => (Tokens.WHILE (yypos, yypos + size yytext));
150<INITIAL> "for" => (Tokens.FOR (yypos, yypos + size yytext));
151<INITIAL> "continue" => (Tokens.CONTINUE (yypos, yypos + size yytext));
152<INITIAL> "break" => (Tokens.BREAK (yypos, yypos + size yytext));
153<INITIAL> "else" => (Tokens.ELSE (yypos, yypos + size yytext));
6ade8b0a
JW
154<INITIAL> "var" => (Tokens.VAR (yypos, yypos + size yytext));
155<INITIAL> "int" => (Tokens.INT (yypos, yypos + size yytext));
2ab9671f 156<INITIAL> "string" => (Tokens.TSTRING (yypos, yypos + size yytext));
6ade8b0a 157<INITIAL> "extern" => (Tokens.EXTERN (yypos, yypos + size yytext));
1144856b
JW
158<INITIAL> "struct" => (Tokens.STRUCT (yypos, yypos + size yytext));
159<INITIAL> "NULL" => (Tokens.NULL (yypos, yypos + size yytext));
160<INITIAL> "new" => (Tokens.NEW (yypos, yypos + size yytext));
6ade8b0a 161
12aa4087
JW
162
163<INITIAL> {decnum} => (number (yytext, yypos));
5c79bb68 164<INITIAL> {hexnum} => (hexnumber (yytext, yypos));
12aa4087
JW
165
166<INITIAL> {id} => (let
167 val id = Symbol.symbol yytext
168 in
169 Tokens.IDENT (id, yypos, yypos + size yytext)
170 end);
171
172<INITIAL> "/*" => (YYBEGIN COMMENT; enterComment yypos; lex());
173<INITIAL> "*/" => (ErrorMsg.error (ParseState.ext (yypos, yypos)) "unbalanced comments";
174 lex());
175
176<INITIAL> "//" => (YYBEGIN COMMENT_LINE; lex());
177<INITIAL> "#" => (YYBEGIN COMMENT_LINE; lex());
2ab9671f 178<INITIAL> "\"" => (YYBEGIN STRING; newString yypos ; lex () );
12aa4087
JW
179<INITIAL> . => (ErrorMsg.error (ParseState.ext (yypos,yypos))
180 ("illegal character: \"" ^ yytext ^ "\"");
181 lex ());
182
2ab9671f 183
12aa4087
JW
184<COMMENT> "/*" => (enterComment yypos; lex());
185<COMMENT> "*/" => (if exitComment () then YYBEGIN INITIAL else (); lex());
186<COMMENT> \n => (ParseState.newline yypos; lex ());
187<COMMENT> . => (lex());
188
189<COMMENT_LINE> \n => (ParseState.newline yypos; YYBEGIN INITIAL; lex());
190<COMMENT_LINE> . => (lex());
2ab9671f 191
c2d3f412
JW
192<STRING> "\\\\" => (addString "\\" ; lex() );
193<STRING> "\\n" => (addString "\n" ; lex() );
ab221774 194<STRING> "\\r" => (addString "\r" ; lex() );
c2d3f412
JW
195<STRING> "\\t" => (addString "\t" ; lex() );
196<STRING> "\\\"" => (addString "\t" ; lex() );
197<STRING> "\\". => (ErrorMsg.error (ParseState.ext (yypos,yypos))
198 ("illegal escape sequence: \"" ^ yytext ^ "\"");
199 lex ());
200<STRING> "\n" => (ErrorMsg.error (ParseState.ext (yypos,yypos))
201 ("illegal newline in the middle of the string, asshole");
202 lex ());
2ab9671f 203<STRING> "\"" => (YYBEGIN INITIAL; endString yypos );
c2d3f412 204<STRING> . => (addString yytext ; lex() );
This page took 0.045215 seconds and 4 git commands to generate.