3 * Author: Kaustuv Chaudhuri <kaustuv+@cs.cmu.edu>
4 * Modified: Frank Pfenning <fp@cs.cmu.edu>
5 * Modified: Chris Lu <czl@andrew.cmu.edu>
6 * Modified: Joshua Wise <jwise@andrew.cmu.edu>
13 type svalue = Tokens.svalue
14 type ('a,'b) token = ('a,'b) Tokens.token
15 type lexresult = (svalue,pos) Tokens.token
18 val commentLevel = ref 0
19 val commentPos = ref 0
20 val inString = ref false
22 val stringAcc : string list ref = ref [] (* :( *)
24 fun enterComment yypos =
25 ( commentLevel := !commentLevel + 1 ;
29 ( commentLevel := !commentLevel - 1 ;
32 fun number (yyt, yyp) =
34 val ext = ParseState.ext (yyp, yyp + size yyt)
35 val numOpt = Word32Signed.fromString yyt
38 ("integral constant `" ^ yyt ^ "' too large") ;
42 of NONE => ( ErrorMsg.error ext
43 ("cannot parse integral constant `" ^ yyt ^ "'");
44 Tokens.INTNUM (Word32Signed.ZERO, yyp, yyp + size yyt) )
45 | SOME n => Tokens.INTNUM (n,yyp,yyp + size yyt)
47 fun hexnumber (yyt, yyp) =
49 val t = String.extract (yyt, 2, NONE)
50 val ext = ParseState.ext (yyp, yyp + size yyt)
51 val numOpt = StringCvt.scanString (Word32.scan StringCvt.HEX) t
54 ("integral constant `" ^ yyt ^ "' too large") ;
58 of NONE => ( ErrorMsg.error ext
59 ("cannot parse integral constant `" ^ yyt ^ "'");
60 Tokens.INTNUM (Word32Signed.ZERO, yyp, yyp + size yyt) )
61 | SOME n => Tokens.INTNUM (n,yyp,yyp + size yyt)
65 ( if (!commentLevel > 0)
66 then (ErrorMsg.error (ParseState.ext (!commentPos,!commentPos)) "unterminated comment")
69 then (ErrorMsg.error (ParseState.ext (!stringPos,!stringPos)) "unterminated string")
71 Tokens.EOF (0,0) ) (* bogus position information; unused *)
73 fun newString yyp = ( inString := true; stringPos := yyp; stringAcc := [] )
74 fun endString yyp = ( Tokens.STRING (concat (rev (!stringAcc)), !stringPos, yyp+1) )
75 fun addString yyt = ( inString := false; stringAcc := yyt :: (!stringAcc) )
79 %header (functor L5LexFn(structure Tokens : L5_TOKENS));
81 %s COMMENT COMMENT_LINE STRING;
83 id = [A-Za-z_][A-Za-z0-9_]*;
85 hexnum = 0x[0-9a-fA-F][0-9a-fA-F]*;
92 <INITIAL> {ws}+ => (lex ());
93 <INITIAL> \n => (ParseState.newline(yypos); lex());
95 <INITIAL> "{" => (Tokens.LBRACE (yypos, yypos + size yytext));
96 <INITIAL> "}" => (Tokens.RBRACE (yypos, yypos + size yytext));
97 <INITIAL> "(" => (Tokens.LPAREN (yypos, yypos + size yytext));
98 <INITIAL> ")" => (Tokens.RPAREN (yypos, yypos + size yytext));
100 <INITIAL> ";" => (Tokens.SEMI (yypos, yypos + size yytext));
102 <INITIAL> "=" => (Tokens.ASSIGN (yypos, yypos + size yytext));
103 <INITIAL> "+=" => (Tokens.PLUSEQ (yypos, yypos + size yytext));
104 <INITIAL> "-=" => (Tokens.MINUSEQ (yypos, yypos + size yytext));
105 <INITIAL> "*=" => (Tokens.STAREQ (yypos, yypos + size yytext));
106 <INITIAL> "/=" => (Tokens.SLASHEQ (yypos, yypos + size yytext));
107 <INITIAL> "%=" => (Tokens.PERCENTEQ (yypos, yypos + size yytext));
108 <INITIAL> "<<=" => (Tokens.LSHEQ (yypos, yypos + size yytext));
109 <INITIAL> ">>=" => (Tokens.RSHEQ (yypos, yypos + size yytext));
110 <INITIAL> "&=" => (Tokens.BITANDEQ (yypos, yypos + size yytext));
111 <INITIAL> "^=" => (Tokens.BITXOREQ (yypos, yypos + size yytext));
112 <INITIAL> "|=" => (Tokens.BITOREQ (yypos, yypos + size yytext));
114 <INITIAL> "++" => (Tokens.PLUSPLUS (yypos, yypos + size yytext));
115 <INITIAL> "--" => (Tokens.MINUSMINUS (yypos, yypos + size yytext));
117 <INITIAL> "+" => (Tokens.PLUS (yypos, yypos + size yytext));
118 <INITIAL> "-" => (Tokens.MINUS (yypos, yypos + size yytext));
119 <INITIAL> "!" => (Tokens.BANG (yypos, yypos + size yytext));
120 <INITIAL> "*" => (Tokens.STAR (yypos, yypos + size yytext));
121 <INITIAL> "/" => (Tokens.SLASH (yypos, yypos + size yytext));
122 <INITIAL> "%" => (Tokens.PERCENT (yypos, yypos + size yytext));
123 <INITIAL> "<<" => (Tokens.LSH (yypos, yypos + size yytext));
124 <INITIAL> ">>" => (Tokens.RSH (yypos, yypos + size yytext));
125 <INITIAL> "||" => (Tokens.LOGOR (yypos, yypos + size yytext));
126 <INITIAL> "&&" => (Tokens.LOGAND (yypos, yypos + size yytext));
127 <INITIAL> "&" => (Tokens.BITAND (yypos, yypos + size yytext));
128 <INITIAL> "^" => (Tokens.BITXOR (yypos, yypos + size yytext));
129 <INITIAL> "|" => (Tokens.BITOR (yypos, yypos + size yytext));
130 <INITIAL> "~" => (Tokens.BITNOT (yypos, yypos + size yytext));
131 <INITIAL> "==" => (Tokens.EQ (yypos, yypos + size yytext));
132 <INITIAL> "!=" => (Tokens.NEQ (yypos, yypos + size yytext));
133 <INITIAL> "<" => (Tokens.LT (yypos, yypos + size yytext));
134 <INITIAL> "<=" => (Tokens.LE (yypos, yypos + size yytext));
135 <INITIAL> ">=" => (Tokens.GE (yypos, yypos + size yytext));
136 <INITIAL> ">" => (Tokens.GT (yypos, yypos + size yytext));
138 <INITIAL> "?" => (Tokens.QUESTION (yypos, yypos + size yytext));
139 <INITIAL> ":" => (Tokens.COLON (yypos, yypos + size yytext));
140 <INITIAL> "," => (Tokens.COMMA (yypos, yypos + size yytext));
142 <INITIAL> "[" => (Tokens.LBRACKET (yypos, yypos + size yytext));
143 <INITIAL> "]" => (Tokens.RBRACKET (yypos, yypos + size yytext));
144 <INITIAL> "->" => (Tokens.ARROW (yypos, yypos + size yytext));
145 <INITIAL> "." => (Tokens.DOT (yypos, yypos + size yytext));
147 <INITIAL> "return" => (Tokens.RETURN (yypos, yypos + size yytext));
148 <INITIAL> "if" => (Tokens.IF (yypos, yypos + size yytext));
149 <INITIAL> "while" => (Tokens.WHILE (yypos, yypos + size yytext));
150 <INITIAL> "for" => (Tokens.FOR (yypos, yypos + size yytext));
151 <INITIAL> "continue" => (Tokens.CONTINUE (yypos, yypos + size yytext));
152 <INITIAL> "break" => (Tokens.BREAK (yypos, yypos + size yytext));
153 <INITIAL> "else" => (Tokens.ELSE (yypos, yypos + size yytext));
154 <INITIAL> "var" => (Tokens.VAR (yypos, yypos + size yytext));
155 <INITIAL> "int" => (Tokens.INT (yypos, yypos + size yytext));
156 <INITIAL> "string" => (Tokens.TSTRING (yypos, yypos + size yytext));
157 <INITIAL> "extern" => (Tokens.EXTERN (yypos, yypos + size yytext));
158 <INITIAL> "struct" => (Tokens.STRUCT (yypos, yypos + size yytext));
159 <INITIAL> "NULL" => (Tokens.NULL (yypos, yypos + size yytext));
160 <INITIAL> "new" => (Tokens.NEW (yypos, yypos + size yytext));
163 <INITIAL> {decnum} => (number (yytext, yypos));
164 <INITIAL> {hexnum} => (hexnumber (yytext, yypos));
166 <INITIAL> {id} => (let
167 val id = Symbol.symbol yytext
169 Tokens.IDENT (id, yypos, yypos + size yytext)
172 <INITIAL> "/*" => (YYBEGIN COMMENT; enterComment yypos; lex());
173 <INITIAL> "*/" => (ErrorMsg.error (ParseState.ext (yypos, yypos)) "unbalanced comments";
176 <INITIAL> "//" => (YYBEGIN COMMENT_LINE; lex());
177 <INITIAL> "#" => (YYBEGIN COMMENT_LINE; lex());
178 <INITIAL> "\"" => (YYBEGIN STRING; newString yypos ; lex () );
179 <INITIAL> . => (ErrorMsg.error (ParseState.ext (yypos,yypos))
180 ("illegal character: \"" ^ yytext ^ "\"");
184 <COMMENT> "/*" => (enterComment yypos; lex());
185 <COMMENT> "*/" => (if exitComment () then YYBEGIN INITIAL else (); lex());
186 <COMMENT> \n => (ParseState.newline yypos; lex ());
187 <COMMENT> . => (lex());
189 <COMMENT_LINE> \n => (ParseState.newline yypos; YYBEGIN INITIAL; lex());
190 <COMMENT_LINE> . => (lex());
192 <STRING> "\\\\" => (addString "\\" ; lex() );
193 <STRING> "\\n" => (addString "\n" ; lex() );
194 <STRING> "\\r" => (addString "\r" ; lex() );
195 <STRING> "\\t" => (addString "\t" ; lex() );
196 <STRING> "\\\"" => (addString "\t" ; lex() );
197 <STRING> "\\". => (ErrorMsg.error (ParseState.ext (yypos,yypos))
198 ("illegal escape sequence: \"" ^ yytext ^ "\"");
200 <STRING> "\n" => (ErrorMsg.error (ParseState.ext (yypos,yypos))
201 ("illegal newline in the middle of the string, asshole");
203 <STRING> "\"" => (YYBEGIN INITIAL; endString yypos );
204 <STRING> . => (addString yytext ; lex() );