--- /dev/null
+(* L5 Compiler
+ * Lexer
+ * Author: Kaustuv Chaudhuri <kaustuv+@cs.cmu.edu>
+ * Modified: Frank Pfenning <fp@cs.cmu.edu>
+ * Modified: Chris Lu <czl@andrew.cmu.edu>
+ * Modified: Joshua Wise <jwise@andrew.cmu.edu>
+ *)
+
+structure A = Ast
+structure S = Symbol
+
+type pos = int
+type svalue = Tokens.svalue
+type ('a,'b) token = ('a,'b) Tokens.token
+type lexresult = (svalue,pos) Tokens.token
+
+local
+ val commentLevel = ref 0
+ val commentPos = ref 0
+in
+ fun enterComment yypos =
+ ( commentLevel := !commentLevel + 1 ;
+ commentPos := yypos )
+
+ fun exitComment () =
+ ( commentLevel := !commentLevel - 1 ;
+ !commentLevel = 0 )
+
+ fun number (yyt, yyp) =
+ let
+ val ext = ParseState.ext (yyp, yyp + size yyt)
+ val numOpt = Word32Signed.fromString yyt
+ handle Overflow =>
+ ( ErrorMsg.error ext
+ ("integral constant `" ^ yyt ^ "' too large") ;
+ NONE )
+ in
+ case numOpt
+ of NONE => ( ErrorMsg.error ext
+ ("cannot parse integral constant `" ^ yyt ^ "'");
+ Tokens.INTNUM (Word32Signed.ZERO, yyp, yyp + size yyt) )
+ | SOME n => Tokens.INTNUM (n,yyp,yyp + size yyt)
+ end
+ fun hexnumber (yyt, yyp) =
+ let
+ val t = String.extract (yyt, 2, NONE)
+ val ext = ParseState.ext (yyp, yyp + size yyt)
+ val numOpt = StringCvt.scanString (Word32.scan StringCvt.HEX) t
+ handle Overflow =>
+ ( ErrorMsg.error ext
+ ("integral constant `" ^ yyt ^ "' too large") ;
+ NONE )
+ in
+ case numOpt
+ of NONE => ( ErrorMsg.error ext
+ ("cannot parse integral constant `" ^ yyt ^ "'");
+ Tokens.INTNUM (Word32Signed.ZERO, yyp, yyp + size yyt) )
+ | SOME n => Tokens.INTNUM (n,yyp,yyp + size yyt)
+ end
+
+ fun eof () =
+ ( if (!commentLevel > 0)
+ then (ErrorMsg.error (ParseState.ext (!commentPos,!commentPos)) "unterminated comment")
+ else ();
+ Tokens.EOF (0,0) ) (* bogus position information; unused *)
+
+end
+
+%%
+%header (functor L5LexFn(structure Tokens : L5_TOKENS));
+%full
+%s COMMENT COMMENT_LINE;
+
+id = [A-Za-z_][A-Za-z0-9_]*;
+decnum = [0-9][0-9]*;
+hexnum = 0x[0-9a-fA-F][0-9a-fA-F]*;
+
+ws = [\ \t\012];
+
+%%
+
+<INITIAL> {ws}+ => (lex ());
+<INITIAL> \n => (ParseState.newline(yypos); lex());
+
+<INITIAL> "{" => (Tokens.LBRACE (yypos, yypos + size yytext));
+<INITIAL> "}" => (Tokens.RBRACE (yypos, yypos + size yytext));
+<INITIAL> "(" => (Tokens.LPAREN (yypos, yypos + size yytext));
+<INITIAL> ")" => (Tokens.RPAREN (yypos, yypos + size yytext));
+
+<INITIAL> ";" => (Tokens.SEMI (yypos, yypos + size yytext));
+
+<INITIAL> "=" => (Tokens.ASSIGN (yypos, yypos + size yytext));
+<INITIAL> "+=" => (Tokens.PLUSEQ (yypos, yypos + size yytext));
+<INITIAL> "-=" => (Tokens.MINUSEQ (yypos, yypos + size yytext));
+<INITIAL> "*=" => (Tokens.STAREQ (yypos, yypos + size yytext));
+<INITIAL> "/=" => (Tokens.SLASHEQ (yypos, yypos + size yytext));
+<INITIAL> "%=" => (Tokens.PERCENTEQ (yypos, yypos + size yytext));
+<INITIAL> "<<=" => (Tokens.LSHEQ (yypos, yypos + size yytext));
+<INITIAL> ">>=" => (Tokens.RSHEQ (yypos, yypos + size yytext));
+<INITIAL> "&=" => (Tokens.BITANDEQ (yypos, yypos + size yytext));
+<INITIAL> "^=" => (Tokens.BITXOREQ (yypos, yypos + size yytext));
+<INITIAL> "|=" => (Tokens.BITOREQ (yypos, yypos + size yytext));
+
+<INITIAL> "++" => (Tokens.PLUSPLUS (yypos, yypos + size yytext));
+<INITIAL> "--" => (Tokens.MINUSMINUS (yypos, yypos + size yytext));
+
+<INITIAL> "+" => (Tokens.PLUS (yypos, yypos + size yytext));
+<INITIAL> "-" => (Tokens.MINUS (yypos, yypos + size yytext));
+<INITIAL> "!" => (Tokens.BANG (yypos, yypos + size yytext));
+<INITIAL> "*" => (Tokens.STAR (yypos, yypos + size yytext));
+<INITIAL> "/" => (Tokens.SLASH (yypos, yypos + size yytext));
+<INITIAL> "%" => (Tokens.PERCENT (yypos, yypos + size yytext));
+<INITIAL> "<<" => (Tokens.LSH (yypos, yypos + size yytext));
+<INITIAL> ">>" => (Tokens.RSH (yypos, yypos + size yytext));
+<INITIAL> "||" => (Tokens.LOGOR (yypos, yypos + size yytext));
+<INITIAL> "&&" => (Tokens.LOGAND (yypos, yypos + size yytext));
+<INITIAL> "&" => (Tokens.BITAND (yypos, yypos + size yytext));
+<INITIAL> "^" => (Tokens.BITXOR (yypos, yypos + size yytext));
+<INITIAL> "|" => (Tokens.BITOR (yypos, yypos + size yytext));
+<INITIAL> "~" => (Tokens.BITNOT (yypos, yypos + size yytext));
+<INITIAL> "==" => (Tokens.EQ (yypos, yypos + size yytext));
+<INITIAL> "!=" => (Tokens.NEQ (yypos, yypos + size yytext));
+<INITIAL> "<" => (Tokens.LT (yypos, yypos + size yytext));
+<INITIAL> "<=" => (Tokens.LE (yypos, yypos + size yytext));
+<INITIAL> ">=" => (Tokens.GE (yypos, yypos + size yytext));
+<INITIAL> ">" => (Tokens.GT (yypos, yypos + size yytext));
+
+<INITIAL> "?" => (Tokens.QUESTION (yypos, yypos + size yytext));
+<INITIAL> ":" => (Tokens.COLON (yypos, yypos + size yytext));
+<INITIAL> "," => (Tokens.COMMA (yypos, yypos + size yytext));
+
+<INITIAL> "[" => (Tokens.LBRACKET (yypos, yypos + size yytext));
+<INITIAL> "]" => (Tokens.RBRACKET (yypos, yypos + size yytext));
+<INITIAL> "->" => (Tokens.ARROW (yypos, yypos + size yytext));
+<INITIAL> "." => (Tokens.DOT (yypos, yypos + size yytext));
+
+<INITIAL> "return" => (Tokens.RETURN (yypos, yypos + size yytext));
+<INITIAL> "if" => (Tokens.IF (yypos, yypos + size yytext));
+<INITIAL> "while" => (Tokens.WHILE (yypos, yypos + size yytext));
+<INITIAL> "for" => (Tokens.FOR (yypos, yypos + size yytext));
+<INITIAL> "continue" => (Tokens.CONTINUE (yypos, yypos + size yytext));
+<INITIAL> "break" => (Tokens.BREAK (yypos, yypos + size yytext));
+<INITIAL> "else" => (Tokens.ELSE (yypos, yypos + size yytext));
+<INITIAL> "var" => (Tokens.VAR (yypos, yypos + size yytext));
+<INITIAL> "int" => (Tokens.INT (yypos, yypos + size yytext));
+<INITIAL> "extern" => (Tokens.EXTERN (yypos, yypos + size yytext));
+<INITIAL> "struct" => (Tokens.STRUCT (yypos, yypos + size yytext));
+<INITIAL> "NULL" => (Tokens.NULL (yypos, yypos + size yytext));
+<INITIAL> "new" => (Tokens.NEW (yypos, yypos + size yytext));
+
+
+<INITIAL> {decnum} => (number (yytext, yypos));
+<INITIAL> {hexnum} => (hexnumber (yytext, yypos));
+
+<INITIAL> {id} => (let
+ val id = Symbol.symbol yytext
+ in
+ Tokens.IDENT (id, yypos, yypos + size yytext)
+ end);
+
+<INITIAL> "/*" => (YYBEGIN COMMENT; enterComment yypos; lex());
+<INITIAL> "*/" => (ErrorMsg.error (ParseState.ext (yypos, yypos)) "unbalanced comments";
+ lex());
+
+<INITIAL> "//" => (YYBEGIN COMMENT_LINE; lex());
+<INITIAL> "#" => (YYBEGIN COMMENT_LINE; lex());
+<INITIAL> . => (ErrorMsg.error (ParseState.ext (yypos,yypos))
+ ("illegal character: \"" ^ yytext ^ "\"");
+ lex ());
+
+<COMMENT> "/*" => (enterComment yypos; lex());
+<COMMENT> "*/" => (if exitComment () then YYBEGIN INITIAL else (); lex());
+<COMMENT> \n => (ParseState.newline yypos; lex ());
+<COMMENT> . => (lex());
+
+<COMMENT_LINE> \n => (ParseState.newline yypos; YYBEGIN INITIAL; lex());
+<COMMENT_LINE> . => (lex());