(* L5 Compiler * Lexer * Author: Kaustuv Chaudhuri * Modified: Frank Pfenning * Modified: Chris Lu * Modified: Joshua Wise *) structure A = Ast structure S = Symbol type pos = int type svalue = Tokens.svalue type ('a,'b) token = ('a,'b) Tokens.token type lexresult = (svalue,pos) Tokens.token local val commentLevel = ref 0 val commentPos = ref 0 val inString = ref false val stringPos = ref 0 val stringAcc : string list ref = ref [] (* :( *) in fun enterComment yypos = ( commentLevel := !commentLevel + 1 ; commentPos := yypos ) fun exitComment () = ( commentLevel := !commentLevel - 1 ; !commentLevel = 0 ) fun number (yyt, yyp) = let val ext = ParseState.ext (yyp, yyp + size yyt) val numOpt = Word32Signed.fromString yyt handle Overflow => ( ErrorMsg.error ext ("integral constant `" ^ yyt ^ "' too large") ; NONE ) in case numOpt of NONE => ( ErrorMsg.error ext ("cannot parse integral constant `" ^ yyt ^ "'"); Tokens.INTNUM (Word32Signed.ZERO, yyp, yyp + size yyt) ) | SOME n => Tokens.INTNUM (n,yyp,yyp + size yyt) end fun hexnumber (yyt, yyp) = let val t = String.extract (yyt, 2, NONE) val ext = ParseState.ext (yyp, yyp + size yyt) val numOpt = StringCvt.scanString (Word32.scan StringCvt.HEX) t handle Overflow => ( ErrorMsg.error ext ("integral constant `" ^ yyt ^ "' too large") ; NONE ) in case numOpt of NONE => ( ErrorMsg.error ext ("cannot parse integral constant `" ^ yyt ^ "'"); Tokens.INTNUM (Word32Signed.ZERO, yyp, yyp + size yyt) ) | SOME n => Tokens.INTNUM (n,yyp,yyp + size yyt) end fun eof () = ( if (!commentLevel > 0) then (ErrorMsg.error (ParseState.ext (!commentPos,!commentPos)) "unterminated comment") else (); if (!inString) then (ErrorMsg.error (ParseState.ext (!stringPos,!stringPos)) "unterminated string") else (); Tokens.EOF (0,0) ) (* bogus position information; unused *) fun newString yyp = ( inString := true; stringPos := yyp; stringAcc := [] ) fun endString yyp = ( Tokens.STRING (concat (rev (!stringAcc)), !stringPos, yyp+1) ) fun addString yyt = ( inString := false; stringAcc := yyt :: (!stringAcc) ) end %% %header (functor L5LexFn(structure Tokens : L5_TOKENS)); %full %s COMMENT COMMENT_LINE STRING; id = [A-Za-z_][A-Za-z0-9_]*; decnum = [0-9][0-9]*; hexnum = 0x[0-9a-fA-F][0-9a-fA-F]*; ws = [\ \t\012]; %% {ws}+ => (lex ()); \n => (ParseState.newline(yypos); lex()); "{" => (Tokens.LBRACE (yypos, yypos + size yytext)); "}" => (Tokens.RBRACE (yypos, yypos + size yytext)); "(" => (Tokens.LPAREN (yypos, yypos + size yytext)); ")" => (Tokens.RPAREN (yypos, yypos + size yytext)); ";" => (Tokens.SEMI (yypos, yypos + size yytext)); "=" => (Tokens.ASSIGN (yypos, yypos + size yytext)); "+=" => (Tokens.PLUSEQ (yypos, yypos + size yytext)); "-=" => (Tokens.MINUSEQ (yypos, yypos + size yytext)); "*=" => (Tokens.STAREQ (yypos, yypos + size yytext)); "/=" => (Tokens.SLASHEQ (yypos, yypos + size yytext)); "%=" => (Tokens.PERCENTEQ (yypos, yypos + size yytext)); "<<=" => (Tokens.LSHEQ (yypos, yypos + size yytext)); ">>=" => (Tokens.RSHEQ (yypos, yypos + size yytext)); "&=" => (Tokens.BITANDEQ (yypos, yypos + size yytext)); "^=" => (Tokens.BITXOREQ (yypos, yypos + size yytext)); "|=" => (Tokens.BITOREQ (yypos, yypos + size yytext)); "++" => (Tokens.PLUSPLUS (yypos, yypos + size yytext)); "--" => (Tokens.MINUSMINUS (yypos, yypos + size yytext)); "+" => (Tokens.PLUS (yypos, yypos + size yytext)); "-" => (Tokens.MINUS (yypos, yypos + size yytext)); "!" => (Tokens.BANG (yypos, yypos + size yytext)); "*" => (Tokens.STAR (yypos, yypos + size yytext)); "/" => (Tokens.SLASH (yypos, yypos + size yytext)); "%" => (Tokens.PERCENT (yypos, yypos + size yytext)); "<<" => (Tokens.LSH (yypos, yypos + size yytext)); ">>" => (Tokens.RSH (yypos, yypos + size yytext)); "||" => (Tokens.LOGOR (yypos, yypos + size yytext)); "&&" => (Tokens.LOGAND (yypos, yypos + size yytext)); "&" => (Tokens.BITAND (yypos, yypos + size yytext)); "^" => (Tokens.BITXOR (yypos, yypos + size yytext)); "|" => (Tokens.BITOR (yypos, yypos + size yytext)); "~" => (Tokens.BITNOT (yypos, yypos + size yytext)); "==" => (Tokens.EQ (yypos, yypos + size yytext)); "!=" => (Tokens.NEQ (yypos, yypos + size yytext)); "<" => (Tokens.LT (yypos, yypos + size yytext)); "<=" => (Tokens.LE (yypos, yypos + size yytext)); ">=" => (Tokens.GE (yypos, yypos + size yytext)); ">" => (Tokens.GT (yypos, yypos + size yytext)); "?" => (Tokens.QUESTION (yypos, yypos + size yytext)); ":" => (Tokens.COLON (yypos, yypos + size yytext)); "," => (Tokens.COMMA (yypos, yypos + size yytext)); "[" => (Tokens.LBRACKET (yypos, yypos + size yytext)); "]" => (Tokens.RBRACKET (yypos, yypos + size yytext)); "->" => (Tokens.ARROW (yypos, yypos + size yytext)); "." => (Tokens.DOT (yypos, yypos + size yytext)); "return" => (Tokens.RETURN (yypos, yypos + size yytext)); "if" => (Tokens.IF (yypos, yypos + size yytext)); "while" => (Tokens.WHILE (yypos, yypos + size yytext)); "for" => (Tokens.FOR (yypos, yypos + size yytext)); "continue" => (Tokens.CONTINUE (yypos, yypos + size yytext)); "break" => (Tokens.BREAK (yypos, yypos + size yytext)); "else" => (Tokens.ELSE (yypos, yypos + size yytext)); "var" => (Tokens.VAR (yypos, yypos + size yytext)); "int" => (Tokens.INT (yypos, yypos + size yytext)); "string" => (Tokens.TSTRING (yypos, yypos + size yytext)); "extern" => (Tokens.EXTERN (yypos, yypos + size yytext)); "struct" => (Tokens.STRUCT (yypos, yypos + size yytext)); "NULL" => (Tokens.NULL (yypos, yypos + size yytext)); "new" => (Tokens.NEW (yypos, yypos + size yytext)); {decnum} => (number (yytext, yypos)); {hexnum} => (hexnumber (yytext, yypos)); {id} => (let val id = Symbol.symbol yytext in Tokens.IDENT (id, yypos, yypos + size yytext) end); "/*" => (YYBEGIN COMMENT; enterComment yypos; lex()); "*/" => (ErrorMsg.error (ParseState.ext (yypos, yypos)) "unbalanced comments"; lex()); "//" => (YYBEGIN COMMENT_LINE; lex()); "#" => (YYBEGIN COMMENT_LINE; lex()); "\"" => (YYBEGIN STRING; newString yypos ; lex () ); . => (ErrorMsg.error (ParseState.ext (yypos,yypos)) ("illegal character: \"" ^ yytext ^ "\""); lex ()); "/*" => (enterComment yypos; lex()); "*/" => (if exitComment () then YYBEGIN INITIAL else (); lex()); \n => (ParseState.newline yypos; lex ()); . => (lex()); \n => (ParseState.newline yypos; YYBEGIN INITIAL; lex()); . => (lex()); [^\"\\]* => (addString yytext ; lex() ); "\"" => (YYBEGIN INITIAL; endString yypos );