(* L1 Compiler
 * Lexer
 * Author: Kaustuv Chaudhuri
 * Modified: Frank Pfenning
 *
 * ML-Lex specification for the L1 lexer.
 *
 * The lexer uses three start states:
 *   INITIAL       ordinary token scanning
 *   COMMENT       inside a (possibly nested) block comment
 *   COMMENT_LINE  inside a line comment, which runs to the next newline
 * Every rule below is tagged with the state it belongs to.  An untagged
 * rule would be active in all states, which would let the INITIAL rules
 * shadow the comment rules and make block comments impossible to close.
 *)

structure A = Ast
structure S = Symbol

type pos = int
type svalue = Tokens.svalue
type ('a,'b) token = ('a,'b) Tokens.token
type lexresult = (svalue,pos) Tokens.token

local
  (* Number of block comments currently open (nesting depth). *)
  val commentLevel = ref 0
  (* Position passed to the most recent enterComment call; used by eof
   * to locate the "unterminated comment" diagnostic. *)
  val commentPos = ref 0
in

  (* enterComment yypos: record that a block comment was opened at yypos,
   * increasing the nesting depth by one. *)
  fun enterComment yypos =
      ( commentLevel := !commentLevel + 1
      ; commentPos := yypos )

  (* exitComment (): record that a block comment was closed.
   * Returns true exactly when the nesting depth has dropped to zero,
   * i.e. when the outermost comment has just ended. *)
  fun exitComment () =
      ( commentLevel := !commentLevel - 1
      ; !commentLevel = 0 )

  (* number (yyt, yyp): build an INTNUM token from the lexeme yyt that
   * starts at position yyp.
   * If the lexeme cannot be converted, an error is reported and a token
   * carrying Word32Signed.ZERO is returned so that lexing can continue.
   * Note: when the conversion raises Overflow, the handler reports
   * "too large" and yields NONE, so the "cannot parse" message is
   * reported as well. *)
  fun number (yyt, yyp) =
      let
        val ext = ParseState.ext (yyp, yyp + size yyt)
        val numOpt = Word32Signed.fromString yyt
                     handle Overflow =>
                            ( ErrorMsg.error ext
                                ("integral constant `" ^ yyt ^ "' too large")
                            ; NONE )
      in
        case numOpt
          of NONE => ( ErrorMsg.error ext
                         ("cannot parse integral constant `" ^ yyt ^ "'");
                       Tokens.INTNUM (Word32Signed.ZERO, yyp, yyp + size yyt) )
           | SOME n => Tokens.INTNUM (n, yyp, yyp + size yyt)
      end

  (* eof (): called at end of input.  Reports an unterminated block
   * comment if one is still open, then returns the EOF token. *)
  fun eof () =
      ( if (!commentLevel > 0)
          then (ErrorMsg.error (ParseState.ext (!commentPos,!commentPos)) "unterminated comment")
          else ();
        Tokens.EOF (0,0) )        (* bogus position information; unused *)

end

%%
%header (functor L1LexFn(structure Tokens : L1_TOKENS));
%full
%s COMMENT COMMENT_LINE;

id = [A-Za-z_][A-Za-z0-9_]*;
decnum = [0-9][0-9]*;

ws = [\ \t\012];

%%

<INITIAL> {ws}+       => (lex ());
<INITIAL> \n          => (ParseState.newline(yypos); lex());

<INITIAL> "{"         => (Tokens.LBRACE (yypos, yypos + size yytext));
<INITIAL> "}"         => (Tokens.RBRACE (yypos, yypos + size yytext));
<INITIAL> "("         => (Tokens.LPAREN (yypos, yypos + size yytext));
<INITIAL> ")"         => (Tokens.RPAREN (yypos, yypos + size yytext));

<INITIAL> ";"         => (Tokens.SEMI (yypos, yypos + size yytext));

<INITIAL> "="         => (Tokens.ASSIGN (yypos, yypos + size yytext));
<INITIAL> "+="        => (Tokens.PLUSEQ (yypos, yypos + size yytext));
<INITIAL> "-="        => (Tokens.MINUSEQ (yypos, yypos + size yytext));
<INITIAL> "*="        => (Tokens.STAREQ (yypos, yypos + size yytext));
<INITIAL> "/="        => (Tokens.SLASHEQ (yypos, yypos + size yytext));
<INITIAL> "%="        => (Tokens.PERCENTEQ (yypos, yypos + size yytext));

<INITIAL> "+"         => (Tokens.PLUS (yypos, yypos + size yytext));
<INITIAL> "-"         => (Tokens.MINUS (yypos, yypos + size yytext));
<INITIAL> "*"         => (Tokens.STAR (yypos, yypos + size yytext));
<INITIAL> "/"         => (Tokens.SLASH (yypos, yypos + size yytext));
<INITIAL> "%"         => (Tokens.PERCENT (yypos, yypos + size yytext));

<INITIAL> "return"    => (Tokens.RETURN (yypos, yypos + size yytext));

<INITIAL> {decnum}    => (number (yytext, yypos));

<INITIAL> {id}        => (let
                            val id = Symbol.symbol yytext
                          in
                            Tokens.IDENT (id, yypos, yypos + size yytext)
                          end);

<INITIAL> "/*"        => (YYBEGIN COMMENT; enterComment yypos; lex());
<INITIAL> "*/"        => (ErrorMsg.error (ParseState.ext (yypos, yypos)) "unbalanced comments";
                          lex());

<INITIAL> "//"        => (YYBEGIN COMMENT_LINE; lex());
<INITIAL> "#"         => (YYBEGIN COMMENT_LINE; lex());
<INITIAL> .           => (ErrorMsg.error (ParseState.ext (yypos,yypos))
                              ("illegal character: \"" ^ yytext ^ "\"");
                          lex ());

<COMMENT> "/*"        => (enterComment yypos; lex());
<COMMENT> "*/"        => (if exitComment () then YYBEGIN INITIAL else (); lex());
<COMMENT> \n          => (ParseState.newline yypos; lex ());
<COMMENT> .           => (lex());

<COMMENT_LINE> \n     => (ParseState.newline yypos; YYBEGIN INITIAL; lex());
<COMMENT_LINE> .      => (lex());