X-Git-Url: http://git.joshuawise.com/snipe.git/blobdiff_plain/1144856ba9d6018d9922c6ede7e97779a0fe6373..5c79bb689ab446551bc7ec4497e6c9b75582837e:/parse/l5.lex diff --git a/parse/l5.lex b/parse/l5.lex new file mode 100644 index 0000000..e10f8b7 --- /dev/null +++ b/parse/l5.lex @@ -0,0 +1,177 @@ +(* L5 Compiler + * Lexer + * Author: Kaustuv Chaudhuri + * Modified: Frank Pfenning + * Modified: Chris Lu + * Modified: Joshua Wise + *) + +structure A = Ast +structure S = Symbol + +type pos = int +type svalue = Tokens.svalue +type ('a,'b) token = ('a,'b) Tokens.token +type lexresult = (svalue,pos) Tokens.token + +local + val commentLevel = ref 0 + val commentPos = ref 0 +in + fun enterComment yypos = + ( commentLevel := !commentLevel + 1 ; + commentPos := yypos ) + + fun exitComment () = + ( commentLevel := !commentLevel - 1 ; + !commentLevel = 0 ) + + fun number (yyt, yyp) = + let + val ext = ParseState.ext (yyp, yyp + size yyt) + val numOpt = Word32Signed.fromString yyt + handle Overflow => + ( ErrorMsg.error ext + ("integral constant `" ^ yyt ^ "' too large") ; + NONE ) + in + case numOpt + of NONE => ( ErrorMsg.error ext + ("cannot parse integral constant `" ^ yyt ^ "'"); + Tokens.INTNUM (Word32Signed.ZERO, yyp, yyp + size yyt) ) + | SOME n => Tokens.INTNUM (n,yyp,yyp + size yyt) + end + fun hexnumber (yyt, yyp) = + let + val t = String.extract (yyt, 2, NONE) + val ext = ParseState.ext (yyp, yyp + size yyt) + val numOpt = StringCvt.scanString (Word32.scan StringCvt.HEX) t + handle Overflow => + ( ErrorMsg.error ext + ("integral constant `" ^ yyt ^ "' too large") ; + NONE ) + in + case numOpt + of NONE => ( ErrorMsg.error ext + ("cannot parse integral constant `" ^ yyt ^ "'"); + Tokens.INTNUM (Word32Signed.ZERO, yyp, yyp + size yyt) ) + | SOME n => Tokens.INTNUM (n,yyp,yyp + size yyt) + end + + fun eof () = + ( if (!commentLevel > 0) + then (ErrorMsg.error (ParseState.ext (!commentPos,!commentPos)) "unterminated comment") + else (); + Tokens.EOF (0,0) ) (* bogus position information; unused *) + +end + +%% +%header (functor L5LexFn(structure Tokens : L5_TOKENS)); +%full +%s COMMENT COMMENT_LINE; + +id = [A-Za-z_][A-Za-z0-9_]*; +decnum = [0-9][0-9]*; +hexnum = 0x[0-9a-fA-F][0-9a-fA-F]*; + +ws = [\ \t\012]; + +%% + + {ws}+ => (lex ()); + \n => (ParseState.newline(yypos); lex()); + + "{" => (Tokens.LBRACE (yypos, yypos + size yytext)); + "}" => (Tokens.RBRACE (yypos, yypos + size yytext)); + "(" => (Tokens.LPAREN (yypos, yypos + size yytext)); + ")" => (Tokens.RPAREN (yypos, yypos + size yytext)); + + ";" => (Tokens.SEMI (yypos, yypos + size yytext)); + + "=" => (Tokens.ASSIGN (yypos, yypos + size yytext)); + "+=" => (Tokens.PLUSEQ (yypos, yypos + size yytext)); + "-=" => (Tokens.MINUSEQ (yypos, yypos + size yytext)); + "*=" => (Tokens.STAREQ (yypos, yypos + size yytext)); + "/=" => (Tokens.SLASHEQ (yypos, yypos + size yytext)); + "%=" => (Tokens.PERCENTEQ (yypos, yypos + size yytext)); + "<<=" => (Tokens.LSHEQ (yypos, yypos + size yytext)); + ">>=" => (Tokens.RSHEQ (yypos, yypos + size yytext)); + "&=" => (Tokens.BITANDEQ (yypos, yypos + size yytext)); + "^=" => (Tokens.BITXOREQ (yypos, yypos + size yytext)); + "|=" => (Tokens.BITOREQ (yypos, yypos + size yytext)); + + "++" => (Tokens.PLUSPLUS (yypos, yypos + size yytext)); + "--" => (Tokens.MINUSMINUS (yypos, yypos + size yytext)); + + "+" => (Tokens.PLUS (yypos, yypos + size yytext)); + "-" => (Tokens.MINUS (yypos, yypos + size yytext)); + "!" => (Tokens.BANG (yypos, yypos + size yytext)); + "*" => (Tokens.STAR (yypos, yypos + size yytext)); + "/" => (Tokens.SLASH (yypos, yypos + size yytext)); + "%" => (Tokens.PERCENT (yypos, yypos + size yytext)); + "<<" => (Tokens.LSH (yypos, yypos + size yytext)); + ">>" => (Tokens.RSH (yypos, yypos + size yytext)); + "||" => (Tokens.LOGOR (yypos, yypos + size yytext)); + "&&" => (Tokens.LOGAND (yypos, yypos + size yytext)); + "&" => (Tokens.BITAND (yypos, yypos + size yytext)); + "^" => (Tokens.BITXOR (yypos, yypos + size yytext)); + "|" => (Tokens.BITOR (yypos, yypos + size yytext)); + "~" => (Tokens.BITNOT (yypos, yypos + size yytext)); + "==" => (Tokens.EQ (yypos, yypos + size yytext)); + "!=" => (Tokens.NEQ (yypos, yypos + size yytext)); + "<" => (Tokens.LT (yypos, yypos + size yytext)); + "<=" => (Tokens.LE (yypos, yypos + size yytext)); + ">=" => (Tokens.GE (yypos, yypos + size yytext)); + ">" => (Tokens.GT (yypos, yypos + size yytext)); + + "?" => (Tokens.QUESTION (yypos, yypos + size yytext)); + ":" => (Tokens.COLON (yypos, yypos + size yytext)); + "," => (Tokens.COMMA (yypos, yypos + size yytext)); + + "[" => (Tokens.LBRACKET (yypos, yypos + size yytext)); + "]" => (Tokens.RBRACKET (yypos, yypos + size yytext)); + "->" => (Tokens.ARROW (yypos, yypos + size yytext)); + "." => (Tokens.DOT (yypos, yypos + size yytext)); + + "return" => (Tokens.RETURN (yypos, yypos + size yytext)); + "if" => (Tokens.IF (yypos, yypos + size yytext)); + "while" => (Tokens.WHILE (yypos, yypos + size yytext)); + "for" => (Tokens.FOR (yypos, yypos + size yytext)); + "continue" => (Tokens.CONTINUE (yypos, yypos + size yytext)); + "break" => (Tokens.BREAK (yypos, yypos + size yytext)); + "else" => (Tokens.ELSE (yypos, yypos + size yytext)); + "var" => (Tokens.VAR (yypos, yypos + size yytext)); + "int" => (Tokens.INT (yypos, yypos + size yytext)); + "extern" => (Tokens.EXTERN (yypos, yypos + size yytext)); + "struct" => (Tokens.STRUCT (yypos, yypos + size yytext)); + "NULL" => (Tokens.NULL (yypos, yypos + size yytext)); + "new" => (Tokens.NEW (yypos, yypos + size yytext)); + + + {decnum} => (number (yytext, yypos)); + {hexnum} => (hexnumber (yytext, yypos)); + + {id} => (let + val id = Symbol.symbol yytext + in + Tokens.IDENT (id, yypos, yypos + size yytext) + end); + + "/*" => (YYBEGIN COMMENT; enterComment yypos; lex()); + "*/" => (ErrorMsg.error (ParseState.ext (yypos, yypos)) "unbalanced comments"; + lex()); + + "//" => (YYBEGIN COMMENT_LINE; lex()); + "#" => (YYBEGIN COMMENT_LINE; lex()); + . => (ErrorMsg.error (ParseState.ext (yypos,yypos)) + ("illegal character: \"" ^ yytext ^ "\""); + lex ()); + + "/*" => (enterComment yypos; lex()); + "*/" => (if exitComment () then YYBEGIN INITIAL else (); lex()); + \n => (ParseState.newline yypos; lex ()); + . => (lex()); + + \n => (ParseState.newline yypos; YYBEGIN INITIAL; lex()); + . => (lex());