]>
Commit | Line | Data |
---|---|---|
1 | (* L1 Compiler | |
2 | * Lexer | |
3 | * Author: Kaustuv Chaudhuri <kaustuv+@cs.cmu.edu> | |
4 | * Modified: Frank Pfenning <fp@cs.cmu.edu> | |
5 | *) | |
6 | ||
7 | structure A = Ast | |
8 | structure S = Symbol | |
9 | ||
10 | type pos = int | |
11 | type svalue = Tokens.svalue | |
12 | type ('a,'b) token = ('a,'b) Tokens.token | |
13 | type lexresult = (svalue,pos) Tokens.token | |
14 | ||
15 | local | |
16 | val commentLevel = ref 0 (* current nesting depth of block comments; 0 when not inside one *) | |
17 | val commentPos = ref 0 (* start position of an open block comment, written by enterComment and read by eof *) | |
18 | in | |
19 | fun enterComment yypos = (* Record entry into a block comment that starts at yypos; block comments may nest. *) | |
20 | ( if !commentLevel = 0 then commentPos := yypos else () ; (* remember only the outermost opener: an inner comment may already be closed by the time eof reports *) | |
21 | commentLevel := !commentLevel + 1 ) | |
22 | ||
23 | fun exitComment () = (* Leave one nesting level of block comment; the result is true when the depth has dropped to 0. *) | |
24 | let val depth = !commentLevel - 1 | |
25 | in ( commentLevel := depth ; depth = 0 ) end | |
26 | ||
27 | fun number (yyt, yyp) = (* Build an INTNUM token from the digit string yyt that starts at position yyp; a constant that cannot be converted is reported once and replaced by ZERO. *) | |
28 | let | |
29 | val ext = ParseState.ext (yyp, yyp + size yyt) | |
30 | val numOpt = Word32Signed.fromString yyt | |
31 | handle Overflow => (* report the overflow here and recover with ZERO, so the NONE branch below does not add a second error for the same constant *) | |
32 | ( ErrorMsg.error ext | |
33 | ("integral constant `" ^ yyt ^ "' too large") ; | |
34 | SOME Word32Signed.ZERO ) | |
35 | in | |
36 | case numOpt | |
37 | of NONE => ( ErrorMsg.error ext | |
38 | ("cannot parse integral constant `" ^ yyt ^ "'"); | |
39 | Tokens.INTNUM (Word32Signed.ZERO, yyp, yyp + size yyt) ) | |
40 | | SOME n => Tokens.INTNUM (n,yyp,yyp + size yyt) | |
41 | end | |
42 | ||
43 | fun eof () = (* End-of-input hook: report a block comment that is still open, then return the EOF token. *) | |
44 | ( if !commentLevel <= 0 | |
45 | then () | |
46 | else ErrorMsg.error (ParseState.ext (!commentPos,!commentPos)) "unterminated comment" ; | |
47 | Tokens.EOF (0,0) ) (* bogus position information; unused *) | |
48 | ||
49 | end | |
50 | ||
51 | %% | |
52 | %header (functor L1LexFn(structure Tokens : L1_TOKENS)); | |
53 | %full | |
54 | %s COMMENT COMMENT_LINE; | |
55 | ||
56 | id = [A-Za-z_][A-Za-z0-9_]*; | |
57 | decnum = [0-9][0-9]*; | |
58 | ||
59 | ws = [\ \t\012\013]; | |
60 | ||
61 | %% | |
62 | ||
63 | <INITIAL> {ws}+ => (lex () (* whitespace yields no token, continue with the next lexeme *)); | |
64 | <INITIAL> \n => (ParseState.newline(yypos); lex() (* the line break is registered with ParseState before continuing *)); | |
65 | ||
66 | <INITIAL> "{" => (Tokens.LBRACE (yypos, yypos + size yytext) (* every punctuation and operator token below is given yypos and yypos + size yytext as its two position arguments *)); | |
67 | <INITIAL> "}" => (Tokens.RBRACE (yypos, yypos + size yytext)); | |
68 | <INITIAL> "(" => (Tokens.LPAREN (yypos, yypos + size yytext)); | |
69 | <INITIAL> ")" => (Tokens.RPAREN (yypos, yypos + size yytext)); | |
70 | ||
71 | <INITIAL> ";" => (Tokens.SEMI (yypos, yypos + size yytext)); | |
72 | ||
73 | <INITIAL> "=" => (Tokens.ASSIGN (yypos, yypos + size yytext)); | |
74 | <INITIAL> "+=" => (Tokens.PLUSEQ (yypos, yypos + size yytext)); | |
75 | <INITIAL> "-=" => (Tokens.MINUSEQ (yypos, yypos + size yytext)); | |
76 | <INITIAL> "*=" => (Tokens.STAREQ (yypos, yypos + size yytext)); | |
77 | <INITIAL> "/=" => (Tokens.SLASHEQ (yypos, yypos + size yytext)); | |
78 | <INITIAL> "%=" => (Tokens.PERCENTEQ (yypos, yypos + size yytext)); | |
79 | ||
80 | <INITIAL> "+" => (Tokens.PLUS (yypos, yypos + size yytext)); | |
81 | <INITIAL> "-" => (Tokens.MINUS (yypos, yypos + size yytext)); | |
82 | <INITIAL> "*" => (Tokens.STAR (yypos, yypos + size yytext)); | |
83 | <INITIAL> "/" => (Tokens.SLASH (yypos, yypos + size yytext)); | |
84 | <INITIAL> "%" => (Tokens.PERCENT (yypos, yypos + size yytext)); | |
85 | ||
86 | <INITIAL> "return" => (Tokens.RETURN (yypos, yypos + size yytext) (* NOTE review: the id rule below also matches this lexeme, so this presumably relies on the generator preferring the earlier rule on equal-length matches *)); | |
87 | ||
88 | <INITIAL> {decnum} => (number (yytext, yypos) (* number converts the digits and reports constants it cannot convert *)); | |
89 | ||
90 | <INITIAL> {id} => (let | |
91 | val id = Symbol.symbol yytext (* turn the lexeme into a Symbol before building the token *) | |
92 | in | |
93 | Tokens.IDENT (id, yypos, yypos + size yytext) | |
94 | end); | |
95 | ||
96 | <INITIAL> "/*" => (YYBEGIN COMMENT; enterComment yypos; lex() (* switch to the COMMENT state and start tracking nesting depth *)); | |
97 | <INITIAL> "*/" => (ErrorMsg.error (ParseState.ext (yypos, yypos)) "unbalanced comments"; | |
98 | lex() (* a closer seen outside any block comment is reported and then skipped *)); | |
99 | ||
100 | <INITIAL> "//" => (YYBEGIN COMMENT_LINE; lex() (* the COMMENT_LINE rules discard the rest of the line *)); | |
101 | <INITIAL> "#" => (YYBEGIN COMMENT_LINE; lex() (* handled exactly like the double-slash line comment above *)); | |
102 | <INITIAL> . => (ErrorMsg.error (ParseState.ext (yypos,yypos)) | |
103 | ("illegal character: \"" ^ yytext ^ "\""); | |
104 | lex () (* any character no earlier rule accepted is reported and then skipped *)); | |
105 | ||
106 | <COMMENT> "/*" => (enterComment yypos; lex() (* nested opener: go one level deeper and stay in COMMENT *)); | |
107 | <COMMENT> "*/" => (if exitComment () then YYBEGIN INITIAL else (); lex() (* return to INITIAL only when exitComment says the depth reached 0 *)); | |
108 | <COMMENT> \n => (ParseState.newline yypos; lex () (* line breaks inside comments are still registered with ParseState *)); | |
109 | <COMMENT> . => (lex() (* all other comment text is discarded *)); | |
110 | ||
111 | <COMMENT_LINE> \n => (ParseState.newline yypos; YYBEGIN INITIAL; lex() (* the line comment ends at the line break, which is registered before returning to INITIAL *)); | |
112 | <COMMENT_LINE> . => (lex() (* everything else on the line is discarded *)); | |