From 2ab9671fde5297fc59583361f152e812e66c2d17 Mon Sep 17 00:00:00 2001 From: Joshua Wise Date: Sat, 10 Jul 2010 00:03:01 -0400 Subject: [PATCH] Add strings to type system and parser/lexer --- parse/ast.sml | 3 +++ parse/l5.grm | 5 ++++- parse/l5.lex | 17 ++++++++++++++++- trans/trans.sml | 12 +++++++++--- type/type.sml | 30 ++++++++++++++++++------------ type/typechecker.sml | 3 +++ 6 files changed, 53 insertions(+), 17 deletions(-) diff --git a/parse/ast.sml b/parse/ast.sml index 75f7042..ce756ab 100644 --- a/parse/ast.sml +++ b/parse/ast.sml @@ -39,6 +39,7 @@ sig datatype exp = Var of ident | ConstExp of Word32.word + | StringExp of string | OpExp of oper * exp list | Marked of (* Kane *) exp Mark.marked | FuncCall of ident * (exp list) @@ -109,6 +110,7 @@ struct datatype exp = Var of ident | ConstExp of Word32.word + | StringExp of string | OpExp of oper * exp list | Marked of exp Mark.marked | FuncCall of ident * (exp list) @@ -171,6 +173,7 @@ struct fun pp_exp (Var(id)) = pp_ident id | pp_exp (ConstExp(c)) = Word32Signed.toString c + | pp_exp (StringExp(s)) = "\"" ^ s ^ "\"" | pp_exp (OpExp(oper, [e])) = pp_oper oper ^ "(" ^ pp_exp e ^ ")" | pp_exp (OpExp(oper, [e1,e2])) = diff --git a/parse/l5.grm b/parse/l5.grm index 7195271..39f33cd 100644 --- a/parse/l5.grm +++ b/parse/l5.grm @@ -35,6 +35,7 @@ fun make_lval (A.Var(id)) ext = id | SEMI | INTNUM of Word32.word | IDENT of Symbol.symbol + | STRING of string | RETURN | PLUS | MINUS | STAR | SLASH | PERCENT | LSH | RSH | LOGOR | LOGAND | BITAND | BITXOR | BITOR | BITNOT | BANG | ASSIGN | PLUSEQ | MINUSEQ | STAREQ | SLASHEQ | PERCENTEQ | LSHEQ | RSHEQ | BITANDEQ | BITXOREQ | BITOREQ @@ -43,7 +44,7 @@ fun make_lval (A.Var(id)) ext = id | LBRACE | RBRACE | LPAREN | RPAREN | UNARY | ASNOP (* dummy *) - | EXTERN | VAR | INT | QUESTION | COLON | COMMA | STRUCT | NULL | LBRACKET | RBRACKET | ARROW | DOT | NEW + | EXTERN | VAR | INT | TSTRING | QUESTION | COLON | COMMA | STRUCT | NULL | LBRACKET | RBRACKET | ARROW | DOT | NEW | PLUSPLUS | MINUSMINUS %nonterm @@ -103,6 +104,7 @@ programx : decls (decls) | programx function (AUP.append_function programx function) vtype : INT (T.Int) + | TSTRING (T.String) | IDENT (T.Typedef IDENT) | vtype STAR (T.Pointer vtype) | vtype LBRACKET RBRACKET @@ -184,6 +186,7 @@ block : stm ([stm]) exp : LPAREN exp RPAREN (exp) | INTNUM (mark (A.ConstExp(INTNUM),(INTNUMleft,INTNUMright))) + | STRING (mark (A.StringExp(STRING),(STRINGleft,STRINGright))) | IDENT (mark (A.Var(IDENT), (IDENTleft,IDENTright))) | exp DOT IDENT (mark (A.Member(exp, IDENT), (expleft, IDENTright))) | exp ARROW IDENT (mark (A.DerefMember(exp, IDENT), (expleft, IDENTright))) diff --git a/parse/l5.lex b/parse/l5.lex index e10f8b7..3028a6c 100644 --- a/parse/l5.lex +++ b/parse/l5.lex @@ -17,6 +17,9 @@ type lexresult = (svalue,pos) Tokens.token local val commentLevel = ref 0 val commentPos = ref 0 + val inString = ref false + val stringPos = ref 0 + val stringAcc : string list ref = ref [] (* :( *) in fun enterComment yypos = ( commentLevel := !commentLevel + 1 ; @@ -62,14 +65,20 @@ in ( if (!commentLevel > 0) then (ErrorMsg.error (ParseState.ext (!commentPos,!commentPos)) "unterminated comment") else (); + if (!inString) + then (ErrorMsg.error (ParseState.ext (!stringPos,!stringPos)) "unterminated string") + else (); Tokens.EOF (0,0) ) (* bogus position information; unused *) + fun newString yyp = ( inString := true; stringPos := yyp; stringAcc := [] ) + fun endString yyp = ( Tokens.STRING (concat (rev (!stringAcc)), !stringPos, yyp+1) ) + fun addString yyt = ( inString := false; stringAcc := yyt :: (!stringAcc) ) end %% %header (functor L5LexFn(structure Tokens : L5_TOKENS)); %full -%s COMMENT COMMENT_LINE; +%s COMMENT COMMENT_LINE STRING; id = [A-Za-z_][A-Za-z0-9_]*; decnum = [0-9][0-9]*; @@ -143,6 +152,7 @@ ws = [\ \t\012]; "else" => (Tokens.ELSE (yypos, yypos + size yytext)); "var" => (Tokens.VAR (yypos, yypos + size yytext)); "int" => (Tokens.INT (yypos, yypos + size yytext)); + "string" => (Tokens.TSTRING (yypos, yypos + size yytext)); "extern" => (Tokens.EXTERN (yypos, yypos + size yytext)); "struct" => (Tokens.STRUCT (yypos, yypos + size yytext)); "NULL" => (Tokens.NULL (yypos, yypos + size yytext)); @@ -164,10 +174,12 @@ ws = [\ \t\012]; "//" => (YYBEGIN COMMENT_LINE; lex()); "#" => (YYBEGIN COMMENT_LINE; lex()); + "\"" => (YYBEGIN STRING; newString yypos ; lex () ); . => (ErrorMsg.error (ParseState.ext (yypos,yypos)) ("illegal character: \"" ^ yytext ^ "\""); lex ()); + "/*" => (enterComment yypos; lex()); "*/" => (if exitComment () then YYBEGIN INITIAL else (); lex()); \n => (ParseState.newline yypos; lex ()); @@ -175,3 +187,6 @@ ws = [\ \t\012]; \n => (ParseState.newline yypos; YYBEGIN INITIAL; lex()); . => (lex()); + + [^\"\\]* => (addString yytext ; lex() ); + "\"" => (YYBEGIN INITIAL; endString yypos ); diff --git a/trans/trans.sml b/trans/trans.sml index d8807f0..73968d9 100644 --- a/trans/trans.sml +++ b/trans/trans.sml @@ -129,8 +129,12 @@ struct | trans_exp env vartypes (A.ArrIndex(exp1, exp2)) = let val asubk = T.BINOP(T.ADD, trans_exp env vartypes exp1, - T.BINOP(T.MUL, trans_exp env vartypes exp2, - T.CONST(Word32.fromInt(sizeof (deref (typeof' vartypes exp1)))))) + if sizeof (deref (typeof' vartypes exp1)) = 1 + then trans_exp env vartypes exp2 + else T.BINOP(T.MUL, trans_exp env vartypes exp2, + T.CONST(Word32.fromInt(sizeof (deref (typeof' vartypes exp1)))) + ) + ) val tipo = deref (typeof' vartypes exp1) val d = if not (Flag.isset Flags.safe) @@ -160,7 +164,9 @@ struct end | trans_exp env vartypes (A.NewArr(tipo, exp)) = let - val size = T.BINOP(T.MUL, trans_exp env vartypes exp, T.CONST(Word32.fromInt(sizeof tipo))) + val size = if (sizeof tipo) = 1 + then trans_exp env vartypes exp + else T.BINOP(T.MUL, trans_exp env vartypes exp, T.CONST(Word32.fromInt(sizeof tipo))) val t1 = T.TEMP (Temp.new "allocated address") val ts = T.TEMP (Temp.new "size") in diff --git a/type/type.sml b/type/type.sml index 69f35f8..82bc8b1 100644 --- a/type/type.sml +++ b/type/type.sml @@ -1,7 +1,7 @@ signature TYPE = sig type ident = Symbol.symbol - datatype vtype = Int | Typedef of ident | Pointer of vtype | Array of vtype | TNull + datatype vtype = Int | String | Typedef of ident | Pointer of vtype | Array of vtype | TNull type variable = ident * vtype datatype typedef = MarkedTypedef of typedef Mark.marked | Struct of variable list @@ -28,14 +28,15 @@ end structure Type :> TYPE = struct type ident = Symbol.symbol - datatype vtype = Int | Typedef of ident | Pointer of vtype | Array of vtype | TNull + datatype vtype = Int | String | Typedef of ident | Pointer of vtype | Array of vtype | TNull type variable = ident * vtype datatype typedef = MarkedTypedef of typedef Mark.marked | Struct of variable list - fun size (Int) = 4 - | size (Pointer _) = 8 - | size (Array _) = 8 - | size (TNull) = 8 + fun size (Int) = 1 + | size (String) = 1 + | size (Pointer _) = 1 + | size (Array _) = 1 + | size (TNull) = 1 | size _ = raise ErrorMsg.InternalError "Type.size on non-small type..." (************************************************) @@ -48,9 +49,10 @@ struct (* determine size of items *) fun sizeof_reset () = ( size_memotable := Symbol.empty ) fun alignment_reset () = ( align_memotable := Symbol.empty ) - fun sizeof _ (Int) = 4 - | sizeof _ (Pointer _) = 8 - | sizeof _ (Array _) = 8 + fun sizeof _ (Int) = 1 + | sizeof _ (String) = 1 + | sizeof _ (Pointer _) = 1 + | sizeof _ (Array _) = 1 | sizeof _ (TNull) = raise ErrorMsg.InternalError "Type.sizeof on TNull?" | sizeof d (Typedef id) = (case (Symbol.look (!size_memotable) id) @@ -70,9 +72,10 @@ struct | sizeof_s d (MarkedTypedef(a)) = sizeof_s d (Mark.data a) (* determine alignment of items *) - and alignment _ (Int) = 4 - | alignment _ (Pointer _) = 8 - | alignment _ (Array _) = 8 + and alignment _ (Int) = 1 + | alignment _ (String) = 1 + | alignment _ (Pointer _) = 1 + | alignment _ (Array _) = 1 | alignment d (Typedef id) = (case Symbol.look (!align_memotable) id of SOME(r) => r @@ -104,12 +107,14 @@ struct fun issmall (Int) = true + | issmall (String) = true | issmall (Pointer _) = true | issmall (Array _) = true | issmall (TNull) = true | issmall _ = false fun typeeq (Int, Int) = true + | typeeq (String, String) = true | typeeq (Typedef a, Typedef b) = (Symbol.name a) = (Symbol.name b) | typeeq (Pointer a, Pointer b) = typeeq (a, b) | typeeq (Array a, Array b) = typeeq (a, b) @@ -131,6 +136,7 @@ struct fun pp_ident i = Symbol.name i fun pp_type (Int) = "int" + | pp_type (String) = "string" | pp_type (Pointer t) = pp_type t ^ "*" | pp_type (Array t) = pp_type t ^ "[]" | pp_type (TNull) = "{NULL type}" diff --git a/type/typechecker.sml b/type/typechecker.sml index 06c6d89..8be3731 100644 --- a/type/typechecker.sml +++ b/type/typechecker.sml @@ -25,6 +25,7 @@ struct of NONE => (ErrorMsg.error mark ("variable `"^(Symbol.name a)^"' not declared here") ; raise ErrorMsg.Error) | SOME t => t) | A.ConstExp _ => T.Int + | A.StringExp _ => T.String | A.OpExp (A.EQ, [a, b]) => (case (typeof (tds, funcs) vars mark a, typeof (tds, funcs) vars mark b) of (T.Int, T.Int) => T.Int (* You shall pass! *) @@ -207,6 +208,7 @@ struct raise ErrorMsg.Error ) | SOME ASSIGNED => ()) | varcheck_exp env (A.ConstExp _) mark = () + | varcheck_exp env (A.StringExp _) mark = () | varcheck_exp env (A.OpExp (_, l)) mark = List.app (fn znt => varcheck_exp env znt mark) l | varcheck_exp env (A.FuncCall (f, l)) mark = List.app (fn znt => varcheck_exp env znt mark) l | varcheck_exp env (A.Marked m) mark = varcheck_exp env (Mark.kane m) (Mark.ext m) @@ -408,6 +410,7 @@ struct (* XXX does not check big vs. small types *) fun typecheck_type (tds, funcs) mark T.Int = () + | typecheck_type (tds, funcs) mark T.String = () | typecheck_type (tds, funcs) mark T.TNull = () | typecheck_type (tds, funcs) mark (T.Pointer t) = typecheck_type (tds, funcs) mark t | typecheck_type (tds, funcs) mark (T.Array t) = typecheck_type (tds, funcs) mark t -- 2.39.2