From 12aa4087bee3e70f170d7457794921de4e385227 Mon Sep 17 00:00:00 2001 From: Joshua Wise Date: Wed, 13 May 2009 21:46:50 -0400 Subject: [PATCH] Initial import of l1c --- Makefile | 17 ++ README | 144 ++++++++++ bin/create-tags | 7 + bin/l1c | 1 + codegen/codegen.sml | 57 ++++ codegen/coloring.sml | 80 ++++++ codegen/colororder.sml | 75 ++++++ codegen/igraph.sml | 71 +++++ codegen/liveness.sml | 69 +++++ codegen/peephole.sml | 44 +++ codegen/solidify.sml | 139 ++++++++++ codegen/stringifier.sml | 39 +++ codegen/x86.sml | 140 ++++++++++ compile-l1c.sml | 7 + parse/ast.sml | 100 +++++++ parse/l1.grm | 99 +++++++ parse/l1.lex | 112 ++++++++ parse/l1.lex.sml | 581 ++++++++++++++++++++++++++++++++++++++++ parse/parse.sml | 49 ++++ parse/parsestate.sml | 59 ++++ sources.cm | 35 +++ top/top.sml | 147 ++++++++++ trans/temp.sml | 33 +++ trans/trans.sml | 54 ++++ trans/tree.sml | 68 +++++ type/typechecker.sml | 49 ++++ util/errormsg.sml | 44 +++ util/flag.sml | 51 ++++ util/mark.sml | 96 +++++++ util/safe-io.sml | 61 +++++ util/symbol.sml | 125 +++++++++ util/word32.sml | 41 +++ 32 files changed, 2694 insertions(+) create mode 100644 Makefile create mode 100644 README create mode 100755 bin/create-tags create mode 100755 bin/l1c create mode 100644 codegen/codegen.sml create mode 100644 codegen/coloring.sml create mode 100644 codegen/colororder.sml create mode 100644 codegen/igraph.sml create mode 100644 codegen/liveness.sml create mode 100644 codegen/peephole.sml create mode 100644 codegen/solidify.sml create mode 100644 codegen/stringifier.sml create mode 100644 codegen/x86.sml create mode 100644 compile-l1c.sml create mode 100644 parse/ast.sml create mode 100644 parse/l1.grm create mode 100644 parse/l1.lex create mode 100644 parse/l1.lex.sml create mode 100644 parse/parse.sml create mode 100644 parse/parsestate.sml create mode 100644 sources.cm create mode 100644 top/top.sml create mode 100644 trans/temp.sml create mode 100644 trans/trans.sml create mode 100644 trans/tree.sml create mode 100644 type/typechecker.sml create mode 100644 util/errormsg.sml create mode 100644 util/flag.sml create mode 100644 util/mark.sml create mode 100644 util/safe-io.sml create mode 100644 util/symbol.sml create mode 100644 util/word32.sml diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..ca58a0b --- /dev/null +++ b/Makefile @@ -0,0 +1,17 @@ +# the following are SML-NJ specific defines +SML = sml + +l1c: FORCE + echo 'use "compile-l1c.sml";' | ${SML} + +clean: + find . -type d -name .cm | xargs rm -rf + ${RM} parse/*.lex.* parse/*.grm.* + find . -type f | grep '~$$' | xargs ${RM} + ${RM} bin/l1c.heap.* + +TAGS: clean + ${RM} TAGS + bin/create-tags *.cm *.sml */*.lex */*.grm */*.sml + +FORCE: diff --git a/README b/README new file mode 100644 index 0000000..0e3bf3d --- /dev/null +++ b/README @@ -0,0 +1,144 @@ +(* README + * Author: Frank Pfenning + *) + +----------------------------------------------------------------------- +Welcome to 15-411 F08! +----------------------------------------------------------------------- + +This is some starter code for the L1 compiler you have to build for +the Lab1. It contains a lexer, parser, translator, and even a code +generator, except that the code generator creates pseudo assembly +language with fictitious instructions and an unlimited number of +registers. We took some care to use good style (according to the +instructor); you may consider this a model for your own coding. Feel +free to modify any and all of this code as you see fit. + +Bug reports to the instructor Frank Pfenning are +particularly welcome and will be noted in the extra credit category. + +----------------------------------------------------------------------- +SML Notes +----------------------------------------------------------------------- +There are many different compilers for SML, perhaps the most +popular ones are + + SML/NJ -- http://www.smlnj.org/ + MLton -- http://www.mlton.org/ + Poly/ML -- http://www.polyml.org/ + +In this class we will be using SML/NJ v110.59. Please make sure your +code compiles under specifically this version on the lab machines +where it is the default and can be invoked simply with "sml" in a +shell. + +If you develop your implementation on other machines, similar versions +of SML/NJ are likely to be compatible, but you should certainly check +your code on the lab machines. + +For (almost universal) Standard Basis Libraries, see +http://www.standardml.org/Basis/index.html. Further resources, such +as documentation for ML-Lex and ML-Yacc, and documentation for the SML/NJ +specific libraries which are used in the starter code, can be found at + + http://www.cs.cmu.edu/~fp/courses/15411-f08/resources.html + +------------------------------------------------------------------------ +Source Files +------------------------------------------------------------------------ +The following are the source files for the L1 compiler + +README -- this file + +Makefile -- makefile for the compiler + For a quick test + + % make l1c (generates file bin/l1c.heap.) + % bin/l1c --verbose ../tests/test1.c + + should generate ../tests/test1.s in pseudo assembly + + % make clean (removes generated files) + % make TAGS (creates file TAGS, for Emacs tags commands) + +compile-l1c.sml -- SML commands that will create bin/l1c.heap. +bin/l1c -- the script that will run the exported SML heap + +sources.cm -- lists all source files, including libraries, + and lexer and grammar specifications + For a quick test + + % sml + - CM.make "sources.cm"; + - Top.test "--verbose ../tests/test1.c"; + + should generate ../tests/test1.s in pseudo assembly + +parse/ast.sml -- definition and printer for abstract syntax trees (AST's) +parse/l1.lex -- L1 lexer +parse/l1.grm -- L1 grammar +parse/parse.sml -- L1 parser +parse/parsestate.sml -- L1 parser support for error messages + +type/typechecker.sml -- (trivial) type-checker for AST + +trans/temp.sml -- functions to generate and track temp's +trans/tree.sml -- definition and pretty printer for IR trees +trans/trans.sml -- translation from AST to IR trees + +codegen/assem.sml -- pseudo assembly format for this starter code +codegen/codegen.sml -- pseudo code generator + +util/errormsg.sml -- error message utilities +util/flag.sml -- library for defining flags +util/mark.sml -- library for tracking source file positions +util/safe-io.sml -- I/O utilities +util/symbol.sml -- symbol table library +util/word32.sml -- machine word utilities for two's complement interpretation + +top/top.sml -- top level function for export to binary and testing + +------------------------------------------------------------------------ +Debugging Hints +------------------------------------------------------------------------ +You can use + + - Top.test "--verbose --dump-ast --dump-ir --dump-assem file.l1"; + +to print information from all the phases of the current compiler. + +If you want to see the internal representations, you can call directly +on SML's top level: + + - val ast = Parse.parse "file.l1"; + - val ir = Trans.translate ast; + - val assem = Codegen.codgen ir; + +This will use SML's internal printing function to print the data +structures. However, not everything will show. + +"-" means that the type is opaque. Sometimes you can replace an opaque + signature ascription ":>" with a transparent one ":" to see the info. + For reasons of general hygiene, however, you should change it back + before handing in. + +"#" means that the printing depth is exceeded. Use + + - Control.Print.printDepth := 100; + + to increase the depth if you need to see more. + +"..." means that the printing length is exceeded. Use + + - Control.Print.printLength := 1000; + + to increase the length if you need to see more. + +------------------------------------------------------------------------ +Library Hints +------------------------------------------------------------------------ +See util/symbol.sml for some uses of libraries provided with SML/NJ +(and some other SML implementations). BinaryMapFn and +BinarySetFn are likely of general use. To see their interface, +you can check http://www.smlnj.org/doc/smlnj-lib/Manual/toc.html. +I found binary maps and binary sets to be occasionally helpful. diff --git a/bin/create-tags b/bin/create-tags new file mode 100755 index 0000000..521990e --- /dev/null +++ b/bin/create-tags @@ -0,0 +1,7 @@ +#! /bin/csh -f +# +if (-e TAGS) rm -i TAGS +foreach f ($*) + echo " " >> TAGS + echo "$f,0" >> TAGS + end diff --git a/bin/l1c b/bin/l1c new file mode 100755 index 0000000..f86abd5 --- /dev/null +++ b/bin/l1c @@ -0,0 +1 @@ +sml @SMLcmdname=$0 @SMLload=bin/l1c.heap.x86-linux $* diff --git a/codegen/codegen.sml b/codegen/codegen.sml new file mode 100644 index 0000000..0297b9f --- /dev/null +++ b/codegen/codegen.sml @@ -0,0 +1,57 @@ +(* L1 Compiler + * Assembly Code Generator for FAKE assembly + * Author: Alex Vaynberg + * Based on code by: Kaustuv Chaudhuri + * Modified: Frank Pfenning + * + * Implements a "convenient munch" algorithm + *) + +signature CODEGEN = +sig + val codegen : Tree.stm list -> x86.insn list +end + +structure Codegen :> CODEGEN = +struct + structure T = Tree + structure X = x86 + + (* munch_exp : prex86oper -> T.exp -> prex86insn list *) + (* munch_exp d e + * generates instructions to achieve d <- e + * d must be TEMP(t) or REG(r) + *) + fun munch_exp d (T.CONST(n)) = [X.MOVL(d, X.CONST n)] + | munch_exp d (T.TEMP(t)) = [X.MOVL(d, X.TEMP t)] + | munch_exp d (T.BINOP(T.ADD, e1, T.CONST n)) = (munch_exp d e1) @ [X.ADDL(d, X.CONST n)] + | munch_exp d (T.BINOP(T.ADD, T.CONST n, e1)) = (munch_exp d e1) @ [X.ADDL(d, X.CONST n)] + | munch_exp d (T.BINOP(T.ADD, e1, e2)) = let val t1 = Temp.new () in (munch_exp d e1) @ (munch_exp (X.TEMP t1) e2) @ [X.ADDL(d, X.TEMP t1)] end + | munch_exp d (T.BINOP(T.SUB, T.CONST 0w0, e1)) = (munch_exp d e1) @ [X.NEG d] + | munch_exp d (T.BINOP(T.SUB, e1, T.CONST(n))) = (munch_exp d e1) @ [X.SUBL(d, X.CONST n)] + | munch_exp d (T.BINOP(T.SUB, e1, e2)) = let val t1 = Temp.new () in (munch_exp d e1) @ (munch_exp (X.TEMP t1) e2) @ [X.SUBL(d, X.TEMP t1)] end + | munch_exp d (T.BINOP(T.MUL, T.TEMP t, T.CONST n)) = [X.IMUL3(d, X.TEMP t, n)] + | munch_exp d (T.BINOP(T.MUL, T.CONST n, T.TEMP t)) = [X.IMUL3(d, X.TEMP t, n)] + | munch_exp d (T.BINOP(T.MUL, e1, T.CONST n)) = (munch_exp d e1) @ [X.IMUL(d, X.CONST n)] + | munch_exp d (T.BINOP(T.MUL, T.CONST n, e1)) = (munch_exp d e1) @ [X.IMUL(d, X.CONST n)] + | munch_exp d (T.BINOP(T.MUL, e1, e2)) = let val t1 = Temp.new () in (munch_exp d e1) @ (munch_exp (X.TEMP t1) e2) @ [X.IMUL(d, X.TEMP t1)] end + | munch_exp d (T.BINOP(T.DIV, e1, e2)) = let val t1 = Temp.new () in (munch_exp (X.TEMP t1) e1) @ (munch_exp d e2) @ [X.MOVL (X.REG X.EAX, X.TEMP t1), X.CLTD, X.IDIVL d, X.MOVL (d, X.REG X.EAX)] end + | munch_exp d (T.BINOP(T.MOD, e1, e2)) = let val t1 = Temp.new () in (munch_exp (X.TEMP t1) e1) @ (munch_exp d e2) @ [X.MOVL (X.REG X.EAX, X.TEMP t1), X.CLTD, X.IDIVL d, X.MOVL (d, X.REG X.EDX)] end + + (* munch_stm : T.stm -> AS.instr list *) + (* munch_stm stm generates code to execute stm *) + fun munch_stm (T.MOVE(T.TEMP(t1), e2)) = + munch_exp (X.TEMP t1) e2 + | munch_stm (T.MOVE(_, _)) = + raise ErrorMsg.InternalError "Incorrect first operand for T.MOVE?" + | munch_stm (T.RETURN(e)) = + let + val t = Temp.new () + in + munch_exp (X.TEMP t) e + @ [X.MOVL(X.REG X.EAX, X.TEMP t), X.RET] + end + + fun codegen nil = nil + | codegen (stm::stms) = munch_stm stm @ codegen stms +end diff --git a/codegen/coloring.sml b/codegen/coloring.sml new file mode 100644 index 0000000..e1c2bcd --- /dev/null +++ b/codegen/coloring.sml @@ -0,0 +1,80 @@ +(* colorizer + * Gathers tiberium, fires rockets + * colors a graph and returns a list of nodes with associated colors + * Author: Chris Lu + *) + +signature COLORIZER = +sig + type tiberium = Temp.temp list + type colorlist = (Temp.temp * int) list + type igraph = (Temp.temp * x86.oper list) list + + val colorize : tiberium -> igraph -> colorlist +end + +structure Colorizer :> COLORIZER = +struct + type tiberium = Temp.temp list + type colorlist = (Temp.temp * int) list + type igraph = (Temp.temp * x86.oper list) list + + structure X = x86 + + (* val color_single : igraph -> Temp.temp * colorlist -> colorlist + * color_single graph (temp, regs) => takes an interference graph, the temp to be colored, and the + * already-colored nodes, colors the temp, and adds it to the list + * this is a helper function for the foldr in colorize + *) + fun color_single (graph: igraph) (temp, regs) = + let + (* Grab the list of interfering operands from the graph *) + val interfere = case List.find (fn (temp',_) => Temp.compare (temp', temp) = EQUAL) graph + of SOME(_, l) => l + | NONE => raise ErrorMsg.InternalError "Temporary not found in graph" + + (* Grab the subset of those that are already colorized *) + val colorized = + List.filter + (fn (t,_) => + List.exists + (fn X.TEMP t' => Temp.compare (t, t') = EQUAL + | _ => false) + interfere + ) regs + + (* Grab the subset of those that are fixed colors *) + val fixeds = + List.filter + (fn X.REG _ => true + | _ => false) + interfere + + (* Grab the register number of the already-colorized ones *) + val ints = + (List.map + (fn (_,i) => i) + colorized) + @ (List.map + (fn X.REG X.EAX => 0 + | X.REG X.EDX => 3 + | _ => raise ErrorMsg.InternalError "Bad kind of specreg") + fixeds) + (* Greedy-colorize -- pick the lowest number that isn't used by a neighbor *) + fun greedy i l = + if (List.exists (fn a => a = i) l) + then greedy (i+1) l + else i + + val newcolor = greedy 0 ints + val () = print (" Assigned color "^(Int.toString newcolor)^" to temp "^(Temp.name temp)^"\n") + in + (temp, (greedy 0 ints)) :: regs + end + + (* val colorize : tiberium -> igraph -> colorlist + * colorizes a graph given the graph representation and the order in which to color + * nodes, returns a list of nodes numbered with their respective color *) + fun colorize order graph = foldl (color_single graph) nil order + +end diff --git a/codegen/colororder.sml b/codegen/colororder.sml new file mode 100644 index 0000000..588086d --- /dev/null +++ b/codegen/colororder.sml @@ -0,0 +1,75 @@ +(* L1 Compiler + * Gathers tiberium, fires rockets + * Takes a interference graph and generates an ordering for coloring + * Author: Joshua Wise + *) + +signature COLORORDER = +sig + type tiberium = (Temp.temp * x86.oper list) list + type rockets = Temp.temp list + + val colororder : tiberium -> rockets +end + +structure ColorOrder :> COLORORDER = +struct + structure T = Temp + structure X = x86 + + type tiberium = (Temp.temp * x86.oper list) list + type rockets = Temp.temp list + + fun colororder (graph : tiberium) : rockets = + let + val () = print ("Ordering colors...\n"); + val initialWeights = map (fn (t, _) => (t, 0)) graph + + fun sortWeights weights = (* Sort the weights such that the largest is at left, ready to be grabbed. *) + ListMergeSort.sort (fn ((_, a), (_, b)) => a < b) weights + + (* Chooses one temporary to pick, and updates the weights. *) + fun orderOne (weights : (Temp.temp * int) list) : Temp.temp * (Temp.temp * int) list = + let + val sorted = sortWeights weights + val (chosen, w) = List.hd sorted (* Grab the temp with the highest weight. *) + val () = print (" Chose "^(Temp.name chosen)^" with weight "^(Int.toString w)^"\n"); + val remaining = List.tl sorted + val neighbors = (* Grab all the neighbors for some given temp. *) + List.hd + (List.map (fn (_, neighbors) => neighbors) + (List.filter (fn (t, _) => T.compare (t, chosen) = EQUAL) graph)) + val () = List.app + (fn (X.TEMP t) => (print (" Neighbor "^(Temp.name t)^"\n")) + | (X.REG X.EAX) => (print " Fixed color EAX\n") + | (X.REG X.EDX) => (print " Fixed color EDX\n") + | _ => raise ErrorMsg.InternalError "Unknown neighbor type -- const?" + ) neighbors; + val newWeights = + List.map + (fn (t, wt) => + (t, + if (List.exists + (fn X.TEMP t' => (T.compare (t, t') = EQUAL) + | _ => false) + neighbors) + then (wt + 1) + else wt + ) + ) remaining + in + (chosen, newWeights) + end + + (* Recursively order until we run out of things to order. *) + fun keepOrdering (nil : (Temp.temp * int) list) : Temp.temp list = nil + | keepOrdering (weights) = + let + val (chosen, newWeights) = orderOne weights + in + chosen :: (keepOrdering newWeights) + end + in + (keepOrdering initialWeights) + end +end diff --git a/codegen/igraph.sml b/codegen/igraph.sml new file mode 100644 index 0000000..b11fd0d --- /dev/null +++ b/codegen/igraph.sml @@ -0,0 +1,71 @@ +(* interference graph generator + * Gathers tiberium, fires rockets + * Takes a list of interfering temps and generates the interference graph + * Author: Chris Lu + *) + +signature IGRAPH = +sig + type tiberium = x86.oper list list + type rockets = (Temp.temp * x86.oper list) list + val gengraph : tiberium -> rockets +end + +structure Igraph :> IGRAPH = +struct + type tiberium = x86.oper list list + type rockets = (Temp.temp * x86.oper list) list + structure X = x86 + + (* val canonicalize : rockets -> rockets + * canonicalize a => puts a in the canonical form by eliminating repeat nodes and edges + * does so by sorting them first, then eliminating consecutive temps + *) + fun canonicalize orig = + let + val sorig = ListMergeSort.sort (fn ((a,_),(b,_)) => X.cmpoper (a,b) = LESS) orig + fun merge ((x, xl)::(y, yl)::rl) = (case X.cmpoper (x,y) of EQUAL => merge ((x, xl @ yl)::rl) | _ => (x, xl) :: merge ((y, yl)::rl)) + | merge (a::nil) = [a] + | merge nil = nil + val ml = merge sorig + fun uniq l = + let + val sl = ListMergeSort.sort (fn (a,b) => X.cmpoper (a,b) = LESS) l + fun merge' (x::y::rl) = (case X.cmpoper (x,y) of EQUAL => merge' (x::rl) | _ => x :: merge' (y::rl)) + | merge' (x::nil) = [x] + | merge' nil = nil + in + merge' sl + end + in + List.map (fn (a, x) => (a, uniq x)) ml + end + + (* val proc_one : Temp.temp list * rockets -> rockets + * helper function to convert a list of interfering registers to a graph + *) + fun proc_one x = + List.map + (fn item1 => (item1, (List.filter (fn item2 => X.cmpoper(item1, item2) <> EQUAL) x))) + x + + (* val gengraph : tiberium -> rockets + * generates the interference graph from a list of interfering temps + * by creating separate interference graphs for each line, concatenating them, + * and putting them in canonical form + *) + fun gengraph x = + let + val igraph' = canonicalize (List.concat (List.map proc_one x)) + in + foldr + (fn ((a,l),b) => case a + of X.REG(_) => b + | X.TEMP(t) => (t,l)::b + | _ => raise ErrorMsg.InternalError "I'm a doooodyyyheaadddd" + ) + nil + igraph' + end + +end diff --git a/codegen/liveness.sml b/codegen/liveness.sml new file mode 100644 index 0000000..34f5d47 --- /dev/null +++ b/codegen/liveness.sml @@ -0,0 +1,69 @@ +(* L1 Compiler + * Gathers tiberium, fires rockets + * Turns pseudoasm into liveness-annotated pseudoasm + * Author: Joshua Wise + *) + +signature LIVENESS = +sig + type tiberium = x86.insn list + type rockets = x86.oper list list + + val liveness : tiberium -> rockets +end + +structure Liveness :> LIVENESS = +struct + structure T = Temp + structure X = x86 + + type tiberium = x86.insn list + type rockets = x86.oper list list + + (* This does not 'follow the rules'. + * + * Since this is a straight line, we can just fold starting at the right, + * and accumulate variables. Thus, the accumulator has two parts: one to + * represent all of the line / liveness pairs that we've accumulated so far; + * and one to represent what was live at the previous line. + *) + fun mashinstr ((instr : x86.insn), (curtemps, output) : x86.oper list * rockets) : x86.oper list * rockets = + let + + (* Removes an element from a list. *) + fun blast (X.TEMP(elt)) l = + List.filter (fn a => case a of X.TEMP(b) => (T.compare (b, elt)) <> EQUAL | _ => true) l + | blast (X.REG(reg)) l = + List.filter (fn a => case a of X.REG(b) => b <> reg | _ => true) l + | blast _ l = raise ErrorMsg.InternalError "Why have we declared a CONST as live?" + + (* Adds an element to a list iff the element doesn't exist already. *) + fun addonce (X.CONST(_)) l = l + | addonce oper l = oper :: blast oper l + + val newtemps = + case instr + of X.DIRECTIVE(_) => curtemps + | X.COMMENT(_) => curtemps + | X.MOVL(dest, src) => addonce src (blast dest curtemps) + | X.SUBL(dest, src) => addonce src (addonce dest curtemps) + | X.IMUL(dest, src) => addonce src (addonce dest curtemps) + | X.IMUL3(dest, src, _) => addonce src (blast dest curtemps) + | X.ADDL(dest, src) => addonce src (addonce dest curtemps) + | X.LEAL(dest, src1, src2) => addonce src1 (addonce src2 (blast dest curtemps)) + | X.IDIVL(src) => addonce src (addonce (X.REG X.EAX) (addonce (X.REG X.EDX) curtemps)) + | X.CLTD => blast (X.REG X.EDX) (addonce (X.REG X.EAX) curtemps) + | X.NEG(src) => (* meh *) curtemps + | X.RET => addonce (X.REG X.EAX) curtemps +(* | _ => raise ErrorMsg.InternalError "Unable to compute liveness for unused instruction form";*) + in + (newtemps, newtemps :: output) + end + + fun liveness (instrs : tiberium) : rockets = + let + val (_, livelist) = foldr mashinstr (nil, nil) instrs + in + livelist + end +end diff --git a/codegen/peephole.sml b/codegen/peephole.sml new file mode 100644 index 0000000..cdbba9c --- /dev/null +++ b/codegen/peephole.sml @@ -0,0 +1,44 @@ +(* peephole optimizer + * Gathers tiberium, fires rockets + * optimizes away redundant insns such as: + mov a, b + mov a, b + + mov a, b + mov b, a + + mov a, a + + neg a + neg a + * Author: Chris Lu + *) + +signature PEEPHOLE = +sig + type tiberium = x86.insn list + type rockets = x86.insn list + val peephole : tiberium -> rockets +end + +structure Peephole :> PEEPHOLE = +struct + type tiberium = x86.insn list + type rockets = x86.insn list + structure X = x86 + + (* val peephole : tiberium -> rockets *) + + fun peephole ((insn1 as X.MOVL(a1,b1))::(insn2 as X.MOVL(a2,b2))::l) = + if(x86.opereq(a1, b1) orelse (x86.opereq(a1, a2) andalso x86.opereq(b1, b2))) then + peephole (insn2::l) + else if(x86.opereq(a2, b2) orelse (x86.opereq(a1, b2) andalso x86.opereq(b1, a2))) then + peephole (insn1::l) + else + insn1::(peephole (insn2::l)) + | peephole ((insn as X.MOVL(a,b))::l) = if x86.opereq(a, b) then peephole l else insn::(peephole l) + | peephole ((insn1 as X.NEG(a))::(insn2 as X.NEG(b))::l) = if x86.opereq(a, b) then peephole l else insn1::(peephole (insn2::l)) + | peephole (a::l) = a::(peephole l) + | peephole nil = nil + +end diff --git a/codegen/solidify.sml b/codegen/solidify.sml new file mode 100644 index 0000000..9d6269d --- /dev/null +++ b/codegen/solidify.sml @@ -0,0 +1,139 @@ +(* L1 Compiler + * Gathers tiberium, fires rockets + * Takes a list of mappings of temporaries to colors and a pseudoasm listing, + * then produces x86 code. + * Author: Joshua Wise + *) + +signature SOLIDIFY = +sig + type colorings = (Temp.temp * int) list + type asm = x86.insn list + + val solidify : colorings -> asm -> asm +end + +structure Solidify :> SOLIDIFY = +struct + structure X = x86 + structure T = Temp + + type colorings = (Temp.temp * int) list + type asm = x86.insn list + + exception Spilled + + fun solidify (regmap : colorings) (instrs : asm) : asm = + let + (* r14d and r15d is reserved for spilling *) + val maxreg = X.regtonum X.R13D + fun numtoreg n = + if (n > maxreg) + then raise Spilled + else X.numtoreg n + + fun temptonum (t: T.temp) : int = + (List.hd + (List.map (fn (_, n) => n) + (List.filter (fn (a, _) => (Temp.compare (a, t) = EQUAL)) regmap))) + + fun temptoreg (t: T.temp) : x86.reg = + numtoreg (temptonum t) + handle Empty => + (let + val () = print (" Uncolored temp "^(Temp.name t)^" -- dead code?\n") + in + X.R15D (*If we don't care about the output, then it is cool to explode this; R15D is guaranteed not to be used across builtin blocks.*) + end) + + val spillreg1 = X.R14D + val spillreg2 = X.R15D + + val prologue = [X.DIRECTIVE "\tpush %rbx\n\tpush %r12\n\tpush %r13\n\tpush %r14\n\tpush %r15"] (* Could be done better. *) + val epilogue = [X.DIRECTIVE "\tpop %r15\n\tpop %r14\n\tpop %r13\n\tpop %r12\n\tpop %rbx"] + + val numreg = foldr (Int.max) 0 (map (fn (_, n) => n) regmap) (* Number of registers used. *) + val nspilled = Int.max (numreg - maxreg, 0) (* Number of spilled registers. *) + fun isspilled (X.TEMP temp) = (((temptonum temp) > maxreg) handle Empty => false) (* Whether a register is spilled *) + | isspilled _ = false + fun stackpos (reg: int) = (reg - maxreg) * ~4 (* Stack position of some register number *) + + fun spill (X.TEMP temp, xreg: x86.reg) = (* Spill a register if need be. *) + if (isspilled (X.TEMP temp)) + then [X.MOVL (X.REL (X.RSP, stackpos (temptonum temp)), X.REG xreg)] + else nil + | spill _ = nil (* Nothing else can be spilled. *) + fun unspill (X.TEMP temp, xreg: x86.reg) = (* Unspill a register if need be. *) + if (isspilled (X.TEMP temp)) + then [X.MOVL (X.REG xreg, X.REL (X.RSP, stackpos (temptonum temp)))] + else nil + | unspill _ = nil + + fun realoper (X.TEMP temp) = X.REG (temptoreg temp) (* Makes a operand 'real'. *) + | realoper r = r + fun stackoper (X.TEMP temp) = + if not (isspilled (X.TEMP temp)) then raise ErrorMsg.InternalError "stackoper on unspilled temp?" + else X.REL (X.RSP, stackpos (temptonum temp)) + | stackoper _ = raise ErrorMsg.InternalError "stackoper on not temp?" + + fun transform (X.DIRECTIVE s) = [X.DIRECTIVE s] + | transform (X.COMMENT s) = [X.COMMENT s] + | transform (X.MOVL (dest, src)) = + if (isspilled dest) + then + unspill (src, spillreg1) @ + [ X.MOVL( + realoper dest handle Spilled => stackoper dest, + realoper src handle Spilled => X.REG spillreg1)] + else + [ X.MOVL( + realoper dest handle Spilled => raise ErrorMsg.InternalError "But we said that wasn't spilled?", + realoper src handle Spilled => stackoper src)] + | transform (X.SUBL (dest, src)) = + unspill (src, spillreg1) @ + unspill (dest, spillreg2) @ + [ X.SUBL( + realoper dest handle Spilled => X.REG spillreg2, + realoper src handle Spilled => X.REG spillreg1)] @ + spill (dest, spillreg2) + | transform (X.IMUL (dest, src)) = + unspill (dest, spillreg1) @ + [ X.IMUL( + realoper dest handle Spilled => X.REG spillreg1, + realoper src handle Spilled => stackoper src)] @ + spill (dest, spillreg1) + | transform (X.IMUL3 (dest, src, const)) = + [ X.IMUL3( + realoper dest handle Spilled => X.REG spillreg1, + realoper src handle Spilled => stackoper src, + const)] @ + spill (dest, spillreg1) + | transform (X.ADDL (dest, src)) = (* You can have either operand spilled, but not both. Pick one. *) + if (isspilled dest) + then + unspill (src, spillreg1) @ + [ X.ADDL( + realoper dest handle Spilled => stackoper dest, + realoper src handle Spilled => X.REG spillreg1)] + else + [ X.ADDL( + realoper dest handle Spilled => raise ErrorMsg.InternalError "But we said that wasn't spilled?", + realoper src handle Spilled => stackoper src)] + | transform (X.LEAL (dest, src1, src2)) = + unspill (src1, spillreg1) @ + unspill (src2, spillreg2) @ + [ X.LEAL( + realoper dest handle Spilled => X.REG spillreg1, + realoper src1 handle Spilled => X.REG spillreg1, + realoper src2 handle Spilled => X.REG spillreg2)] @ + spill (dest, spillreg1) + | transform (X.IDIVL (src)) = [ X.IDIVL(realoper src handle Spilled => stackoper src)] + | transform (X.NEG (src)) = [ X.NEG(realoper src handle Spilled => stackoper src)] + | transform (X.CLTD) = [ X.CLTD ] + | transform (X.RET) = epilogue @ [X.RET] +(* | transform _ = raise ErrorMsg.InternalError ("Unimplemented transform")*) + in + List.concat (prologue :: (map transform instrs)) + + end +end diff --git a/codegen/stringifier.sml b/codegen/stringifier.sml new file mode 100644 index 0000000..1f21bab --- /dev/null +++ b/codegen/stringifier.sml @@ -0,0 +1,39 @@ +(* stringifier + * Gathers tiberium, fires rockets + * turns a list of x86 insns into the assembly code to generate them + * Author: Chris Lu + *) + +signature STRINGIFY = +sig + type tiberium = x86.insn list + type rockets = string + val stringify : tiberium -> rockets +end + +structure Stringify :> STRINGIFY = +struct + type tiberium = x86.insn list + type rockets = string + structure X = x86 + + (* val stringify : tiberium -> rockets + * turns a x86 instruction list into a string of assembly code for these instructions *) + + fun stringify' (X.MOVL (r1, r2)) = "\tmovl " ^ X.prettyprint_oper r2 ^ ", " ^ X.prettyprint_oper r1 ^ "\n" + | stringify' (X.SUBL (r1, r2)) = "\tsubl " ^ X.prettyprint_oper r2 ^ ", " ^ X.prettyprint_oper r1 ^ "\n" + | stringify' (X.IMUL (r1, r2)) = "\timul " ^ X.prettyprint_oper r2 ^ ", " ^ X.prettyprint_oper r1 ^ "\n" + | stringify' (X.IMUL3 (r1, r2, k)) = "\timul " ^ X.prettyprint_oper (X.CONST k) ^ ", " ^ X.prettyprint_oper r2 ^ ", " ^ X.prettyprint_oper r1 ^ "\n" + | stringify' (X.ADDL (r1, r2)) = "\taddl " ^ X.prettyprint_oper r2 ^ ", " ^ X.prettyprint_oper r1 ^ "\n" + | stringify' (X.LEAL (r1, r2, r3)) = "\tleal (" ^ X.prettyprint_oper r2 ^ "," ^ X.prettyprint_oper r3 ^ "), " ^ X.prettyprint_oper r1 ^ "\n" + | stringify' (X.IDIVL (r1)) = "\tidivl " ^ X.prettyprint_oper r1 ^ "\n" + | stringify' (X.NEG (r1)) = "\tneg " ^ X.prettyprint_oper r1 ^ "\n" + | stringify' (X.RET) = "\tret\n" + | stringify' (X.CLTD) = "\tcltd\n" + | stringify' (X.DIRECTIVE(s)) = s ^ "\n" + | stringify' (X.COMMENT(s)) = "\t// " ^ s ^ "\n" + + (* val stringify : tiberium -> rockets *) + fun stringify l = foldr (fn (a,b) => (stringify' a) ^ b) ("") l + +end diff --git a/codegen/x86.sml b/codegen/x86.sml new file mode 100644 index 0000000..b7055f6 --- /dev/null +++ b/codegen/x86.sml @@ -0,0 +1,140 @@ +signature X86 = +sig + datatype reg = + EAX | EBX | ECX | EDX | ESI | EDI | EBP | RSP | R8D | R9D | R10D | R11D | R12D | R13D | R14D | R15D + datatype oper = REG of reg | TEMP of Temp.temp | CONST of Word32.word | REL of (reg * int) + datatype insn = + DIRECTIVE of string | + COMMENT of string | + MOVL of oper * oper | + SUBL of oper * oper | + IMUL of oper * oper | + IMUL3 of oper * oper * Word32.word | + ADDL of oper * oper | + LEAL of oper * oper * oper | + IDIVL of oper | + NEG of oper | + CLTD | + RET + + val cmpoper : oper * oper -> order + val opereq : oper * oper -> bool + val regname : reg -> string + val regtonum : reg -> int + val numtoreg : int -> reg + val prettyprint_oper : oper -> string + val prettyprint : insn -> string +end + +structure x86 :> X86 = +struct + datatype reg = + EAX | EBX | ECX | EDX | ESI | EDI | EBP | RSP | R8D | R9D | R10D | R11D | R12D | R13D | R14D | R15D + datatype oper = REG of reg | TEMP of Temp.temp | CONST of Word32.word | REL of (reg * int) + datatype insn = + DIRECTIVE of string | + COMMENT of string | + MOVL of oper * oper | + SUBL of oper * oper | + IMUL of oper * oper | + IMUL3 of oper * oper * Word32.word | + ADDL of oper * oper | + LEAL of oper * oper * oper | + IDIVL of oper | + NEG of oper | + CLTD | + RET + + fun regname EAX = "eax" + | regname EBX = "ebx" + | regname ECX = "ecx" + | regname EDX = "edx" + | regname ESI = "esi" + | regname EDI = "edi" + | regname EBP = "ebp" + | regname RSP = "rsp" + | regname R8D = "r8d" + | regname R9D = "r9d" + | regname R10D = "r10d" + | regname R11D = "r11d" + | regname R12D = "r12d" + | regname R13D = "r13d" + | regname R14D = "r14d" + | regname R15D = "r15d" + + fun regtonum EAX = 0 + | regtonum EBX = 1 + | regtonum ECX = 2 + | regtonum EDX = 3 + | regtonum ESI = 4 + | regtonum EDI = 5 + | regtonum R8D = 6 + | regtonum R9D = 7 + | regtonum R10D = 8 + | regtonum R11D = 9 + | regtonum R12D = 10 + | regtonum R13D = 11 + | regtonum R14D = 12 + | regtonum R15D = 13 + | regtonum EBP = 14 (* Dummy numbers -- not permitted for allocation, but there so that we can compare *) + | regtonum RSP = 15 + + fun numtoreg 0 = EAX + | numtoreg 1 = EBX + | numtoreg 2 = ECX + | numtoreg 3 = EDX + | numtoreg 4 = ESI + | numtoreg 5 = EDI + | numtoreg 6 = R8D + | numtoreg 7 = R9D + | numtoreg 8 = R10D + | numtoreg 9 = R11D + | numtoreg 10 = R12D + | numtoreg 11 = R13D + | numtoreg 12 = R14D + | numtoreg 13 = R15D + | numtoreg n = raise ErrorMsg.InternalError ("numtoreg: Unknown register "^(Int.toString n)) + + fun regcmp (r1, r2) = Int.compare (regtonum r1, regtonum r2) + + fun cmpoper (REG(reg1), REG(reg2)) = regcmp (reg1, reg2) + | cmpoper (TEMP(temp1), TEMP(temp2)) = Temp.compare (temp1,temp2) + | cmpoper (CONST(const1), CONST(const2)) = Word32.compare (const1, const2) + | cmpoper (REL (r1, i1), REL (r2, i2)) = + let + val regorder = regcmp (r1, r2) + val intorder = Int.compare (i1, i2) + in + if (regorder = EQUAL) then intorder + else regorder + end + | cmpoper (CONST _, _) = LESS + | cmpoper (REG _, _) = LESS + | cmpoper (REL _, _) = LESS + | cmpoper (_, _) = GREATER + + fun opereq (a, b) = cmpoper (a, b) = EQUAL + + fun moreDifferentToString (i) = + if (i >= 0) then Int.toString i + else "-" ^ (Int.toString (~i)) + + fun prettyprint_oper (REG r) = "%" ^ (regname r) + | prettyprint_oper (TEMP t) = Temp.name t + | prettyprint_oper (CONST c) = "$0x" ^ (Word32.toString c) + | prettyprint_oper (REL (r, i)) = (moreDifferentToString i) ^ "(%" ^ (regname r) ^ ")" + + fun prettyprint (DIRECTIVE(str)) = str ^ "\n" + | prettyprint (COMMENT(str)) = "// " ^ str ^ "\n" + | prettyprint (MOVL(src, dst)) = "\tMOVL\t" ^ (prettyprint_oper src) ^ ", " ^ (prettyprint_oper dst) ^ "\n" + | prettyprint (SUBL(src, dst)) = "\tSUBL\t" ^ (prettyprint_oper src) ^ ", " ^ (prettyprint_oper dst) ^ "\n" + | prettyprint (IMUL(src, dst)) = "\tIMUL\t" ^ (prettyprint_oper src) ^ ", " ^ (prettyprint_oper dst) ^ "\n" + | prettyprint (IMUL3(dst, tmp, const)) = "\tIMUL\t" ^ (prettyprint_oper (CONST const)) ^ ", " ^ (prettyprint_oper tmp) ^ ", " ^ (prettyprint_oper dst) ^ "\n" + | prettyprint (ADDL(src, dst)) = "\tADDL\t" ^ (prettyprint_oper src) ^ ", " ^ (prettyprint_oper dst) ^ "\n" + | prettyprint (LEAL(src1, src2, dst)) = "\tLEAL\t(" ^ (prettyprint_oper src1) ^ "," ^ (prettyprint_oper src2) ^ ")," ^ (prettyprint_oper dst) ^ "\n" + | prettyprint (IDIVL(src)) = "\tIDIVL\t" ^ (prettyprint_oper src) ^ "\n" + | prettyprint (NEG (src)) = "\tNEG\t" ^ (prettyprint_oper src) ^ "\n" + | prettyprint (CLTD) = "\tCLTD\n" + | prettyprint (RET) = "\tRET\n" +(* | prettyprint _ = raise ErrorMsg.InternalError ("prettyprint: unknown instruction")*) +end diff --git a/compile-l1c.sml b/compile-l1c.sml new file mode 100644 index 0000000..35284db --- /dev/null +++ b/compile-l1c.sml @@ -0,0 +1,7 @@ +(* L1 Compiler + * Helper for compilation + * Author: Kaustuv Chaudhuri + *) + +CM.make "sources.cm"; +SMLofNJ.exportFn ("bin/l1c.heap", Top.main); diff --git a/parse/ast.sml b/parse/ast.sml new file mode 100644 index 0000000..bd121e1 --- /dev/null +++ b/parse/ast.sml @@ -0,0 +1,100 @@ +(* L1 Compiler + * Abstract Syntax Trees + * Author: Alex Vaynberg + * Modified: Frank Pfenning + * + * Uses pretty printing library + * structure PP -- see util/pp.sml + *) + +signature AST = +sig + type ident = Symbol.symbol + + datatype oper = + PLUS + | MINUS + | TIMES + | DIVIDEDBY + | MODULO + | NEGATIVE (* unary minus *) + + datatype exp = + Var of ident + | ConstExp of Word32.word + | OpExp of oper * exp list + | Marked of exp Mark.marked + and stm = + Assign of ident * exp + | Return of exp + + type program = stm list + + (* print as source, with redundant parentheses *) + structure Print : + sig + val pp_exp : exp -> string + val pp_stm : stm -> string + val pp_program : program -> string + end + +end + +structure Ast :> AST = +struct + type ident = Symbol.symbol + + datatype oper = + PLUS + | MINUS + | TIMES + | DIVIDEDBY + | MODULO + | NEGATIVE (* unary minus *) + + datatype exp = + Var of ident + | ConstExp of Word32.word + | OpExp of oper * exp list + | Marked of exp Mark.marked + and stm = + Assign of ident * exp + | Return of exp + + type program = stm list + + (* print programs and expressions in source form + * using redundant parentheses to clarify precedence + *) + structure Print = + struct + fun pp_ident id = Symbol.name id + + fun pp_oper PLUS = "+" + | pp_oper MINUS = "-" + | pp_oper TIMES = "*" + | pp_oper DIVIDEDBY = "/" + | pp_oper MODULO = "%" + | pp_oper NEGATIVE = "-" + + fun pp_exp (Var(id)) = pp_ident id + | pp_exp (ConstExp(c)) = Word32Signed.toString c + | pp_exp (OpExp(oper, [e])) = + pp_oper oper ^ "(" ^ pp_exp e ^ ")" + | pp_exp (OpExp(oper, [e1,e2])) = + "(" ^ pp_exp e1 ^ " " ^ pp_oper oper + ^ " " ^ pp_exp e2 ^ ")" + | pp_exp (Marked(marked_exp)) = + pp_exp (Mark.data marked_exp) + + fun pp_stm (Assign (id,e)) = + pp_ident id ^ " = " ^ pp_exp e ^ ";" + | pp_stm (Return e) = + "return " ^ pp_exp e ^ ";" + + fun pp_stms nil = "" + | pp_stms (s::ss) = pp_stm s ^ "\n" ^ pp_stms ss + + fun pp_program ss = "{\n" ^ pp_stms ss ^ "}" + end +end diff --git a/parse/l1.grm b/parse/l1.grm new file mode 100644 index 0000000..41af15b --- /dev/null +++ b/parse/l1.grm @@ -0,0 +1,99 @@ +(* L1 Compiler + * L1 grammar + * Author: Kaustuv Chaudhuri + * Modified: Frank Pfenning + *) + +structure A = Ast + +(* for simplicity, we only mark expressions, not statements *) + +(* mark e with region (left, right) in source file *) +fun mark (e, (left, right)) = A.Marked (Mark.mark' (e, ParseState.ext (left, right))) + +(* create lval from expression; here just an id *) +(* generates error if not an identifier *) +fun make_lval (A.Var(id)) ext = id + | make_lval (A.Marked(marked_exp)) ext = + make_lval (Mark.data marked_exp) (Mark.ext marked_exp) + | make_lval _ ext = ( ErrorMsg.error ext "not a variable" ; + Symbol.bogus ) + +(* expand_asnop (exp1, "op=", exp2) region = "exp1 = exp1 op exps" + * or = "exp1 = exp2" if asnop is "=" + * generates error if exp1 is an lval (identifier) + * syntactically expands a compound assignment operator + *) +fun expand_asnop (exp1, NONE, exp2) (left, right) = + A.Assign(make_lval exp1 NONE, exp2) + | expand_asnop (exp1, SOME(oper), exp2) (left, right) = + A.Assign(make_lval exp1 NONE, + mark(A.OpExp(oper, [exp1, exp2]), (left, right))) + +%% +%header (functor L1LrValsFn (structure Token : TOKEN)) + +%term + EOF + | SEMI + | INTNUM of Word32.word + | IDENT of Symbol.symbol + | RETURN + | PLUS | MINUS | STAR | SLASH | PERCENT + | ASSIGN | PLUSEQ | MINUSEQ | STAREQ | SLASHEQ | PERCENTEQ + | LBRACE | RBRACE + | LPAREN | RPAREN + | UNARY | ASNOP (* dummy *) + +%nonterm + program of A.program + | stms of A.stm list + | stm of A.stm + | simp of A.stm + | return of A.stm + | exp of A.exp + | asnop of A.oper option + +%verbose (* print summary of errors *) +%pos int (* positions *) +%start program +%eop EOF +%noshift EOF + +%name L1 + +%left PLUS MINUS +%left STAR SLASH PERCENT +%right UNARY +%left LPAREN + +%% + +program : LBRACE stms return RBRACE + (stms@[return]) + +return : RETURN exp SEMI (A.Return exp) + +stms : ([]) + | stm stms (stm :: stms) + +stm : simp SEMI (simp) +simp : exp asnop exp %prec ASNOP + (expand_asnop (exp1, asnop, exp2) (exp1left, exp2right)) + +exp : LPAREN exp RPAREN (exp) + | INTNUM (mark (A.ConstExp(INTNUM),(INTNUMleft,INTNUMright))) + | IDENT (mark (A.Var(IDENT), (IDENTleft,IDENTright))) + | exp PLUS exp (mark (A.OpExp (A.PLUS, [exp1,exp2]), (exp1left,exp2right))) + | exp MINUS exp (mark (A.OpExp (A.MINUS, [exp1,exp2]), (exp1left,exp2right))) + | exp STAR exp (mark (A.OpExp (A.TIMES, [exp1,exp2]), (exp1left,exp2right))) + | exp SLASH exp (mark (A.OpExp (A.DIVIDEDBY, [exp1,exp2]), (exp1left,exp2right))) + | exp PERCENT exp (mark (A.OpExp (A.MODULO, [exp1,exp2]), (exp1left,exp2right))) + | MINUS exp %prec UNARY (mark (A.OpExp (A.NEGATIVE, [exp]), (MINUSleft,expright))) + +asnop : ASSIGN (NONE) + | PLUSEQ (SOME(A.PLUS)) + | MINUSEQ (SOME(A.MINUS)) + | STAREQ (SOME(A.TIMES)) + | SLASHEQ (SOME(A.DIVIDEDBY)) + | PERCENTEQ (SOME(A.MODULO)) diff --git a/parse/l1.lex b/parse/l1.lex new file mode 100644 index 0000000..a80f936 --- /dev/null +++ b/parse/l1.lex @@ -0,0 +1,112 @@ +(* L1 Compiler + * Lexer + * Author: Kaustuv Chaudhuri + * Modified: Frank Pfenning + *) + +structure A = Ast +structure S = Symbol + +type pos = int +type svalue = Tokens.svalue +type ('a,'b) token = ('a,'b) Tokens.token +type lexresult = (svalue,pos) Tokens.token + +local + val commentLevel = ref 0 + val commentPos = ref 0 +in + fun enterComment yypos = + ( commentLevel := !commentLevel + 1 ; + commentPos := yypos ) + + fun exitComment () = + ( commentLevel := !commentLevel - 1 ; + !commentLevel = 0 ) + + fun number (yyt, yyp) = + let + val ext = ParseState.ext (yyp, yyp + size yyt) + val numOpt = Word32Signed.fromString yyt + handle Overflow => + ( ErrorMsg.error ext + ("integral constant `" ^ yyt ^ "' too large") ; + NONE ) + in + case numOpt + of NONE => ( ErrorMsg.error ext + ("cannot parse integral constant `" ^ yyt ^ "'"); + Tokens.INTNUM (Word32Signed.ZERO, yyp, yyp + size yyt) ) + | SOME n => Tokens.INTNUM (n,yyp,yyp + size yyt) + end + + fun eof () = + ( if (!commentLevel > 0) + then (ErrorMsg.error (ParseState.ext (!commentPos,!commentPos)) "unterminated comment") + else (); + Tokens.EOF (0,0) ) (* bogus position information; unused *) + +end + +%% +%header (functor L1LexFn(structure Tokens : L1_TOKENS)); +%full +%s COMMENT COMMENT_LINE; + +id = [A-Za-z_][A-Za-z0-9_]*; +decnum = [0-9][0-9]*; + +ws = [\ \t\012]; + +%% + + {ws}+ => (lex ()); + \n => (ParseState.newline(yypos); lex()); + + "{" => (Tokens.LBRACE (yypos, yypos + size yytext)); + "}" => (Tokens.RBRACE (yypos, yypos + size yytext)); + "(" => (Tokens.LPAREN (yypos, yypos + size yytext)); + ")" => (Tokens.RPAREN (yypos, yypos + size yytext)); + + ";" => (Tokens.SEMI (yypos, yypos + size yytext)); + + "=" => (Tokens.ASSIGN (yypos, yypos + size yytext)); + "+=" => (Tokens.PLUSEQ (yypos, yypos + size yytext)); + "-=" => (Tokens.MINUSEQ (yypos, yypos + size yytext)); + "*=" => (Tokens.STAREQ (yypos, yypos + size yytext)); + "/=" => (Tokens.SLASHEQ (yypos, yypos + size yytext)); + "%=" => (Tokens.PERCENTEQ (yypos, yypos + size yytext)); + + "+" => (Tokens.PLUS (yypos, yypos + size yytext)); + "-" => (Tokens.MINUS (yypos, yypos + size yytext)); + "*" => (Tokens.STAR (yypos, yypos + size yytext)); + "/" => (Tokens.SLASH (yypos, yypos + size yytext)); + "%" => (Tokens.PERCENT (yypos, yypos + size yytext)); + + "return" => (Tokens.RETURN (yypos, yypos + size yytext)); + + {decnum} => (number (yytext, yypos)); + + {id} => (let + val id = Symbol.symbol yytext + in + Tokens.IDENT (id, yypos, yypos + size yytext) + end); + + "/*" => (YYBEGIN COMMENT; enterComment yypos; lex()); + "*/" => (ErrorMsg.error (ParseState.ext (yypos, yypos)) "unbalanced comments"; + lex()); + + "//" => (YYBEGIN COMMENT_LINE; lex()); + "#" => (YYBEGIN COMMENT_LINE; lex()); + . => (ErrorMsg.error (ParseState.ext (yypos,yypos)) + ("illegal character: \"" ^ yytext ^ "\""); + lex ()); + + "/*" => (enterComment yypos; lex()); + "*/" => (if exitComment () then YYBEGIN INITIAL else (); lex()); + \n => (ParseState.newline yypos; lex ()); + . => (lex()); + + \n => (ParseState.newline yypos; YYBEGIN INITIAL; lex()); + . => (lex()); diff --git a/parse/l1.lex.sml b/parse/l1.lex.sml new file mode 100644 index 0000000..70eb999 --- /dev/null +++ b/parse/l1.lex.sml @@ -0,0 +1,581 @@ +functor L1LexFn(structure Tokens : L1_TOKENS)= + struct + structure UserDeclarations = + struct +(* L1 Compiler + * Lexer + * Author: Kaustuv Chaudhuri + * Modified: Frank Pfenning + *) + +structure A = Ast +structure S = Symbol + +type pos = int +type svalue = Tokens.svalue +type ('a,'b) token = ('a,'b) Tokens.token +type lexresult = (svalue,pos) Tokens.token + +local + val commentLevel = ref 0 + val commentPos = ref 0 +in + fun enterComment yypos = + ( commentLevel := !commentLevel + 1 ; + commentPos := yypos ) + + fun exitComment () = + ( commentLevel := !commentLevel - 1 ; + !commentLevel = 0 ) + + fun number (yyt, yyp) = + let + val ext = ParseState.ext (yyp, yyp + size yyt) + val numOpt = Word32Signed.fromString yyt + handle Overflow => + ( ErrorMsg.error ext + ("integral constant `" ^ yyt ^ "' too large") ; + NONE ) + in + case numOpt + of NONE => ( ErrorMsg.error ext + ("cannot parse integral constant `" ^ yyt ^ "'"); + Tokens.INTNUM (Word32Signed.ZERO, yyp, yyp + size yyt) ) + | SOME n => Tokens.INTNUM (n,yyp,yyp + size yyt) + end + + fun eof () = + ( if (!commentLevel > 0) + then (ErrorMsg.error (ParseState.ext (!commentPos,!commentPos)) "unterminated comment") + else (); + Tokens.EOF (0,0) ) (* bogus position information; unused *) + +end + +end (* end of user routines *) +exception LexError (* raised if illegal leaf action tried *) +structure Internal = + struct + +datatype yyfinstate = N of int +type statedata = {fin : yyfinstate list, trans: string} +(* transition & final state table *) +val tab = let +val s = [ + (0, +"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" +), + (1, +"\007\007\007\007\007\007\007\007\007\038\040\007\038\007\007\007\ +\\007\007\007\007\007\007\007\007\007\007\007\007\007\007\007\007\ +\\038\007\007\037\007\035\007\007\034\033\030\028\007\026\007\022\ +\\020\020\020\020\020\020\020\020\020\020\007\019\007\018\007\007\ +\\007\010\010\010\010\010\010\010\010\010\010\010\010\010\010\010\ +\\010\010\010\010\010\010\010\010\010\010\010\007\007\007\007\010\ +\\007\010\010\010\010\010\010\010\010\010\010\010\010\010\010\010\ +\\010\010\012\010\010\010\010\010\010\010\010\009\007\008\007\007\ +\\007\007\007\007\007\007\007\007\007\007\007\007\007\007\007\007\ +\\007\007\007\007\007\007\007\007\007\007\007\007\007\007\007\007\ +\\007\007\007\007\007\007\007\007\007\007\007\007\007\007\007\007\ +\\007\007\007\007\007\007\007\007\007\007\007\007\007\007\007\007\ +\\007\007\007\007\007\007\007\007\007\007\007\007\007\007\007\007\ +\\007\007\007\007\007\007\007\007\007\007\007\007\007\007\007\007\ +\\007\007\007\007\007\007\007\007\007\007\007\007\007\007\007\007\ +\\007\007\007\007\007\007\007\007\007\007\007\007\007\007\007\007" +), + (3, +"\041\041\041\041\041\041\041\041\041\041\046\041\041\041\041\041\ +\\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\ +\\041\041\041\041\041\041\041\041\041\041\044\041\041\041\041\042\ +\\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\ +\\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\ +\\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\ +\\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\ +\\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\ +\\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\ +\\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\ +\\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\ +\\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\ +\\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\ +\\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\ +\\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\ +\\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041\041" +), + (5, +"\047\047\047\047\047\047\047\047\047\047\048\047\047\047\047\047\ +\\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\ +\\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\ +\\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\ +\\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\ +\\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\ +\\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\ +\\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\ +\\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\ +\\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\ +\\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\ +\\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\ +\\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\ +\\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\ +\\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\ +\\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047\047" +), + (10, +"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\011\011\011\011\011\011\011\011\011\011\000\000\000\000\000\000\ +\\000\011\011\011\011\011\011\011\011\011\011\011\011\011\011\011\ +\\011\011\011\011\011\011\011\011\011\011\011\000\000\000\000\011\ +\\000\011\011\011\011\011\011\011\011\011\011\011\011\011\011\011\ +\\011\011\011\011\011\011\011\011\011\011\011\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" +), + (12, +"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\011\011\011\011\011\011\011\011\011\011\000\000\000\000\000\000\ +\\000\011\011\011\011\011\011\011\011\011\011\011\011\011\011\011\ +\\011\011\011\011\011\011\011\011\011\011\011\000\000\000\000\011\ +\\000\011\011\011\011\013\011\011\011\011\011\011\011\011\011\011\ +\\011\011\011\011\011\011\011\011\011\011\011\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" +), + (13, +"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\011\011\011\011\011\011\011\011\011\011\000\000\000\000\000\000\ +\\000\011\011\011\011\011\011\011\011\011\011\011\011\011\011\011\ +\\011\011\011\011\011\011\011\011\011\011\011\000\000\000\000\011\ +\\000\011\011\011\011\011\011\011\011\011\011\011\011\011\011\011\ +\\011\011\011\011\014\011\011\011\011\011\011\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" +), + (14, +"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\011\011\011\011\011\011\011\011\011\011\000\000\000\000\000\000\ +\\000\011\011\011\011\011\011\011\011\011\011\011\011\011\011\011\ +\\011\011\011\011\011\011\011\011\011\011\011\000\000\000\000\011\ +\\000\011\011\011\011\011\011\011\011\011\011\011\011\011\011\011\ +\\011\011\011\011\011\015\011\011\011\011\011\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" +), + (15, +"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\011\011\011\011\011\011\011\011\011\011\000\000\000\000\000\000\ +\\000\011\011\011\011\011\011\011\011\011\011\011\011\011\011\011\ +\\011\011\011\011\011\011\011\011\011\011\011\000\000\000\000\011\ +\\000\011\011\011\011\011\011\011\011\011\011\011\011\011\011\011\ +\\011\011\016\011\011\011\011\011\011\011\011\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" +), + (16, +"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\011\011\011\011\011\011\011\011\011\011\000\000\000\000\000\000\ +\\000\011\011\011\011\011\011\011\011\011\011\011\011\011\011\011\ +\\011\011\011\011\011\011\011\011\011\011\011\000\000\000\000\011\ +\\000\011\011\011\011\011\011\011\011\011\011\011\011\011\017\011\ +\\011\011\011\011\011\011\011\011\011\011\011\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" +), + (20, +"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\021\021\021\021\021\021\021\021\021\021\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" +), + (22, +"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\025\000\000\000\000\024\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\023\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" +), + (26, +"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\027\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" +), + (28, +"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\029\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" +), + (30, +"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\032\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\031\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" +), + (35, +"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\036\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" +), + (38, +"\000\000\000\000\000\000\000\000\000\039\000\000\039\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\039\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" +), + (42, +"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\043\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" +), + (44, +"\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\045\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\ +\\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" +), +(0, "")] +fun f x = x +val s = map f (rev (tl (rev s))) +exception LexHackingError +fun look ((j,x)::r, i: int) = if i = j then x else look(r, i) + | look ([], i) = raise LexHackingError +fun g {fin=x, trans=i} = {fin=x, trans=look(s,i)} +in Vector.fromList(map g +[{fin = [], trans = 0}, +{fin = [], trans = 1}, +{fin = [], trans = 1}, +{fin = [], trans = 3}, +{fin = [], trans = 3}, +{fin = [], trans = 5}, +{fin = [], trans = 5}, +{fin = [(N 67)], trans = 0}, +{fin = [(N 8),(N 67)], trans = 0}, +{fin = [(N 6),(N 67)], trans = 0}, +{fin = [(N 54),(N 67)], trans = 10}, +{fin = [(N 54)], trans = 10}, +{fin = [(N 54),(N 67)], trans = 12}, +{fin = [(N 54)], trans = 13}, +{fin = [(N 54)], trans = 14}, +{fin = [(N 54)], trans = 15}, +{fin = [(N 54)], trans = 16}, +{fin = [(N 48),(N 54)], trans = 10}, +{fin = [(N 16),(N 67)], trans = 0}, +{fin = [(N 14),(N 67)], trans = 0}, +{fin = [(N 51),(N 67)], trans = 20}, +{fin = [(N 51)], trans = 20}, +{fin = [(N 39),(N 67)], trans = 22}, +{fin = [(N 28)], trans = 0}, +{fin = [(N 63)], trans = 0}, +{fin = [(N 57)], trans = 0}, +{fin = [(N 35),(N 67)], trans = 26}, +{fin = [(N 22)], trans = 0}, +{fin = [(N 33),(N 67)], trans = 28}, +{fin = [(N 19)], trans = 0}, +{fin = [(N 37),(N 67)], trans = 30}, +{fin = [(N 25)], trans = 0}, +{fin = [(N 60)], trans = 0}, +{fin = [(N 12),(N 67)], trans = 0}, +{fin = [(N 10),(N 67)], trans = 0}, +{fin = [(N 41),(N 67)], trans = 35}, +{fin = [(N 31)], trans = 0}, +{fin = [(N 65),(N 67)], trans = 0}, +{fin = [(N 2),(N 67)], trans = 38}, +{fin = [(N 2)], trans = 38}, +{fin = [(N 4)], trans = 0}, +{fin = [(N 77)], trans = 0}, +{fin = [(N 77)], trans = 42}, +{fin = [(N 70)], trans = 0}, +{fin = [(N 77)], trans = 44}, +{fin = [(N 73)], trans = 0}, +{fin = [(N 75)], trans = 0}, +{fin = [(N 81)], trans = 0}, +{fin = [(N 79)], trans = 0}]) +end +structure StartStates = + struct + datatype yystartstate = STARTSTATE of int + +(* start state definitions *) + +val COMMENT = STARTSTATE 3; +val COMMENT_LINE = STARTSTATE 5; +val INITIAL = STARTSTATE 1; + +end +type result = UserDeclarations.lexresult + exception LexerError (* raised if illegal leaf action tried *) +end + +fun makeLexer yyinput = +let val yygone0=1 + val yyb = ref "\n" (* buffer *) + val yybl = ref 1 (*buffer length *) + val yybufpos = ref 1 (* location of next character to use *) + val yygone = ref yygone0 (* position in file of beginning of buffer *) + val yydone = ref false (* eof found yet? *) + val yybegin = ref 1 (*Current 'start state' for lexer *) + + val YYBEGIN = fn (Internal.StartStates.STARTSTATE x) => + yybegin := x + +fun lex () : Internal.result = +let fun continue() = lex() in + let fun scan (s,AcceptingLeaves : Internal.yyfinstate list list,l,i0) = + let fun action (i,nil) = raise LexError + | action (i,nil::l) = action (i-1,l) + | action (i,(node::acts)::l) = + case node of + Internal.N yyk => + (let fun yymktext() = substring(!yyb,i0,i-i0) + val yypos = i0+ !yygone + open UserDeclarations Internal.StartStates + in (yybufpos := i; case yyk of + + (* Application actions *) + + 10 => let val yytext=yymktext() in Tokens.LPAREN (yypos, yypos + size yytext) end +| 12 => let val yytext=yymktext() in Tokens.RPAREN (yypos, yypos + size yytext) end +| 14 => let val yytext=yymktext() in Tokens.SEMI (yypos, yypos + size yytext) end +| 16 => let val yytext=yymktext() in Tokens.ASSIGN (yypos, yypos + size yytext) end +| 19 => let val yytext=yymktext() in Tokens.PLUSEQ (yypos, yypos + size yytext) end +| 2 => (lex ()) +| 22 => let val yytext=yymktext() in Tokens.MINUSEQ (yypos, yypos + size yytext) end +| 25 => let val yytext=yymktext() in Tokens.STAREQ (yypos, yypos + size yytext) end +| 28 => let val yytext=yymktext() in Tokens.SLASHEQ (yypos, yypos + size yytext) end +| 31 => let val yytext=yymktext() in Tokens.PERCENTEQ (yypos, yypos + size yytext) end +| 33 => let val yytext=yymktext() in Tokens.PLUS (yypos, yypos + size yytext) end +| 35 => let val yytext=yymktext() in Tokens.MINUS (yypos, yypos + size yytext) end +| 37 => let val yytext=yymktext() in Tokens.STAR (yypos, yypos + size yytext) end +| 39 => let val yytext=yymktext() in Tokens.SLASH (yypos, yypos + size yytext) end +| 4 => (ParseState.newline(yypos); lex()) +| 41 => let val yytext=yymktext() in Tokens.PERCENT (yypos, yypos + size yytext) end +| 48 => let val yytext=yymktext() in Tokens.RETURN (yypos, yypos + size yytext) end +| 51 => let val yytext=yymktext() in number (yytext, yypos) end +| 54 => let val yytext=yymktext() in let + val id = Symbol.symbol yytext + in + Tokens.IDENT (id, yypos, yypos + size yytext) + end end +| 57 => (YYBEGIN COMMENT; enterComment yypos; lex()) +| 6 => let val yytext=yymktext() in Tokens.LBRACE (yypos, yypos + size yytext) end +| 60 => (ErrorMsg.error (ParseState.ext (yypos, yypos)) "unbalanced comments"; + lex()) +| 63 => (YYBEGIN COMMENT_LINE; lex()) +| 65 => (YYBEGIN COMMENT_LINE; lex()) +| 67 => let val yytext=yymktext() in ErrorMsg.error (ParseState.ext (yypos,yypos)) + ("illegal character: \"" ^ yytext ^ "\""); + lex () end +| 70 => (enterComment yypos; lex()) +| 73 => (if exitComment () then YYBEGIN INITIAL else (); lex()) +| 75 => (ParseState.newline yypos; lex ()) +| 77 => (lex()) +| 79 => (ParseState.newline yypos; YYBEGIN INITIAL; lex()) +| 8 => let val yytext=yymktext() in Tokens.RBRACE (yypos, yypos + size yytext) end +| 81 => (lex()) +| _ => raise Internal.LexerError + + ) end ) + + val {fin,trans} = Unsafe.Vector.sub(Internal.tab, s) + val NewAcceptingLeaves = fin::AcceptingLeaves + in if l = !yybl then + if trans = #trans(Vector.sub(Internal.tab,0)) + then action(l,NewAcceptingLeaves +) else let val newchars= if !yydone then "" else yyinput 1024 + in if (size newchars)=0 + then (yydone := true; + if (l=i0) then UserDeclarations.eof () + else action(l,NewAcceptingLeaves)) + else (if i0=l then yyb := newchars + else yyb := substring(!yyb,i0,l-i0)^newchars; + yygone := !yygone+i0; + yybl := size (!yyb); + scan (s,AcceptingLeaves,l-i0,0)) + end + else let val NewChar = Char.ord(Unsafe.CharVector.sub(!yyb,l)) + val NewState = Char.ord(Unsafe.CharVector.sub(trans,NewChar)) + in if NewState=0 then action(l,NewAcceptingLeaves) + else scan(NewState,NewAcceptingLeaves,l+1,i0) + end + end +(* + val start= if substring(!yyb,!yybufpos-1,1)="\n" +then !yybegin+1 else !yybegin +*) + in scan(!yybegin (* start *),nil,!yybufpos,!yybufpos) + end +end + in lex + end +end diff --git a/parse/parse.sml b/parse/parse.sml new file mode 100644 index 0000000..5981be1 --- /dev/null +++ b/parse/parse.sml @@ -0,0 +1,49 @@ +(* L1 Compiler + * Parsing + * Author: Kaustuv Chaudhuri + * Modified: Frank Pfenning + * + * Glueing together the pieces produced by ML-Lex and ML-Yacc + *) + +signature PARSE = +sig + (* parse filename = ast + * will raise ErrorMsg.Error in case of lexing or parsing error + *) + val parse : string -> Ast.program +end + +structure Parse :> PARSE = +struct + + structure L1LrVals = L1LrValsFn (structure Token = LrParser.Token) + structure L1Lex = L1LexFn (structure Tokens = L1LrVals.Tokens) + structure L1Parse = Join (structure ParserData = L1LrVals.ParserData + structure Lex = L1Lex + structure LrParser = LrParser) + + (* Main parsing function *) + fun parse filename = + SafeIO.withOpenIn filename (fn instream => + let + val _ = ErrorMsg.reset() (* no errors, no messages so far *) + val _ = ParseState.setfile filename (* start at position 0 in filename *) + fun parseerror (s, p1, p2) = ErrorMsg.error (ParseState.ext (p1,p2)) s + val lexer = LrParser.Stream.streamify + (L1Lex.makeLexer (fn _ => TextIO.input instream)) + (* 0 = no error correction, 15 = reasonable lookahead for correction *) + val (absyn, _) = L1Parse.parse(0, lexer, parseerror, ()) + val _ = if !ErrorMsg.anyErrors + then raise ErrorMsg.Error + else () + in + absyn + end) + handle L1Lex.LexError => ( ErrorMsg.error NONE "lexer error" ; + raise ErrorMsg.Error ) + | LrParser.ParseError => raise ErrorMsg.Error (* always preceded by msg *) + | e as IO.Io _ => ( ErrorMsg.error NONE (exnMessage e); + raise ErrorMsg.Error ) + +end diff --git a/parse/parsestate.sml b/parse/parsestate.sml new file mode 100644 index 0000000..5e27137 --- /dev/null +++ b/parse/parsestate.sml @@ -0,0 +1,59 @@ +(* L1 Compiler + * Parse State System + * Author: Kaustuv Chaudhuri + * Annotations: Alex Vaynberg + * Modified: Frank Pfenning + * + * This tracks filename and newline characters + * so character positions in lexer tokens + * can be converted to line.column format for error messages + *) + +signature PARSE_STATE = + sig + (* setfile(filename) sets current filename and resets newline positions *) + val setfile : string -> unit + + (* newline(pos) adds pos to current newline positions *) + val newline : int -> unit + + (* returns the current position information based on two integer offsets *) + val ext : int * int -> Mark.ext option + end + +structure ParseState :> PARSE_STATE = +struct + + val currFilename = ref ""; + val currLines = ref (nil : int list); + + fun setfile (filename) = + (currFilename := filename; + currLines := nil) + + fun newline pos = + (currLines := pos :: !currLines) + + (* look (pos, newline_positions, line_number) = (line, col) + * pos is buffer position + * newline_positions is (reverse) list of newline positions in file + * line_number is lenght of newline_positions + *) + fun look (pos, a :: rest, n) = + (* a is end of line n *) + if a < pos then (n+1, pos-a) + else look (pos, rest, n-1) + | look (pos, nil, n) = + (* first line pos is off by 1 *) + (1, pos-1) + + (* ext (leftpos, rightpos) = SOME((leftline, leftcol), (rightline, rightcol), filename) + * return NONE for invalid position (0,0) + *) + fun ext (0, 0) = NONE + | ext (left, right) = + SOME (look (left, !currLines, List.length (!currLines)), + look (right, !currLines, List.length (!currLines)), + !currFilename) + +end diff --git a/sources.cm b/sources.cm new file mode 100644 index 0000000..5ad3d61 --- /dev/null +++ b/sources.cm @@ -0,0 +1,35 @@ +Group is + $/basis.cm + $/smlnj-lib.cm + $/ml-yacc-lib.cm + + util/mark.sml + util/flag.sml + util/symbol.sml + util/errormsg.sml + util/safe-io.sml + util/word32.sml + + parse/ast.sml + parse/parsestate.sml + parse/l1.lex + parse/l1.grm + parse/parse.sml + + type/typechecker.sml + + trans/temp.sml + trans/tree.sml + trans/trans.sml + + codegen/x86.sml + codegen/liveness.sml + codegen/igraph.sml + codegen/colororder.sml + codegen/solidify.sml + codegen/coloring.sml + codegen/stringifier.sml + codegen/peephole.sml + codegen/codegen.sml + + top/top.sml diff --git a/top/top.sml b/top/top.sml new file mode 100644 index 0000000..cb409bf --- /dev/null +++ b/top/top.sml @@ -0,0 +1,147 @@ +(* L1 Compiler + * Top Level Environment + * Author: Kaustuv Chaudhuri + * Modified: Alex Vaynberg + * Modified: Frank Pfenning + *) + +signature TOP = +sig + (* main function for standalone executable + * use with SMLofNJ.exportFn("heapfilename", Top.main) + *) + val main : string * string list -> OS.Process.status + + (* test "arguments"; is the same as executing a saved + * heap with arguments on the command line + *) + val test : string -> OS.Process.status +end + +structure Top :> TOP = +struct + structure G = GetOpt (* from $/smlnj-lib/Util/getopt-sig.sml *) + + fun say s = TextIO.output (TextIO.stdErr, s ^ "\n") + + fun newline () = TextIO.output (TextIO.stdErr, "\n") + + exception EXIT + + (* see flag explanations below *) + val flag_verbose = Flag.flag "verbose" + val flag_ast = Flag.flag "ast" + val flag_ir = Flag.flag "ir" + val flag_assem = Flag.flag "assem" + + fun reset_flags () = + List.app Flag.unset [flag_verbose, flag_ast, + flag_ir, flag_assem]; + + val options = [{short = "v", long=["verbose"], + desc=G.NoArg (fn () => Flag.set flag_verbose), + help="verbose messages"}, + {short = "", long=["dump-ast"], + desc=G.NoArg (fn () => Flag.set flag_ast), + help="pretty print the AST"}, + {short = "", long=["dump-ir"], + desc=G.NoArg (fn () => Flag.set flag_ir), + help="pretty print the IR"}, + {short = "", long=["dump-assem"], + desc=G.NoArg (fn () => Flag.set flag_assem), + help="pretty print the assembly before register allocaction"} + ] + + + fun stem s = + let + val (prefix, suffix) = + Substring.splitr (fn c => c <> #".") (Substring.full s) + in + if Substring.isEmpty prefix (* no "." in string s *) + then s (* return whole string *) + else Substring.string (Substring.trimr 1 prefix) + end + + fun main (name, args) = + let + val header = "Usage: compile [OPTION...] SOURCEFILE\nwhere OPTION is" + val usageinfo = G.usageInfo {header = header, options = options} + fun errfn msg = (say (msg ^ "\n" ^ usageinfo) ; raise EXIT) + + val _ = Temp.reset (); (* reset temp variable counter *) + val _ = reset_flags (); (* return all flags to default value *) + + val _ = if List.length args = 0 then + (say usageinfo; raise EXIT) + else () + + val (opts, files) = + G.getOpt {argOrder = G.Permute, + options = options, + errFn = errfn} + args + + val source = + case files + of [] => errfn "Error: no input file" + | [filename] => filename + | _ => errfn "Error: more than one input file" + + val _ = Flag.guard flag_verbose say ("Parsing... " ^ source) + val ast = Parse.parse source + val _ = Flag.guard flag_ast + (fn () => say (Ast.Print.pp_program ast)) () + + val _ = Flag.guard flag_verbose say "Checking..." + val _ = TypeChecker.typecheck ast + + val _ = Flag.guard flag_verbose say "Translating..." + val ir = Trans.translate ast + val _ = Flag.guard flag_ir (fn () => say (Tree.Print.pp_program ir)) () + + val _ = Flag.guard flag_verbose say "Generating proto-x86_64 code..." + val assem = Codegen.codegen ir + val _ = Flag.guard flag_assem + (fn () => List.app (TextIO.print o x86.prettyprint) assem) () + + val _ = Flag.guard flag_verbose say "Analyzing liveness..." + val liveness = Liveness.liveness assem; + + val _ = Flag.guard flag_verbose say "Graphing..." + val igraph = Igraph.gengraph liveness; + + val _ = Flag.guard flag_verbose say "Ordering..." + val order = ColorOrder.colororder igraph; + + val _ = Flag.guard flag_verbose say "Coloring..." + val colors = Colorizer.colorize order igraph; + + val _ = Flag.guard flag_verbose say "Solidifying x86_64 code..." + val x86 = Solidify.solidify colors assem; + + val _ = Flag.guard flag_verbose say "Peepholing..." + val x86p = Peephole.peephole x86; + + val _ = Flag.guard flag_verbose say "Stringifying..." + val x86d = [x86.DIRECTIVE(".file\t\"" ^ source ^ "\""), + x86.DIRECTIVE(".globl _l1_main"), + x86.DIRECTIVE("_l1_main:")] + @ x86p + @ [x86.DIRECTIVE ".ident\t\"15-411 L1 compiler v2 by czl@ and jwise@\""] + val code = Stringify.stringify x86d + + val afname = stem source ^ ".s" + val _ = Flag.guard flag_verbose say ("Writing assembly to " ^ afname ^ " ...") + val _ = SafeIO.withOpenOut afname (fn afstream => + TextIO.output (afstream, code)) + in + OS.Process.success + end + handle ErrorMsg.Error => ( say "Compilation failed" ; OS.Process.failure ) + | EXIT => OS.Process.failure + | ErrorMsg.InternalError s => ( say ("Internal compiler error: "^s^"\n"); OS.Process.failure) + | e => (say ("Unrecognized exception " ^ exnMessage e); OS.Process.failure) + + fun test s = main ("", String.tokens Char.isSpace s) +end diff --git a/trans/temp.sml b/trans/temp.sml new file mode 100644 index 0000000..3f6c806 --- /dev/null +++ b/trans/temp.sml @@ -0,0 +1,33 @@ +(* L1 Compiler + * Temporaries + * Author: Kaustuv Chaudhuri + * Modified: Alex Vaynberg + * Modified: Frank Pfenning + *) + +signature TEMP = +sig + type temp + + val reset : unit -> unit (* resets temp numbering *) + val new : unit -> temp (* returns a unique new temp *) + val name : temp -> string (* returns the name of a temp *) + val compare : temp * temp -> order (* comparison function *) +end + +structure Temp :> TEMP = +struct + type temp = int + + local + val counter = ref 1 + in + (* warning: calling reset() may jeopardize uniqueness of temps! *) + fun reset () = ( counter := 1 ) + fun new () = !counter before ( counter := !counter + 1 ) + end + + fun name t = "+t" ^ Int.toString t + + fun compare (t1,t2) = Int.compare (t1,t2) +end diff --git a/trans/trans.sml b/trans/trans.sml new file mode 100644 index 0000000..8ddf780 --- /dev/null +++ b/trans/trans.sml @@ -0,0 +1,54 @@ +(* L1 Compiler + * AST -> IR Translator + * Author: Kaustuv Chaudhuri + * Modified by: Alex Vaynberg + * Modified: Frank Pfenning + *) + +signature TRANS = +sig + (* translate abstract syntax tree to IR tree *) + val translate : Ast.program -> Tree.stm list +end + +structure Trans :> TRANS = +struct + + structure A = Ast + structure T = Tree + + fun trans_oper A.PLUS = T.ADD + | trans_oper A.MINUS = T.SUB + | trans_oper A.TIMES = T.MUL + | trans_oper A.DIVIDEDBY = T.DIV + | trans_oper A.MODULO = T.MOD + | trans_oper A.NEGATIVE = T.SUB (* unary to binary! *) + + and trans_exp env (A.Var(id)) = + (* after type-checking, id must be declared; do not guard lookup *) + T.TEMP (Symbol.look' env id) + | trans_exp env (A.ConstExp c) = T.CONST(c) + | trans_exp env (A.OpExp(oper, [e1, e2])) = + T.BINOP(trans_oper oper, trans_exp env e1, trans_exp env e2) + | trans_exp env (A.OpExp(A.NEGATIVE, [e])) = + T.BINOP(trans_oper A.NEGATIVE, T.CONST(Word32Signed.ZERO), trans_exp env e) + | trans_exp env (A.Marked(marked_exp)) = + trans_exp env (Mark.data marked_exp) + (* anything else should be impossible *) + + (* translate the statement *) + (* trans_stms : Temp.temp Symbol.table -> A.stm list -> Tree.stm list *) + fun trans_stms env (A.Assign(id,e)::stms) = + let val t = Temp.new() + val env' = Symbol.bind env (id, t) + in + T.MOVE(T.TEMP(t), trans_exp env e) + :: trans_stms env' stms + end + | trans_stms env (A.Return e::nil) = + (* after type-checking, return must be last statement *) + T.RETURN (trans_exp env e) :: nil + + fun translate p = trans_stms Symbol.empty p + +end diff --git a/trans/tree.sml b/trans/tree.sml new file mode 100644 index 0000000..1cfc4eb --- /dev/null +++ b/trans/tree.sml @@ -0,0 +1,68 @@ +(* L1 Compiler + * IR Trees + * Author: Kaustuv Chaudhuri + * Modified: Alex Vaynberg + * Modified: Frank Pfenning + *) + +signature TREE = +sig + + datatype binop = ADD | SUB | MUL | DIV | MOD + + datatype exp = + CONST of Word32.word + | TEMP of Temp.temp + | BINOP of binop * exp * exp + and stm = + MOVE of exp * exp + | RETURN of exp + + type program = stm list + + structure Print : + sig + val pp_exp : exp -> string + val pp_stm : stm -> string + val pp_program : program -> string + end +end + +structure Tree :> TREE = +struct + + datatype binop = ADD | SUB | MUL | DIV | MOD + + datatype exp = + CONST of Word32.word + | TEMP of Temp.temp + | BINOP of binop * exp * exp + and stm = + MOVE of exp * exp + | RETURN of exp + + type program = stm list + + structure Print = + struct + + fun pp_binop ADD = "+" + | pp_binop SUB = "-" + | pp_binop MUL = "*" + | pp_binop DIV = "/" + | pp_binop MOD = "%" + + fun pp_exp (CONST(x)) = Word32Signed.toString x + | pp_exp (TEMP(t)) = Temp.name t + | pp_exp (BINOP (binop, e1, e2)) = + "(" ^ pp_exp e1 ^ " " ^ pp_binop binop ^ " " ^ pp_exp e2 ^ ")" + + fun pp_stm (MOVE (e1,e2)) = + pp_exp e1 ^ " <-- " ^ pp_exp e2 + | pp_stm (RETURN e) = + "return " ^ pp_exp e + + fun pp_program (nil) = "" + | pp_program (stm::stms) = pp_stm stm ^ "\n" ^ pp_program stms + end +end diff --git a/type/typechecker.sml b/type/typechecker.sml new file mode 100644 index 0000000..699d15a --- /dev/null +++ b/type/typechecker.sml @@ -0,0 +1,49 @@ +(* L1 Compiler + * TypeChecker + * Author: Alex Vaynberg + * Modified: Frank Pfenning + * + * Simple typechecker that is based on a unit Symbol.table + * This is all that is needed since there is only an integer type present + *) + +signature TYPE_CHECK = +sig + (* prints error message and raises ErrorMsg.error if error found *) + val typecheck : Ast.program -> unit +end; + +structure TypeChecker :> TYPE_CHECK = +struct + structure A = Ast + + (* tc_exp : unit Symbol.table -> Ast.exp -> Mark.ext option -> unit *) + fun tc_exp env (A.Var(id)) ext = + (case Symbol.look env id + of NONE => ( ErrorMsg.error ext ("undefined variable `" ^ Symbol.name id ^ "'") ; + raise ErrorMsg.Error ) + | SOME _ => ()) + | tc_exp env (A.ConstExp(c)) ext = () + | tc_exp env (A.OpExp(oper,es)) ext = + (* Note: it is syntactically impossible in this language to + * apply an operator to an incorrect number of arguments + * so we only check each of the arguments + *) + List.app (fn e => tc_exp env e ext) es + | tc_exp env (A.Marked(marked_exp)) ext = + tc_exp env (Mark.data marked_exp) (Mark.ext marked_exp) + + (* tc_stms : unit Symbol.table -> Ast.program -> unit *) + fun tc_stms env nil = () + | tc_stms env (A.Assign(id,e)::stms) = + ( tc_exp env e NONE ; + tc_stms (Symbol.bind env (id, ())) stms ) + | tc_stms env (A.Return(e)::nil) = + tc_exp env e NONE + | tc_stms env (A.Return _ :: _) = + ( ErrorMsg.error NONE ("`return' not last statement") ; + raise ErrorMsg.Error ) + + fun typecheck prog = tc_stms Symbol.empty prog + +end diff --git a/util/errormsg.sml b/util/errormsg.sml new file mode 100644 index 0000000..9662f79 --- /dev/null +++ b/util/errormsg.sml @@ -0,0 +1,44 @@ +(* L1 Compiler + * Error messages + * Author: Kaustuv Chaudhuri + * Annotations: Alex Vaynberg + * Modified: Frank Pfenning + *) + +signature ERRORMSG = +sig + (* clears out all errors from the system *) + val reset : unit -> unit + + (* global flag that indicates whether there were errors *) + val anyErrors : bool ref + + (* sets the error flag and prints out an error message, does NOT raise ERROR *) + val error : Mark.ext option -> string -> unit + (* same, but does not increment error count *) + val warn : Mark.ext option -> string -> unit + + (* generic code stopping exception *) + exception Error + exception InternalError of string +end + +structure ErrorMsg :> ERRORMSG = +struct + (* Initial values of compiler state variables *) + val anyErrors = ref false + + fun reset() = ( anyErrors := false ) + + fun msg str ext note = + (anyErrors := true; + Option.map (TextIO.print o Mark.show) ext; + List.app TextIO.print [":", str, ":", note, "\n"]) + + fun error ext note = (anyErrors := true; msg "error" ext note) + fun warn ext note = msg "warning" ext note + + (* Print the given error message and then abort compilation *) + exception Error + exception InternalError of string +end diff --git a/util/flag.sml b/util/flag.sml new file mode 100644 index 0000000..dd2363e --- /dev/null +++ b/util/flag.sml @@ -0,0 +1,51 @@ +(* L1 Compiler + * Simple structure for cleanly handling input parameters + * Author: Kaustuv Chaudhuri + * Annotations: Alex Vaynberg + *) + +signature FLAG = +sig + type flag + + val flag : string -> flag (* create a new flag that is not set *) + val not : flag -> flag (* reverses the meaning of flag being set *) + + val set : flag -> unit (* set a flag *) + val unset : flag -> unit (* unset a flag *) + val isset : flag -> bool (* check if the flag is set *) + + val guard : flag -> ('a -> unit) -> 'a -> unit + (* return a function that runs only if flag is set *) + val guards : flag list -> ('a -> unit) -> 'a -> unit + (* return a function that runs only if all flags are set *) + val someguards : flag list -> ('a -> unit) -> 'a -> unit + (* return a func that runs if one of flags is set *) + + (* get a function that runs the first one if flag is set, or second one if it is not *) + val branch : flag -> ('a -> 'b) * ('a -> 'b) -> 'a -> 'b + + val show : flag -> string (* returns string that contains the setting of the flag *) +end + +structure Flag :> FLAG = +struct + datatype flag = FLAG of {name : string, value : bool ref, post : bool -> bool} + + fun flag name = FLAG {name = name, value = ref false, post = fn b => b} + + fun set (FLAG f) = #value f := true + fun unset (FLAG f) = #value f := false + fun not (FLAG {name, value, post}) = + FLAG {name = name, value = value, post = fn b => Bool.not (post b)} + + fun isset (FLAG f) = (#post f) (! (#value f)) + + fun guard fl f x = if isset fl then f x else () + fun guards fls f x = if List.all isset fls then f x else () + fun someguards fls f x = if List.exists isset fls then f x else () + + fun branch fl (f, g) = if isset fl then f else g + + fun show (FLAG f) = "flag " ^ #name f ^ " = " ^ Bool.toString (! (#value f)) +end diff --git a/util/mark.sml b/util/mark.sml new file mode 100644 index 0000000..a83f65a --- /dev/null +++ b/util/mark.sml @@ -0,0 +1,96 @@ +(* L1 Compiler + * Positional Markers + * Author: Kaustuv Chaudhuri + * Annotations / bugfixes: Alex Vaynberg + *) + +signature MARK = +sig + type ext = (int * int) * (int * int) * string (* position *) + + val show : ext -> string (* converts the data into human readable form *) + + type 'a marked (* value with positional information *) + + (* INTRODUCTION FUNCTIONS for type 'a marked *) + + (* put together a value and positional information *) + val mark : 'a * ext -> 'a marked + + (* put together a value and an option of positional information *) + val mark' : 'a * ext option -> 'a marked + + (* mark the value with no positional information *) + val naked : 'a -> 'a marked + + (* ELIMINATION FUNCTIONS for type a' marked *) + + (* data: remove the markings *) + val data : 'a marked -> 'a + + (* ext: retrieve positional information from marked value*) + val ext : 'a marked -> ext option + + + (* USEFUL TOOLS *) + + (* wrap: + * returns SOME of positional information unit that contains each one in the list + * NONE if such wrap is not possible (spans several files, etc.) + *) + val wrap : ext option list -> ext option + + (* map: make your function keep positional information *) + val map : ('a -> 'b) -> 'a marked -> 'b marked + (* map': similar to map, but f can now use positional information + * and preserve it at the same time + *) + val map' : ('a marked -> 'b) -> 'a marked -> 'b marked +end + +structure Mark :> MARK = +struct + type ext = (int * int) * (int * int) * string + + fun pos (row, 0) = Int.toString row + | pos (row, col) = Int.toString row ^ "." ^ Int.toString col + + fun show (l, r, file) = file ^ ":" ^ pos l ^ "-" ^ pos r + + type 'a marked = 'a * ext option + + fun mark (d, e) = (d, SOME e) + fun mark' (d, e) = (d, e) + fun naked d = (d, NONE) + + fun data (d, e) = d + fun ext (d, e) = e + + fun extmin ((l1, c1), (l2, c2)) = + if l1 < l2 then (l1, c1) + else + if l1 > l2 then (l2, c2) + else (l1, Int.min (c1, c2)) + fun extmax ((l1, c1), (l2, c2)) = + if l1 > l2 then (l1, c1) + else + if l1 > l2 then (l2, c2) + else (l1, Int.min (c1, c2)) + + fun wrap [] = NONE + | wrap (e :: []) = e + | wrap (e :: el) = + (case wrap el of + NONE => NONE + | SOME (el1, el2, elf) => + (case e of + SOME (e1, e2, ef) => + if String.compare (ef, elf) = EQUAL then + SOME (extmin (e1, el1), extmax (e2, el2), ef) + else NONE + | NONE => SOME (el1, el2, elf))) + + + fun map f (d, e) = (f d, e) + fun map' f (m as (d, e)) = (f m, e) +end diff --git a/util/safe-io.sml b/util/safe-io.sml new file mode 100644 index 0000000..30b9c16 --- /dev/null +++ b/util/safe-io.sml @@ -0,0 +1,61 @@ +(* L1 Compiler + * Safe(r) I/O functions + * Author: Frank Pfenning + *) + +signature SAFE_IO = +sig + (* withOpenIn fileName (fn instream => body) = result + opens fileName for input to obtain instream and evaluates body. + The file is closed during normal and abnormal exit of body. + *) + val withOpenIn : string -> (TextIO.instream -> 'a) -> 'a + + (* withOpenOut fileName (fn outstream => body) = result + opens fileName for output to obtain outstream and evaluates body. + The file is closed during normal and abnormal exit of body. + *) + val withOpenOut : string -> (TextIO.outstream -> 'a) -> 'a +end + +structure SafeIO :> SAFE_IO = +struct + + (* result of a computation *) + datatype 'a Result = Value of 'a | Exception of exn + + + (* withOpenIn fileName (fn instream => body) = result + opens fileName for input to obtain instream and evaluates body. + The file is closed during normal and abnormal exit of body. + *) + fun withOpenIn (fileName) (scope) = + let + val instream = TextIO.openIn fileName + (* val _ = fileOpenMsg (fileName) *) + val result = Value (scope instream) handle exn => Exception (exn) + (* val _ = fileCloseMsg (fileName) *) + val _ = TextIO.closeIn instream + in + case result + of Value (x) => x + | Exception (exn) => raise exn + end + + (* withOpenOut fileName (fn outstream => body) = result + opens fileName for input to obtain outstream and evaluates body. + The file is closed during normal and abnormal exit of body. + *) + fun withOpenOut (fileName) (scope) = + let + val outstream = TextIO.openOut fileName + (* val _ = fileOpenMsg (fileName) *) + val result = Value (scope outstream) handle exn => Exception (exn) + (* val _ = fileCloseMsg (fileName) *) + val _ = TextIO.closeOut outstream + in + case result + of Value (x) => x + | Exception (exn) => raise exn + end +end diff --git a/util/symbol.sml b/util/symbol.sml new file mode 100644 index 0000000..590f9be --- /dev/null +++ b/util/symbol.sml @@ -0,0 +1,125 @@ +(* L1 Compiler + * The symbol tables + * Author: Kaustuv Chaudhuri + *) + +(* uses, from $/smlnj-lib/Util/ + structure HashTable + structure HashString + functor BinaryMapFn + functor BinarySetFn +*) + + +signature SYMBOL = +sig + type symbol + val compare : symbol * symbol -> order (* compare symbols by their creation time + * GREATER if they can not be compared + *) + + val bogus : symbol (* a dummy symbol, less than others *) + val is_bogus : symbol -> bool + + val reset : unit -> unit (* resets the hash table in which the symbols are stored *) + val symbol : string -> symbol (* generates a new symbol with given name *) + val name : symbol -> string (* returns a name associated with symbol *) + + (* symbol tables -- allows association of any type with each symbol *) + type 'a table + val empty : 'a table (* empty table *) + val digest : (symbol * 'a) list -> 'a table (* prefilled table *) + + val bind : 'a table -> symbol * 'a -> 'a table (* insert new item into table *) + val look : 'a table -> symbol -> 'a option (* return the value from the table *) + val look' : 'a table -> symbol -> 'a (* returns value from table, raise Option if not found *) + val count : 'a table -> int (* returns the number of items in the table *) + + val elems : 'a table -> 'a list (* return all the data as a list *) + val elemsi : 'a table -> (symbol * 'a) list (* return the symbols with the associated data *) + val keys : 'a table -> symbol list (* just the symbols *) + + (* symbol set -- similar to a () Symbol.table, elements can be removed *) + type set + val null : set (* empty set *) + val singleton : symbol -> set (* generate a set with one item *) + val add : set -> symbol -> set (* add a symbol *) + val remove : set -> symbol -> set (* remove a symbol *) + val member : set -> symbol -> bool (* is the symbol in the set? *) + val showmems : set -> string (* returns the string of delimited names, for debugging *) +end + + +structure Symbol :> SYMBOL = +struct + type symbol = string * int + + val bogus = ("?", ~1) + fun is_bogus (_, ~1) = true + | is_bogus _ = false + + fun compare ((n, i), (n', i')) = + if i < 0 orelse i' < 0 then GREATER + else Int.compare (i, i') + + local + exception Symbol + val nexts = ref 0 + fun initht () = + HashTable.mkTable (HashString.hashString, fn (x, y) => String.compare (x, y) = EQUAL) + (128, Symbol) + val ht = ref (initht ()) + in + fun reset () = (nexts := 0; + ht := initht ()) + fun symbol name = + (case HashTable.find (!ht) name of + SOME i => (name, i) + | NONE => let + val i = !nexts before nexts := !nexts + 1 + in + HashTable.insert (!ht) (name, i); + (name, i) + end) + + end + + fun name (n, i) = n + + structure Map = BinaryMapFn (struct + type ord_key = symbol + val compare = compare + end) + + type 'a table = 'a Map.map + + val empty = Map.empty + fun digest l = List.foldr (fn ((s, v), m) => Map.insert (m, s, v)) empty l + + fun bind t (s, x) = Map.insert (t, s, x) + fun look t s = Map.find (t, s) + fun look' t s = Option.valOf (look t s) + fun count t = Map.numItems t + fun elems t = Map.listItems t + fun elemsi t = Map.listItemsi t + fun keys t = Map.listKeys t + + fun delimit' [] s = s + | delimit' [x] s = s ^ x + | delimit' (x :: xs) s = delimit' xs (s ^ x ^ ", ") + fun delimit l = delimit' l "[" ^ "]" + + structure Set = BinarySetFn (struct + type ord_key = symbol + val compare = compare + end) + + type set = Set.set + + val null = Set.empty + val singleton = Set.singleton + fun add S s = Set.add (S, s) + fun remove S s = Set.delete (S, s) + fun member S s = Set.member (S, s) + fun showmems S = delimit (List.map name (Set.listItems S)) +end diff --git a/util/word32.sml b/util/word32.sml new file mode 100644 index 0000000..d830c9d --- /dev/null +++ b/util/word32.sml @@ -0,0 +1,41 @@ +(* L1 Compiler + * Utilities for signed modular arithmetic + * Author: Frank Pfenning + *) + +(* + * There are two useful structure in the SML Basis Library + * Int32, with 2's complement arithmetic, + * but it raises Overflow instead of doing modular arithmetic + * Word32, with unsigned modular arithmetic + * + * This structure implements some signed operations on Word32 + *) + +signature WORD32_SIGNED = +sig + val TMAX : Word32.word (* largest signed positive word, 2^31-1 *) + val TMIN : Word32.word (* smallest signed negative word -2^31 *) + val ZERO : Word32.word (* 0 *) + val fromString : string -> Word32.word option (* parse from string, no sign *) + (* raises Overflow if not 0 <= n < 2^32 *) + val toString : Word32.word -> string (* print to string, with sign *) +end + +structure Word32Signed :> WORD32_SIGNED = +struct + val TMIN = Word32.<<(Word32.fromInt(1), Word.fromInt(Word32.wordSize-1)) + val TMAX = Word32.-(TMIN, Word32.fromInt(1)) + val ZERO = Word32.fromInt(0) + fun neg w = Word32.>(w, TMAX) + + (* fromString does not allow leading "-" *) + fun fromString (str) = + (* scanString might also raise Overflow *) + StringCvt.scanString (Word32.scan StringCvt.DEC) str + + fun toString (w) = + if neg w + then "-" ^ Word32.fmt StringCvt.DEC (Word32.~(w)) + else Word32.fmt StringCvt.DEC w +end -- 2.43.0