module Decode( /* Decode stage: picks the register read ports and builds op0/op1/op2/carry from insn; results are registered on clk */
input clk,
+ input stall, /* while high, the registered outputs below keep their previous values */
input [31:0] insn,
input [31:0] inpc,
input [31:0] incpsr,
+ input [31:0] inspsr, /* copied to outspsr on every unstalled clock edge */
output reg [31:0] op0,
output reg [31:0] op1,
output reg [31:0] op2,
+ output reg carry, /* registered carry_out: shifter carry or incpsr C flag, depending on the instruction */
output reg [31:0] outcpsr,
+ output reg [31:0] outspsr,
- output [3:0] read_0,
- output [3:0] read_1,
- output [3:0] read_2,
+ output reg [3:0] read_0, /* register number whose value is expected on rdata_0 (presumably from the register file) */
+ output reg [3:0] read_1, /* register number whose value is expected on rdata_1 */
+ output reg [3:0] read_2, /* register number whose value is expected on rdata_2 */
input [31:0] rdata_0,
input [31:0] rdata_1,
input [31:0] rdata_2
);
- wire [31:0] regs0, regs1, regs2, rpc;
- wire [31:0] op1_res, cpsr;
+ wire [31:0] regs0, regs1, regs2;
+ reg [31:0] rpc; /* value substituted for reads of register 15; derived from inpc below */
+ reg [31:0] op0_out, op1_out, op2_out; /* combinational operands, latched into op0/op1/op2 on clk */
+ reg carry_out;
/* shifter stuff */
wire [31:0] shift_oper;
wire [31:0] shift_res;
wire shift_cflag_out;
+ wire [31:0] rotate_res; /* insn[7:0] zero-extended and rotated by the amount in insn[11:8] */
assign regs0 = (read_0 == 4'b1111) ? rpc : rdata_0; /* index 15 reads rpc instead of rdata_0 */
assign regs1 = (read_1 == 4'b1111) ? rpc : rdata_1; /* index 15 reads rpc instead of rdata_1 */
assign regs2 = rdata_2; /* use regs2 for things that cannot be r15 */
- IHATEARMSHIFT blowme(.insn(insn),
- .operand(regs1),
- .reg_amt(regs2),
- .cflag_in(incpsr[`CPSR_C]),
- .res(shift_res),
- .cflag_out(shift_cflag_out));
-
+ IREALLYHATEARMSHIFT shift(.insn(insn),
+ .operand(regs1),
+ .reg_amt(regs2),
+ .cflag_in(incpsr[`CPSR_C]),
+ .res(shift_res),
+ .cflag_out(shift_cflag_out));
+
+ SuckLessRotator whirr(.oper({24'b0, insn[7:0]}),
+ .amt(insn[11:8]),
+ .res(rotate_res));
+
always @(*) /* compute rpc, the value seen when an instruction reads register 15 */
casez (insn)
- 32'b????000000??????????????1001????, /* Multiply -- must come before ALU, because it pattern matches a specific case of ALU */
-// 32'b????00001???????????????1001????, /* Multiply long */
- 32'b????00010?001111????000000000000, /* MRS (Transfer PSR to register) */
- 32'b????00010?101001111100000000????, /* MSR (Transfer register to PSR) */
- 32'b????00?10?1010001111????????????, /* MSR (Transfer register or immediate to PSR, flag bits only) */
- 32'b????00010?00????????00001001????, /* Atomic swap */
- 32'b????000100101111111111110001????, /* Branch and exchange */
- 32'b????000??0??????????00001??1????, /* Halfword transfer - register offset */
- 32'b????000??1??????????00001??1????, /* Halfword transfer - register offset */
- 32'b????011????????????????????1????, /* Undefined. I hate ARM */
- 32'b????01??????????????????????????, /* Single data transfer */
- 32'b????100?????????????????????????, /* Block data transfer */
- 32'b????101?????????????????????????, /* Branch */
- 32'b????110?????????????????????????, /* Coprocessor data transfer */
- 32'b????1110???????????????????0????, /* Coprocessor data op */
- 32'b????1110???????????????????1????, /* Coprocessor register transfer */
- 32'b????1111????????????????????????: /* SWI */
- rpc = inpc - 8;
- 32'b????00??????????????????????????: /* ALU */
- rpc = inpc - (insn[25] ? 8 : (insn[4] ? 12 : 8));
- default: /* X everything else out */
+ `DECODE_ALU_MULT, /* Multiply -- must come before ALU, because it pattern matches a specific case of ALU */
+// `DECODE_ALU_MUL_LONG, /* Multiply long */
+ `DECODE_ALU_MRS, /* MRS (Transfer PSR to register) */
+ `DECODE_ALU_MSR, /* MSR (Transfer register to PSR) */
+ `DECODE_ALU_MSR_FLAGS, /* MSR (Transfer register or immediate to PSR, flag bits only) */
+ `DECODE_ALU_SWP, /* Atomic swap */
+ `DECODE_ALU_BX, /* Branch and exchange */
+ `DECODE_ALU_HDATA_REG, /* Halfword transfer - register offset */
+ `DECODE_ALU_HDATA_IMM, /* Halfword transfer - immediate offset */
+ `DECODE_LDRSTR_UNDEFINED, /* Undefined. I hate ARM */
+ `DECODE_LDRSTR, /* Single data transfer */
+ `DECODE_LDMSTM, /* Block data transfer */
+ `DECODE_BRANCH, /* Branch */
+ `DECODE_LDCSTC, /* Coprocessor data transfer */
+ `DECODE_CDP, /* Coprocessor data op */
+ `DECODE_SWI: /* SWI */
+ rpc = inpc + 8;
+ `DECODE_MRCMCR: /* Coprocessor register transfer */
+ rpc = inpc + 12;
+ `DECODE_ALU: /* ALU */
+ rpc = inpc + (insn[25] ? 8 : (insn[4] ? 12 : 8)); /* +12 only for a register operand with insn[4] set (register-specified shift) */
+ default: /* X everything else out */
rpc = 32'hxxxxxxxx;
endcase
-
+
always @(*) begin /* choose read ports and operands per instruction class; anything not assigned stays X */
read_0 = 4'hx;
read_1 = 4'hx;
read_2 = 4'hx;
+ op0_out = 32'hxxxxxxxx;
+ op1_out = 32'hxxxxxxxx;
+ op2_out = 32'hxxxxxxxx;
+ carry_out = 1'bx;
+
casez (insn)
- 32'b????000000??????????????1001????: /* Multiply -- must come before ALU, because it pattern matches a specific case of ALU */
+ `DECODE_ALU_MULT: /* Multiply -- must come before ALU, because it pattern matches a specific case of ALU */
begin
read_0 = insn[15:12]; /* Rn */
read_1 = insn[3:0]; /* Rm */
read_2 = insn[11:8]; /* Rs */
+
+ op0_out = regs0;
+ op1_out = regs1;
+ op2_out = regs2;
end
-// 32'b????00001???????????????1001????, /* Multiply long */
+// `DECODE_ALU_MUL_LONG: /* Multiply long */
+// begin
// read_0 = insn[11:8]; /* Rn */
// read_1 = insn[3:0]; /* Rm */
// read_2 = 4'b0; /* anyus */
- 32'b????00010?001111????000000000000, /* MRS (Transfer PSR to register) */
- 32'b????00010?101001111100000000????, /* MSR (Transfer register to PSR) */
- 32'b????00?10?1010001111????????????: /* MSR (Transfer register or immediate to PSR, flag bits only) */
- begin end /* Everything stays x'ed out. */
- 32'b????00??????????????????????????: /* ALU */
+//
+// op1_res = regs1;
+// end
+ `DECODE_ALU_MRS: /* MRS (Transfer PSR to register) */
+ begin end
+ `DECODE_ALU_MSR: /* MSR (Transfer register to PSR) */
begin
- read_0 = insn[19:16]; /* Rn */
- read_1 = insn[3:0]; /* Rm */
- read_2 = insn[11:8]; /* Rs for shift */
+ read_0 = insn[3:0]; /* Rm */
+
+ op0_out = regs0;
+ end
+ `DECODE_ALU_MSR_FLAGS: /* MSR (Transfer register or immediate to PSR, flag bits only) */
+ begin
+ read_0 = insn[3:0]; /* Rm */
+
+ if(insn[25]) begin /* the constant case */
+ op0_out = rotate_res;
+ end else begin
+ op0_out = regs0;
+ end
end
- 32'b????00010?00????????00001001????: /* Atomic swap */
+ `DECODE_ALU_SWP: /* Atomic swap */
begin
read_0 = insn[19:16]; /* Rn */
read_1 = insn[3:0]; /* Rm */
+
+ op0_out = regs0;
+ op1_out = regs1;
end
- 32'b????000100101111111111110001????: /* Branch and exchange */
+ `DECODE_ALU_BX: /* Branch and exchange */
+ begin
read_0 = insn[3:0]; /* Rn */
- 32'b????000??0??????????00001??1????: /* Halfword transfer - register offset */
+
+ op0_out = regs0;
+ end
+ `DECODE_ALU_HDATA_REG: /* Halfword transfer - register offset */
begin
read_0 = insn[19:16];
read_1 = insn[3:0];
+ read_2 = insn[15:12];
+
+ op0_out = regs0;
+ op1_out = regs1;
+ op2_out = regs2;
end
- 32'b????000??1??????????00001??1????: /* Halfword transfer - immediate offset */
+ `DECODE_ALU_HDATA_IMM: /* Halfword transfer - immediate offset */
begin
read_0 = insn[19:16];
- read_1 = insn[3:0];
+ read_1 = insn[15:12];
+
+ op0_out = regs0;
+ op1_out = {24'b0, insn[11:8], insn[3:0]}; /* 8-bit offset assembled from the two immediate nibbles */
+ op2_out = regs1; /* the insn[15:12] register arrives via read port 1 here, not port 2 */
end
- 32'b????011????????????????????1????: /* Undefined. I hate ARM */
- begin end
- 32'b????01??????????????????????????: /* Single data transfer */
+ `DECODE_ALU: /* ALU */
begin
read_0 = insn[19:16]; /* Rn */
read_1 = insn[3:0]; /* Rm */
- end
- 32'b????100?????????????????????????: /* Block data transfer */
- read_0 = insn[19:16];
- 32'b????101?????????????????????????: /* Branch */
- begin end
- 32'b????110?????????????????????????: /* Coprocessor data transfer */
- read_0 = insn[19:16];
- 32'b????1110???????????????????0????, /* Coprocessor data op */
- 32'b????1110???????????????????1????, /* Coprocessor register transfer */
- 32'b????1111????????????????????????: /* SWI */
- begin end
- default:
- $display("Undecoded instruction");
- endcase
- end
-
- always @(*) begin
- op1_res = 32'hxxxxxxxx;
- cpsr = 32'hxxxxxxxx;
- casez (insn)
- 32'b????000000??????????????1001????: begin /* Multiply */
- op1_res = regs1;
- cpsr = incpsr;
- end
-// 32'b????00001???????????????1001????: begin /* Multiply long */
-// op1_res = regs1;
-// end
- 32'b????00010?001111????000000000000: begin /* MRS (Transfer PSR to register) */
- cpsr = incpsr;
- end
- 32'b????00010?101001111100000000????: begin /* MSR (Transfer register to PSR) */
- cpsr = incpsr;
- end
- 32'b????00?10?1010001111????????????: begin /* MSR (Transfer register or immediate to PSR, flag bits onry) */
- cpsr = incpsr;
- end
- 32'b????00??????????????????????????: begin /* ALU */
+ read_2 = insn[11:8]; /* Rs for shift */
+
+ op0_out = regs0;
if(insn[25]) begin /* the constant case */
- cpsr = incpsr;
- op1_res = ({24'b0, insn[7:0]} >> {insn[11:8], 1'b0}) | ({24'b0, insn[7:0]} << (5'b0 - {insn[11:8], 1'b0}));
+ carry_out = incpsr[`CPSR_C]; /* immediate operand: carry is the incoming C flag */
+ op1_out = rotate_res;
end else begin
- cpsr = {incpsr[31:30], shift_cflag_out, incpsr[28:0]};
- op1_res = shift_res;
+ carry_out = shift_cflag_out;
+ op1_out = shift_res;
end
end
- 32'b????00010?00????????00001001????: begin /* Atomic swap */
- op1_res = regs1;
- end
- 32'b????000100101111111111110001????: begin /* Branch and exchange */
- cpsr = incpsr;
- end
- 32'b????000??0??????????00001??1????: begin /* Halfword transfer - register offset */
- op1_res = regs1;
- cpsr = incpsr;
- end
- 32'b????000??1??????????00001??1????: begin /* Halfword transfer - immediate offset */
- op1_res = {24'b0, insn[11:8], insn[3:0]};
- cpsr = incpsr;
- end
- 32'b????011????????????????????1????: begin /* Undefined. I hate ARM */
+ `DECODE_LDRSTR_UNDEFINED: /* Undefined. I hate ARM */
+ begin
/* nothing is read; read ports and operands keep their X defaults */
end
- 32'b????01??????????????????????????: begin /* Single data transfer */
- if(insn[25]) begin
- op1_res = {20'b0, insn[11:0]};
- cpsr = incpsr;
+ `DECODE_LDRSTR: /* Single data transfer */
+ begin
+ read_0 = insn[19:16]; /* Rn */
+ read_1 = insn[3:0]; /* Rm */
+ read_2 = insn[15:12];
+
+ op0_out = regs0;
+ if(!insn[25] /* immediate */) begin
+ op1_out = {20'b0, insn[11:0]}; /* zero-extended 12-bit offset */
+ carry_out = incpsr[`CPSR_C];
end else begin
- op1_res = shift_res;
- cpsr = {incpsr[31:30], shift_cflag_out, incpsr[28:0]};
+ op1_out = shift_res;
+ carry_out = shift_cflag_out;
end
+ op2_out = regs2;
end
- 32'b????100?????????????????????????: begin /* Block data transfer */
- op1_res = {16'b0, insn[15:0]};
- cpsr = incpsr;
+ `DECODE_LDMSTM: /* Block data transfer */
+ begin
+ read_0 = insn[19:16];
+
+ op0_out = regs0;
+ op1_out = {16'b0, insn[15:0]}; /* low 16 instruction bits, zero-extended */
end
- 32'b????101?????????????????????????: begin /* Branch */
- op1_res = {{6{insn[23]}}, insn[23:0], 2'b0};
- cpsr = incpsr;
+ `DECODE_BRANCH: /* Branch */
+ begin
+ op0_out = {{6{insn[23]}}, insn[23:0], 2'b0}; /* 24-bit offset, sign-extended and shifted left by 2 */
end
- 32'b????110?????????????????????????: begin /* Coprocessor data transfer */
- op1_res = {24'b0, insn[7:0]};
- cpsr = incpsr;
+ `DECODE_LDCSTC: /* Coprocessor data transfer */
+ begin
+ read_0 = insn[19:16];
+
+ op0_out = regs0;
+ op1_out = {24'b0, insn[7:0]};
end
- 32'b????1110???????????????????0????: begin /* Coprocessor data op */
- cpsr = incpsr;
+ `DECODE_CDP: /* Coprocessor data op */
+ begin
end
- 32'b????1110???????????????????1????: begin /* Coprocessor register transfer */
- cpsr = incpsr;
+ `DECODE_MRCMCR: /* Coprocessor register transfer */
+ begin
+ read_0 = insn[15:12];
+
+ op0_out = regs0;
end
- 32'b????1111????????????????????????: begin /* SWI */
- cpsr = incpsr;
+ `DECODE_SWI: /* SWI */
+ begin
end
- default: begin end
+ default:
+ $display("Undecoded instruction");
endcase
end
+
always @ (posedge clk) begin
- op0 <= regs0; /* Rn - always */
- op1 <= op1_res; /* 'operand 2' - Rm */
- op2 <= regs2; /* thirdedge - Rs */
- outcpsr <= cpsr;
+ if (!stall) /* outputs only advance when not stalled */
+ begin
+ op0 <= op0_out; /* Rn - always */
+ op1 <= op1_out; /* 'operand 2' - Rm */
+ op2 <= op2_out; /* thirdedge - Rs */
+ carry <= carry_out;
+ outcpsr <= incpsr; /* CPSR is forwarded unchanged; the shifter carry travels on 'carry' */
+ outspsr <= inspsr;
+ end
end
endmodule
-module IHATEARMSHIFT(
+module IREALLYHATEARMSHIFT( /* Applies the shift type in insn[6:5] to operand, giving res and a carry-out flag */
input [31:0] insn,
input [31:0] operand,
input [31:0] reg_amt,
input cflag_in,
- output [31:0] res,
- output cflag_out
+ output reg [31:0] res,
+ output reg cflag_out
);
wire [5:0] shift_amt;
- wire elanus;
+ reg is_arith, is_rot; /* mode selects handed to the shared right shifter */
+ wire rshift_cout;
+ wire [31:0] rshift_res;
+ assign shift_amt = insn[4] ? {|reg_amt[7:5], reg_amt[4:0]} /* reg-specified shift */
+ : {insn[11:7] == 5'b0, insn[11:7]}; /* immediate shift */
+
+ SuckLessShifter barrel(.oper(operand), /* right shifter whose result is used for LSR, ASR and ROR below */
+ .carryin(cflag_in),
+ .amt(shift_amt),
+ .is_arith(is_arith),
+ .is_rot(is_rot),
+ .res(rshift_res),
+ .carryout(rshift_cout));
- /* might want to write our own damn shifter that does arithmetic/logical efficiently and stuff */
always @(*)
- if(insn[4]) begin
- shift_amt = {|reg_amt[7:5], reg_amt[4:0]};
- elanus = 1'b1;
- end else begin
- shift_amt = {insn[11:7] == 5'b0, insn[11:7]};
- elanus = 1'b0;
+ case (insn[6:5]) /* derive the right-shifter mode from the shift type */
+ `SHIFT_LSL: begin
+ /* meaningless */
+ is_rot = 1'b0;
+ is_arith = 1'b0;
end
-
+ `SHIFT_LSR: begin
+ is_rot = 1'b0;
+ is_arith = 1'b0;
+ end
+ `SHIFT_ASR: begin
+ is_rot = 1'b0;
+ is_arith = 1'b1;
+ end
+ `SHIFT_ROR: begin
+ is_rot = 1'b1;
+ is_arith = 1'b0;
+ end
+ endcase /* NOTE(review): no default; assumes the four SHIFT_* macros (defined elsewhere) cover every insn[6:5] value -- confirm */
+
always @(*)
case (insn[6:5]) /* shift type */
- `SHIFT_LSL: begin
- {cflag_out, res} = {cflag_in, operand} << {elanus & shift_amt[5], shift_amt[4:0]};
- end
+ `SHIFT_LSL:
+ {cflag_out, res} = {cflag_in, operand} << {insn[4] & shift_amt[5], shift_amt[4:0]}; /* amount bit 5 only counts for register-specified shifts */
`SHIFT_LSR: begin
- {res, cflag_out} = {operand, cflag_in} >> shift_amt;
+ res = rshift_res;
+ cflag_out = rshift_cout;
end
`SHIFT_ASR: begin
- {res, cflag_out} = {operand, cflag_in} >> shift_amt | (operand[31] ? ~(33'h1FFFFFFFF >> shift_amt) : 33'b0);
+ res = rshift_res;
+ cflag_out = rshift_cout;
end
`SHIFT_ROR: begin
- if(!elanus && shift_amt[4:0] == 5'b0) begin /* RRX x.x */
+ if(!insn[4] && shift_amt[4:0] == 5'b0) begin /* RRX x.x */
res = {cflag_in, operand[31:1]};
cflag_out = operand[0];
- end else if(shift_amt == 6'b0) begin
- res = operand;
- cflag_out = cflag_in;
end else begin
- res = operand >> shift_amt[4:0] | operand << (5'b0 - shift_amt[4:0]);
- cflag_out = operand[shift_amt[4:0] - 5'b1];
+ res = rshift_res;
+ cflag_out = rshift_cout;
end
end
endcase
endmodule
+
+module SuckLessShifter(
+	input [31:0] oper,
+	input carryin,
+	input [5:0] amt,
+	input is_arith,
+	input is_rot,
+	output wire [31:0] res,
+	output wire carryout
+);
+
+	/*
+	 * Right barrel shifter with carry tracking.
+	 *
+	 * Each 33-bit stage is {value[31:0], carry}.  Bit k of amt enables a
+	 * right shift by 2**k (32, 16, 8, 4, 2, 1 from amt[5] down to amt[0]);
+	 * a disabled stage passes its input through.  The vacated high bits are
+	 * filled with the bits that fell off the bottom when is_rot is set,
+	 * otherwise with copies of sign_fill (oper[31] when is_arith, else 0).
+	 * The carry of a shifting stage is the highest bit that was shifted out.
+	 * With amt == 0 the outputs are simply {oper, carryin}.
+	 */
+	wire sign_fill = is_arith & oper[31];
+
+	/* shift by 32 */
+	wire [31:0] top32   = is_rot ? oper : {32{sign_fill}};
+	wire [32:0] after32 = amt[5] ? {top32, oper[31]} : {oper, carryin};
+
+	/* shift by 16 */
+	wire [15:0] top16   = is_rot ? after32[16:1] : {16{sign_fill}};
+	wire [32:0] after16 = amt[4] ? {top16, after32[32:17], after32[16]} : after32;
+
+	/* shift by 8 */
+	wire [7:0]  top8    = is_rot ? after16[8:1] : {8{sign_fill}};
+	wire [32:0] after8  = amt[3] ? {top8, after16[32:9], after16[8]} : after16;
+
+	/* shift by 4 */
+	wire [3:0]  top4    = is_rot ? after8[4:1] : {4{sign_fill}};
+	wire [32:0] after4  = amt[2] ? {top4, after8[32:5], after8[4]} : after8;
+
+	/* shift by 2 */
+	wire [1:0]  top2    = is_rot ? after4[2:1] : {2{sign_fill}};
+	wire [32:0] after2  = amt[1] ? {top2, after4[32:3], after4[2]} : after4;
+
+	/* shift by 1, then split the final stage into result and carry */
+	wire        top1    = is_rot ? after2[1] : sign_fill;
+	assign {res, carryout} = amt[0] ? {top1, after2[32:2], after2[1]} : after2;
+
+endmodule
+
+module SuckLessRotator(
+	input [31:0] oper,
+	input [3:0] amt,
+	output wire [31:0] res
+);
+
+	/*
+	 * Rotates oper right by 2*amt bits using four mux stages: amt[3]
+	 * rotates by 16, amt[2] by 8, amt[1] by 4 and amt[0] by 2.  A stage
+	 * whose amt bit is clear passes its input through unchanged.
+	 */
+	wire [31:0] ror16 = amt[3] ? {oper[15:0], oper[31:16]} : oper;
+	wire [31:0] ror8  = amt[2] ? {ror16[7:0], ror16[31:8]} : ror16;
+	wire [31:0] ror4  = amt[1] ? {ror8[3:0],  ror8[31:4]}  : ror8;
+
+	assign res = amt[0] ? {ror4[1:0], ror4[31:2]} : ror4;
+
+endmodule
+