/*
 * Execute -- pipeline stage "2a" -> "3a".
 *
 * Takes a decoded instruction (insn_2a) with its operands (op0..op2_2a) and
 * the current CPSR/SPSR, and produces on the next clock edge:
 *   - the register write-back request (write_reg/write_num/write_data_3a),
 *   - the possibly updated status registers (cpsr_3a/spsr_3a, cpsrup_3a),
 *   - a pass-through copy of pc/insn/op0..op2 for the following stage.
 * Combinationally it produces the branch request (jmp_2a/jmppc_2a) and the
 * stall request (outstall_2a), which is asserted while a multiply is running.
 *
 * The `DECODE_*, `MODE_USR, `CPSR_* and `ALU_* macros are expected to be
 * defined by the including project.
 */
module Execute(
    input clk,
    input Nrst,    /* XXX not used yet */

    input stall_2a,
    input flush_2a,

    input bubble_2a,
    input [31:0] pc_2a,
    input [31:0] insn_2a,
    input [31:0] cpsr_2a,
    input [31:0] spsr_2a,
    input [31:0] op0_2a,
    input [31:0] op1_2a,
    input [31:0] op2_2a,
    input carry_2a,

    output reg outstall_2a = 0,
    output reg bubble_3a = 1,
    output reg [31:0] cpsr_3a = 0,
    output reg [31:0] spsr_3a = 0,
    output reg cpsrup_3a = 0,
    output reg write_reg_3a = 1'bx,
    output reg [3:0] write_num_3a = 4'bxxxx,
    output reg [31:0] write_data_3a = 32'hxxxxxxxx,
    output reg [31:0] jmppc_2a,
    output reg jmp_2a,
    output reg [31:0] pc_3a,
    output reg [31:0] insn_3a,
    output reg [31:0] op0_3a, op1_3a, op2_3a
    );

    /* Multiplier interface */
    reg mult_start;
    reg [31:0] mult_acc0, mult_in0, mult_in1;
    wire mult_done;
    wire [31:0] mult_result;

    /* ALU interface */
    reg [31:0] alu_in0_2a, alu_in1_2a;
    reg [3:0] alu_op_2a;
    reg alu_setflags_2a;
    wire [31:0] alu_result_2a, alu_outcpsr_2a;
    wire alu_setres_2a;

    /* Combinational "next" values latched into the *_3a outputs. */
    reg next_bubble_3a;
    reg [31:0] next_cpsr_3a, next_spsr_3a;
    reg next_cpsrup_3a;
    reg next_write_reg_3a;
    reg [3:0] next_write_num_3a;
    reg [31:0] next_write_data_3a;

    Multiplier multiplier(
        .clk(clk), .Nrst(Nrst),
        .start(mult_start), .acc0(mult_acc0), .in0(mult_in0),
        .in1(mult_in1), .done(mult_done), .result(mult_result));

    ALU alu(
        .clk(clk), .Nrst(Nrst),
        .in0(alu_in0_2a), .in1(alu_in1_2a), .cpsr(cpsr_2a), .op(alu_op_2a),
        .setflags(alu_setflags_2a), .shifter_carry(carry_2a),
        .result(alu_result_2a), .cpsr_out(alu_outcpsr_2a), .setres(alu_setres_2a));

    /* Pipeline register: advance 2a -> 3a unless the stage is stalled from outside. */
    always @(posedge clk)
    begin
        if (!stall_2a)
        begin
            bubble_3a <= next_bubble_3a;
            cpsr_3a <= next_cpsr_3a;
            spsr_3a <= next_spsr_3a;
            cpsrup_3a <= next_cpsrup_3a;
            write_reg_3a <= next_write_reg_3a;
            write_num_3a <= next_write_num_3a;
            write_data_3a <= next_write_data_3a;
            pc_3a <= pc_2a;
            insn_3a <= insn_2a;
            op0_3a <= op0_2a;
            op1_3a <= op1_2a;
            op2_3a <= op2_2a;
        end
    end

    /*
     * A flush that arrives while this stage is stalling cannot be honoured
     * immediately; remember it until the stall is released.
     */
    reg delayedflush_2a = 0;
    always @(posedge clk)
        if (flush_2a && outstall_2a /* can't do it now, maybe later */)
            delayedflush_2a <= 1;
        else if (!outstall_2a /* anything pending has been handled this time around */)
            delayedflush_2a <= 0;

    /* Previous cycle's stall request; low means the instruction in 2a is new. */
    reg outstall_3a = 0;
    always @(posedge clk)
        outstall_3a <= outstall_2a;

    /*
     * Stall request.  Always propagates the incoming stall; a real
     * (non-bubble) multiply additionally stalls on its first cycle and until
     * the multiplier reports done.
     */
    always @(*)
    begin
        outstall_2a = stall_2a;

        casez (insn_2a)
        `DECODE_ALU_MULT:   /* Multiply -- must come before ALU, because it pattern matches a specific case of ALU */
            outstall_2a = outstall_2a | ((!outstall_3a | !mult_done) && !bubble_2a);
        endcase
    end

    /* ALU inputs */
    always @(*)
    begin
        alu_in0_2a = op0_2a;
        alu_in1_2a = op1_2a;
        alu_op_2a = insn_2a[24:21];
        alu_setflags_2a = insn_2a[20] /* S */;
    end

    /* Register outputs */
    always @(*)
    begin
        /* Defaults: status registers unchanged, no register write-back. */
        next_cpsr_3a = cpsr_2a;
        next_spsr_3a = spsr_2a;
        next_cpsrup_3a = 0;
        next_write_reg_3a = 0;
        next_write_num_3a = 4'hx;
        next_write_data_3a = 32'hxxxxxxxx;

        casez(insn_2a)
        `DECODE_ALU_MULT:   /* Multiply -- must come before ALU, because it pattern matches a specific case of ALU */
        begin
            /* With S set: N and Z come from the product, C is cleared, V is kept. */
            next_cpsr_3a = insn_2a[20] /* S */ ? {mult_result[31] /* N */, mult_result == 0 /* Z */, 1'b0 /* C */, cpsr_2a[28] /* V */, cpsr_2a[27:0]} : cpsr_2a;
            next_cpsrup_3a = insn_2a[20] /* S */;
            next_write_reg_3a = 1;
            next_write_num_3a = insn_2a[19:16] /* Rd -- note: a different field than the ALU's Rd */;
            next_write_data_3a = mult_result;
        end
        `DECODE_ALU_MRS:    /* MRS (Transfer PSR to register) */
        begin
            next_write_reg_3a = 1;
            next_write_num_3a = insn_2a[15:12];
            if (insn_2a[22] /* Ps */)
                next_write_data_3a = spsr_2a;
            else
                next_write_data_3a = cpsr_2a;
        end
        `DECODE_ALU_MSR,        /* MSR (Transfer register to PSR) */
        `DECODE_ALU_MSR_FLAGS:  /* MSR (Transfer register or immediate to PSR, flag bits only) */
        begin
            if ((cpsr_2a[4:0] == `MODE_USR) || (insn_2a[16] /* that random bit */ == 1'b0))    /* flags only */
            begin
                /*
                 * Flags-only transfer: all four condition flags N, Z, C, V
                 * (bits 31:28) are replaced; bits 27:0 are preserved.
                 * Bit 28 is V, matching the multiply and ALU flag layouts.
                 */
                if (insn_2a[22] /* Ps */)
                    next_spsr_3a = {op0_2a[31:28], spsr_2a[27:0]};
                else
                    next_cpsr_3a = {op0_2a[31:28], cpsr_2a[27:0]};
            end else begin
                if (insn_2a[22] /* Ps */)
                    next_spsr_3a = op0_2a;
                else
                    next_cpsr_3a = op0_2a;
            end
            next_cpsrup_3a = 1;
        end
        `DECODE_ALU_SWP,        /* Atomic swap */
        `DECODE_ALU_BX,         /* Branch */
        `DECODE_ALU_HDATA_REG,  /* Halfword transfer - register offset */
        `DECODE_ALU_HDATA_IMM:  /* Halfword transfer - immediate offset */
        begin end
        `DECODE_ALU:            /* ALU */
        begin
            /* Compare/test operations report setres == 0 and write no register. */
            if (alu_setres_2a) begin
                next_write_reg_3a = 1;
                next_write_num_3a = insn_2a[15:12] /* Rd */;
                next_write_data_3a = alu_result_2a;
            end

            if (insn_2a[20] /* S */) begin
                next_cpsrup_3a = 1;
                /* S with Rd == r15 copies SPSR into CPSR instead of the ALU flags. */
                next_cpsr_3a = (insn_2a[15:12] == 4'b1111) ? spsr_2a : alu_outcpsr_2a;
            end
        end
        `DECODE_LDRSTR_UNDEFINED,   /* Undefined */
        `DECODE_LDRSTR,             /* Single data transfer */
        `DECODE_LDMSTM:             /* Block data transfer */
        begin end
        `DECODE_BRANCH:             /* Branch */
        begin
            if (insn_2a[24] /* L */) begin
                next_write_reg_3a = 1;
                next_write_num_3a = 4'hE; /* link register */
                next_write_data_3a = pc_2a + 32'h4;
            end
        end
        endcase
    end

    /* Multiplier inputs */
    always @(*)
    begin
        mult_start = 0;
        mult_acc0 = 32'hxxxxxxxx;
        mult_in0 = 32'hxxxxxxxx;
        mult_in1 = 32'hxxxxxxxx;

        casez(insn_2a)
        `DECODE_ALU_MULT:
        begin
            if (!outstall_3a /* i.e., this is a new one */ && !bubble_2a /* i.e., this is a real one */)
            begin
                mult_start = 1;
                mult_acc0 = insn_2a[21] /* A */ ? op0_2a /* Rn */ : 32'h0;
                mult_in0 = op1_2a /* Rm */;
                mult_in1 = op2_2a /* Rs */;
                $display("New MUL instruction");
            end
        end
        endcase
    end

    /* Miscellaneous cleanup: bubble propagation and branch request. */
    always @(*)
    begin
        next_bubble_3a = bubble_2a | flush_2a | delayedflush_2a;

        jmp_2a = 1'b0;
        jmppc_2a = 32'h00000000;

        casez (insn_2a)
        `DECODE_ALU_MULT:   /* Multiply -- must come before ALU, because it pattern matches a specific case of ALU */
            /* Emit bubbles downstream until the multiply has actually finished. */
            next_bubble_3a = next_bubble_3a | !mult_done | !outstall_3a;
        `DECODE_ALU_MRS,            /* MRS (Transfer PSR to register) */
        `DECODE_ALU_MSR,            /* MSR (Transfer register to PSR) */
        `DECODE_ALU_MSR_FLAGS,      /* MSR (Transfer register or immediate to PSR, flag bits only) */
        `DECODE_ALU_SWP,            /* Atomic swap */
        `DECODE_ALU_BX,             /* Branch */
        `DECODE_ALU_HDATA_REG,      /* Halfword transfer - register offset */
        `DECODE_ALU_HDATA_IMM,      /* Halfword transfer - immediate offset */
        `DECODE_ALU,                /* ALU */
        `DECODE_LDRSTR_UNDEFINED,   /* Undefined */
        `DECODE_LDRSTR,             /* Single data transfer */
        `DECODE_LDMSTM:             /* Block data transfer */
        begin end
        `DECODE_BRANCH:             /* Branch */
        begin
            if (!bubble_2a && !flush_2a && !delayedflush_2a && !outstall_2a /* Let someone else take precedence. */) begin
                jmppc_2a = pc_2a + op0_2a + 32'h8;
                jmp_2a = 1'b1;
            end
        end
        `DECODE_LDCSTC,     /* Coprocessor data transfer */
        `DECODE_CDP,        /* Coprocessor data op */
        `DECODE_MRCMCR,     /* Coprocessor register transfer */
        `DECODE_SWI:        /* SWI */
        begin end
        default:            /* X everything else out */
        begin end
        endcase
    end
endmodule

/*
 * Multiplier -- iterative shift-and-add multiply-accumulate.
 *
 * Asserting `start` loads in0 as the bit pattern to scan, in1 as the
 * multiplicand and acc0 as the initial accumulator, and clears `done`.
 * Every following clock consumes the two low bits of the bit pattern, adding
 * 1x and/or 2x the (progressively shifted) multiplicand to the accumulator.
 * Once the remaining bit pattern is zero, the accumulator is copied to
 * `result` and `done` is raised; `done` stays high until the next `start`.
 */
module Multiplier(
    input clk,
    input Nrst,    /* XXX not used yet */

    input start,
    input [31:0] acc0,
    input [31:0] in0,
    input [31:0] in1,

    output reg done = 0,
    output reg [31:0] result);

    reg [31:0] bitfield;        /* bits of in0 not yet processed */
    reg [31:0] multiplicand;    /* in1, shifted left by two per step */
    reg [31:0] acc;             /* running sum */

    always @(posedge clk)
    begin
        if (start) begin
            bitfield <= in0;
            multiplicand <= in1;
            acc <= acc0;
            done <= 0;
        end else begin
            bitfield <= {2'b00, bitfield[31:2]};
            multiplicand <= {multiplicand[29:0], 2'b00};
            acc <= acc +
                   (bitfield[0] ? multiplicand : 0) +
                   (bitfield[1] ? {multiplicand[30:0], 1'b0} : 0);
            /* Nothing left to add: acc already holds the final value. */
            if (bitfield == 0) begin
                result <= acc;
                done <= 1;
            end
        end
    end
endmodule

/*
 * ALU -- purely combinational data-processing unit.
 *
 * in0, in1        operands
 * cpsr            current status register (source of carry-in and of the
 *                 flags that an operation leaves untouched)
 * op              4-bit operation selector, one of the `ALU_* codes
 * setflags        when high, cpsr_out carries the new N/Z/C/V in bits 31:28;
 *                 when low, cpsr_out is cpsr unchanged
 * shifter_carry   carry supplied from outside, used as C by the logical ops
 * result          operation result
 * setres          1 when result should be written back, 0 for the
 *                 compare/test operations (TST, TEQ, CMP, CMN)
 */
module ALU(
    input clk,
    input Nrst,    /* XXX not used yet */

    input [31:0] in0,
    input [31:0] in1,
    input [31:0] cpsr,
    input [3:0] op,
    input setflags,
    input shifter_carry,

    output reg [31:0] result,
    output reg [31:0] cpsr_out,
    output reg setres
    );

    reg flag_n, flag_z, flag_c, flag_v;
    wire [32:0] sum, diff, rdiff;
    wire sum_v, diff_v, rdiff_v;

    /* 33-bit arithmetic so that bit 32 carries the carry/borrow out. */
    assign sum = {1'b0, in0} + {1'b0, in1};
    assign diff = {1'b0, in0} - {1'b0, in1};
    assign rdiff = {1'b0, in1} - {1'b0, in0};
    /* Signed overflow: operand signs agree (add) / differ (sub) and the result sign flipped. */
    assign sum_v = (in0[31] ^~ in1[31]) & (sum[31] ^ in0[31]);
    assign diff_v = (in0[31] ^ in1[31]) & (diff[31] ^ in0[31]);
    assign rdiff_v = (in0[31] ^ in1[31]) & (rdiff[31] ^ in1[31]);

    always @(*) begin
        /*
         * Default every output of this block so that an unmatched (e.g. X)
         * op yields X instead of holding the previous value in a latch.
         */
        result = 32'hxxxxxxxx;
        setres = 1'bx;
        flag_c = cpsr[`CPSR_C];
        flag_v = cpsr[`CPSR_V];

        case(op)
        `ALU_AND: begin
            result = in0 & in1;
            flag_c = shifter_carry;
            setres = 1'b1;
        end
        `ALU_EOR: begin
            result = in0 ^ in1;
            flag_c = shifter_carry;
            setres = 1'b1;
        end
        `ALU_SUB: begin
            {flag_c, result} = diff;
            flag_c = !flag_c;   /* C is the inverse of the borrow */
            flag_v = diff_v;
            setres = 1'b1;
        end
        `ALU_RSB: begin
            {flag_c, result} = rdiff;
            flag_c = !flag_c;
            flag_v = rdiff_v;
            setres = 1'b1;
        end
        `ALU_ADD: begin
            {flag_c, result} = sum;
            flag_v = sum_v;
            setres = 1'b1;
        end
        `ALU_ADC: begin
            {flag_c, result} = sum + {32'b0, cpsr[`CPSR_C]};
            flag_v = sum_v | (~sum[31] & result[31]);
            setres = 1'b1;
        end
        `ALU_SBC: begin
            {flag_c, result} = diff - {32'b0, (~cpsr[`CPSR_C])};
            flag_c = !flag_c;
            flag_v = diff_v | (diff[31] & ~result[31]);
            setres = 1'b1;
        end
        `ALU_RSC: begin
            {flag_c, result} = rdiff - {32'b0, (~cpsr[`CPSR_C])};
            flag_c = !flag_c;
            flag_v = rdiff_v | (rdiff[31] & ~result[31]);
            setres = 1'b1;
        end
        `ALU_TST: begin
            result = in0 & in1;
            flag_c = shifter_carry;
            setres = 1'b0;
        end
        `ALU_TEQ: begin
            result = in0 ^ in1;
            flag_c = shifter_carry;
            setres = 1'b0;
        end
        `ALU_CMP: begin
            {flag_c, result} = diff;
            flag_c = !flag_c;
            flag_v = diff_v;
            setres = 1'b0;
        end
        `ALU_CMN: begin
            {flag_c, result} = sum;
            flag_v = sum_v;
            setres = 1'b0;
        end
        `ALU_ORR: begin
            result = in0 | in1;
            flag_c = shifter_carry;
            setres = 1'b1;
        end
        `ALU_MOV: begin
            result = in1;
            flag_c = shifter_carry;
            setres = 1'b1;
        end
        `ALU_BIC: begin
            result = in0 & (~in1);
            flag_c = shifter_carry;
            setres = 1'b1;
        end
        `ALU_MVN: begin
            result = ~in1;
            flag_c = shifter_carry;
            setres = 1'b1;
        end
        endcase

        /* N and Z always derive from the computed result. */
        flag_z = (result == 0);
        flag_n = result[31];

        cpsr_out = setflags ? {flag_n, flag_z, flag_c, flag_v, cpsr[27:0]} : cpsr;
    end
endmodule