Computer Architecture: A Constructive Approach SMIPS Implementations Arvind Computer Science & Artificial Intelligence Lab. Massachusetts Institute of Technology. Single-Cycle SMIPS. Register File. PC. Execute. Decode. +4. Data Memory. Inst Memory.
Computer Architecture: A Constructive Approach SMIPS Implementations Arvind Computer Science & Artificial Intelligence Lab. Massachusetts Institute of Technology
Single-Cycle SMIPS Register File PC Execute Decode +4 Data Memory Inst Memory Datapath is shown only for convenience; it will be derived automatically from the high-level textual description
Decoding Instructions: input-output types decode 31:26, 5:0 iType IType 31:26 aluFunc 5:0 AluFunc instruction 31:26 brComp Bit#(32) BrType Type DecodedInst 20:16 rDst 15:11 Rindex Mux control logic not shown 25:21 rSrc1 Rindex 20:16 rSrc2 Rindex 15:0 imm Bit#(32) ext 25:0 Bool immValid
// Typedefs

// Instruction class stored in DecodedInst.iType and inspected by the execute logic.
typedef enum {Alu, Ld, St, J, Jr, Jal, Jalr, Br} IType deriving (Bits, Eq);

// Comparison selected for branch resolution.
typedef enum {Eq, Neq, Le, Lt, Ge, Gt} BrType deriving (Bits, Eq);

// Operation selected for the ALU.
typedef enum {Add, Sub, And, Or, Xor, Nor, Slt, Sltu, LShift, RShift, Sra} AluFunc deriving (Bits, Eq);

// Coarse instruction grouping used to pick a decode case.
typedef enum {RAlu, IAlu, LdSt, J, Jr, Br} InstDType deriving (Bits, Eq);
Instruction Encoding Bit#(6) opADDIU = 6'b001001; Bit#(6) opSLTI = 6'b001010; Bit#(6) opLW = 6'b100011; Bit#(6) opSW = 6'b101011; Bit#(6) opJ = 6'b000010; Bit#(6) opBEQ = 6'b000100; … Bit#(6) opFUNC = 6'b000000; Bit#(6) fcADDU = 6'b100001; Bit#(6) fcAND = 6'b100100; Bit#(6) fcJR = 6'b001000; … Bit#(6) opRT = 6'b000001; Bit#(5) rtBLTZ = 5'b00000; Bit#(5) rtBGEZ = 5'b00001;
Instruction Grouping function InstDType getInstDType(Bit#(32) inst); return case (inst[31:26]) opADDIU, opSLTI, …: IAlu; opLW, opSW: LdSt; opJ, opJAL: J; opBEQ, opRT, …: Br; opFUNC: case (inst[5:0]) fcADDU, fcAND, …: RAlu; fcJR, fcJALR: Jr; endcase; endcase; endfunction
Decode Function function DecodedInst decode(Bit#(32) inst); DecodedInst dInst = ?; let opcode = inst[ 31 : 26 ]; let rs = inst[ 25 : 21 ]; let rt = inst[ 20 : 16 ]; let rd = inst[ 15 : 11 ]; let funct = inst[ 5 : 0 ]; let imm = inst[ 15 : 0 ]; let target = inst[ 25 : 0 ]; case (getInstDType(inst)) ... endcase return dInst; endfunction
Decoding Instructions: R-Type ALU case (getInstDType(inst)) … RAlu: begin dInst.iType = Alu; dInst.aluFunc = case (funct) fcADDU: Add; fcSUBU: Sub; ... endcase; dInst.rDst = rd; dInst.rSrc1 = rs; dInst.rSrc2 = rt; dInst.immValid = False; end
Decoding Instructions: I-Type ALU case (getInstDType(inst)) … IAlu: begin dInst.iType = Alu; dInst.aluFunc = case (opcode) opADDIU: Add; ... endcase; dInst.rDst = rt; dInst.rSrc1 = rs; dInst.imm = signedIAlu(opcode) ? signExtend(imm): zeroExtend(imm); dInst.immValid = True; end
Decoding Instructions: Load & Store case (getInstDType(inst)) … LdSt: begin dInst.iType = opcode==opLW ? Ld : St; dInst.aluFunc = Add; dInst.rDst = rt; dInst.rSrc1 = rs; dInst.rSrc2 = rt; dInst.imm = signExtend(imm); dInst.immValid = True; end
Decoding Instructions: Jump case (getInstDType(inst)) … J: begin dInst.iType = opcode==opJ ? J : Jal; dInst.rDst = 31; dInst.imm = zeroExtend({target, 2'b00}); dInst.immValid = False; end Jr: begin dInst.iType = funct==fcJR ? Jr : Jalr; dInst.rDst = rd; dInst.rSrc1 = rs; dInst.immValid = False; end
Decoding Instructions: Branch case (getInstDType(inst)) … Br: begin dInst.iType = Br; dInst.brComp = case (opcode) opBEQ : Eq; opBLEZ: Le; ... endcase; dInst.rSrc1 = rs; dInst.rSrc2 = rt; dInst.imm = signExtend({imm, 2'b00}); dInst.immValid = False; end
Executing Instructions execute iType rDst dInst either for rf write or St rVal2 data ALU either for memory reference or branch target rVal1 addr Pure combinational logic Branch Address brTaken pc
// Some Useful Functions

// True when the instruction type is a load or a store.
function Bool memType(IType i);
    return (i == Ld || i == St);
endfunction

// True when the instruction type is one of Alu, Ld, Jal or Jalr
// (the types for which the processor rules perform rf.wr).
function Bool regWriteType(IType i);
    return (i == Alu || i == Ld || i == Jal || i == Jalr);
endfunction

// True when the instruction type is a jump (J, Jr, Jal, Jalr) or a branch (Br).
function Bool controlType(IType i);
    return (i == J || i == Jr || i == Jal || i == Jalr || i == Br);
endfunction
// Execute Function
//
// Combinational execute step: runs the ALU, the branch comparator and the
// branch-address calculator on a decoded instruction and packs the results.
//
//   dInst  - decoded instruction
//   rVal1  - first register operand
//   rVal2  - second register operand
//   pc     - address passed to the branch-address calculation
//
// Returns an ExecInst with:
//   iType   - copied from dInst
//   brTaken - result of aluBr
//   addr    - ALU result for Ld/St, otherwise the computed branch address
//   data    - rVal2 for St, otherwise the ALU result
//   rDst    - copied from dInst
//
// NOTE(review): brRes is computed from dInst.brComp for every instruction;
// confirm that decode assigns brComp for all instruction types whose
// brTaken value is consumed by the caller.
function ExecInst exec(DecodedInst dInst, Data rVal1, Data rVal2, Addr pc);
    // Second ALU operand: the immediate when decode marked it valid.
    Data aluVal2 = dInst.immValid ? dInst.imm : rVal2;

    let aluRes = alu(rVal1, aluVal2, dInst.aluFunc);
    let brRes  = aluBr(rVal1, aluVal2, dInst.brComp);
    let brAddr = brAddrCalc(pc, rVal1, dInst.iType, dInst.imm);

    // Memory instructions use the ALU result as the address.
    let addr = memType(dInst.iType) ? aluRes : brAddr;
    // Stores carry the register value to be written to memory.
    let data = (dInst.iType == St) ? rVal2 : aluRes;

    return ExecInst{iType:   dInst.iType,
                    brTaken: brRes,
                    addr:    addr,
                    data:    data,
                    rDst:    dInst.rDst};
endfunction
// ALU
//
// Applies the operation selected by func to operands a and b.
// Shift operations use only the low five bits of b as the shift amount.
// The arithmetic, comparison and shift helpers are defined elsewhere.
function Data alu(Data a, Data b, AluFunc func);
    Data res = case (func)
        Add    : add(a, b);
        Sub    : subtract(a, b);
        And    : (a & b);
        Or     : (a | b);
        Xor    : (a ^ b);
        Nor    : ~(a | b);
        Slt    : setLessThan(a, b);
        Sltu   : setLessThanUnsigned(a, b);
        LShift : logicalShiftLeft(a, b[4:0]);
        RShift : logicalShiftRight(a, b[4:0]);
        Sra    : signedShiftRight(a, b[4:0]);
    endcase;
    return res;
endfunction
// Branch Resolution
//
// Evaluates the comparison selected by brComp.
// Eq and Neq compare a against b; Le, Lt, Ge and Gt compare a against zero
// using the signed comparison helpers, and do not look at b.
function Bool aluBr(Data a, Data b, BrType brComp);
    Bool taken = ?;
    case (brComp)
        Eq  : taken = (a == b);
        Neq : taken = (a != b);
        Le  : taken = signedLE(a, 0);
        Lt  : taken = signedLT(a, 0);
        Ge  : taken = signedGE(a, 0);
        Gt  : taken = signedGT(a, 0);
    endcase
    return taken;
endfunction
// Branch Address Calculation
//
// Computes the control-transfer target for the given instruction type:
//   J, Jal   - upper four bits of pc concatenated with imm[27:0]
//   Jr, Jalr - the register value val
//   others   - pc + imm
//
// NOTE(review): the default case adds imm to pc itself, not to pc + 4;
// confirm this matches the intended branch-offset base.
function Addr brAddrCalc(Addr pc, Data val, IType iType, Data imm);
    let targetAddr = case (iType)
        J, Jal   : {pc[31:28], imm[27:0]};
        Jr, Jalr : val;
        default  : pc + imm;
    endcase;
    return targetAddr;
endfunction
// Single-Cycle SMIPS
//
// Processor in which one rule firing fetches, decodes, executes, accesses
// data memory, updates the pc and writes the register file.
module mkProc(Proc);
    Reg#(Addr) pc  <- mkRegU;
    RFile      rf  <- mkRFile;
    Memory     mem <- mkTwoPortedMemory;

    // Separate ports for instruction fetch and data access.
    let iMem = mem.iport;
    let dMem = mem.dport;

    rule doProc;
        // Fetch the instruction at the current pc.
        let instr <- iMem(MemReq{op: Ld, addr: pc, data: ?});

        // Decode and read both source registers.
        let decoded = decode(instr);
        Data src1 = rf.rd1(decoded.rSrc1);
        Data src2 = rf.rd2(decoded.rSrc2);

        // Execute.
        let result = exec(decoded, src1, src2, pc);

        // Loads and stores go to data memory; the response replaces result.data.
        if (memType(result.iType)) begin
            result.data <- dMem(MemReq{op:   result.iType == Ld ? Ld : St,
                                       addr: result.addr,
                                       data: result.data});
        end

        // Next pc: redirect when the branch is taken, otherwise fall through.
        if (result.brTaken)
            pc <= result.addr;
        else
            pc <= pc + 4;

        // Write back for register-writing instruction types.
        if (regWriteType(result.iType)) begin
            rf.wr(result.rDst, result.data);
        end
    endrule
endmodule
SMIPS Princeton Architecture Register File Epoch PC Execute Decode fr +4 Memory
// Two-Cycle SMIPS
//
// Processor that alternates between two stages through the single memory
// port uMem: stage 0 fetches an instruction into fr, stage 1 decodes and
// executes it, performs any data-memory access, updates the pc and writes
// the register file.
module mkProc(Proc);
    Reg#(Addr) pc  <- mkRegU;
    RFile      rf  <- mkRFile;
    Memory     mem <- mkMemory;
    let uMem = mem.port;

    PipeReg#(FBundle) fr    <- mkPipeReg;
    Reg#(Bit#(1))     stage <- mkReg(0);

    rule doProc;
        // Stage 0: fetch and hand the instruction to stage 1.
        if (stage == 0 && fr.notFull) begin
            let inst <- uMem(MemReq{op: Ld, addr: pc, data: ?});
            fr.enq(FBundle{pc: pc, inst: inst});
            stage <= 1;
        end

        // Stage 1: decode, execute, memory access, write back.
        if (stage == 1 && fr.notEmpty) begin
            let frpc = fr.first.pc;
            let inst = fr.first.inst;
            let dInst = decode(inst);
            Data rVal1 = rf.rd1(dInst.rSrc1);
            Data rVal2 = rf.rd2(dInst.rSrc2);
            let eInst = exec(dInst, rVal1, rVal2, frpc);

            // Loads and stores use the same memory port as fetch.
            if (memType(eInst.iType))
                eInst.data <- uMem(MemReq{op:   eInst.iType == Ld ? Ld : St,
                                          addr: eInst.addr,
                                          data: eInst.data});

            pc <= eInst.brTaken ? eInst.addr : pc + 4;

            if (regWriteType(eInst.iType))
                rf.wr(eInst.rDst, eInst.data);

            fr.deq;
            stage <= 0;
        end
    endrule
endmodule
Two-Cycle SMIPS descriptions are the same for Harvard & Princeton Register File stage PC Execute Decode fr +4 Data Memory Inst Memory
Pipelined SMIPS (Princeton) Register File Epoch PC Execute Decode fr +4 Memory
Pipelined SMIPS (Princeton) module mkProc(Proc); Reg#(Addr) pc <- mkRegU; Reg#(Bool) epoch <- mkReg(True); RFile rf <- mkRFile; Memory mem <- mkOnePortedMemory; let uMem = mem.port; PipeReg#(FBundle) fr <- mkPipeReg; Wire#(Bool) memAcc <- mkDWire(False); rule doProc; if(fr.notFull && !memAcc) begin let inst <- uMem(MemReq{op: Ld, addr: pc, data: ?}); fr.enq(FBundle{pc: pc, epoch: epoch, inst: inst}); end
Pipelined SMIPS (Princeton) Addr redirPC = ?; Bool redirPCvalid = False; if(fr.notEmpty) begin let frpc = fr.first.pc; let inst = fr.first.inst; if(fr.first.epoch==epoch) begin let dInst = decode(inst); Data rVal1 = rf.rd1(dInst.rSrc1); Data rVal2 = rf.rd2(dInst.rSrc2); let eInst = exec(dInst, rVal1, rVal2, frpc); if(memType(eInst.iType)) begin eInst.data <- uMem(MemReq{ op: eInst.iType==Ld ? Ld : St, addr: eInst.addr, data: eInst.data}); memAcc <= True; end
Pipelined SMIPS (Princeton) if(eInst.brTaken) begin redirPC = eInst.addr; redirPCvalid = True; end if(
Two-Stage SMIPS (Harvard) Register File Epoch PC Execute Decode fr +4 Data Memory Inst Memory http://csg.csail.mit.edu/SNU
// Two-Stage SMIPS (Harvard)
// http://csg.csail.mit.edu/SNU
//
// Two-stage processor with separate instruction and data memory ports.
// Each fetched instruction is tagged with the current epoch. The execute
// half only processes an instruction whose tag matches the current epoch;
// either way the entry is dequeued. A taken branch redirects the pc and
// flips the epoch.
module mkProc(Proc);
    Reg#(Addr) pc    <- mkRegU;
    Reg#(Bool) epoch <- mkRegU;
    RFile      rf    <- mkRFile;
    Memory     mem   <- mkTwoPortedMemory;
    let iMem = mem.iport;
    let dMem = mem.dport;

    PipeReg#(FBundle) fr <- mkPipeReg;

    rule doProc;
        // Fetch: read the instruction at pc and enqueue it with its epoch tag.
        if (fr.notFull) begin
            let inst <- iMem(MemReq{op: Ld, addr: pc, data: ?});
            fr.enq(FBundle{pc: pc, epoch: epoch, inst: inst});
        end

        // Redirect information produced by the execute half, if any.
        Addr redirPC      = ?;
        Bool redirPCvalid = False;

        if (fr.notEmpty) begin
            let frpc = fr.first.pc;
            let inst = fr.first.inst;

            // Only instructions fetched under the current epoch are executed.
            if (fr.first.epoch == epoch) begin
                let dInst = decode(inst);
                Data rVal1 = rf.rd1(dInst.rSrc1);
                Data rVal2 = rf.rd2(dInst.rSrc2);
                let eInst = exec(dInst, rVal1, rVal2, frpc);

                if (memType(eInst.iType))
                    eInst.data <- dMem(MemReq{op:   eInst.iType == Ld ? Ld : St,
                                              addr: eInst.addr,
                                              data: eInst.data});

                if (eInst.brTaken) begin
                    redirPC      = eInst.addr;
                    redirPCvalid = True;
                end

                if (regWriteType(eInst.iType))
                    rf.wr(eInst.rDst, eInst.data);
            end

            // The entry is consumed whether or not it was executed.
            fr.deq;
        end

        // Next pc and epoch: redirect and flip the epoch on a taken branch.
        pc    <= redirPCvalid ? redirPC : pc + 4;
        epoch <= redirPCvalid ? !epoch : epoch;
    endrule
endmodule