diff --git a/generators/chipyard/src/main/resources/csrc/spiketile.cc b/generators/chipyard/src/main/resources/csrc/spiketile.cc index dd02d55e21..b0474ab696 100644 --- a/generators/chipyard/src/main/resources/csrc/spiketile.cc +++ b/generators/chipyard/src/main/resources/csrc/spiketile.cc @@ -10,6 +10,18 @@ #include #include +/* Includes for rocc support */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #if __has_include("spiketile_tsi.h") #define SPIKETILE_HTIF_TSI extern std::map tsis; @@ -89,6 +101,13 @@ class chipyard_simif_t : public simif_t void tcm_a(uint64_t address, uint64_t data, uint32_t mask, uint32_t opcode, uint32_t size); bool tcm_d(uint64_t *data); + bool rocc_handshake(rocc_insn_t *insn, reg_t* rs1, reg_t* rs2); + void push_rocc_insn(rocc_insn_t insn, reg_t rs1, reg_t rs2); + void push_rocc_result(long long int result); + long long int get_rocc_result(); + void set_rocc_exists(bool exists); + bool get_rocc_exists(); + void loadmem(size_t base, const char* fname); void drain_stq(); @@ -155,6 +174,12 @@ class chipyard_simif_t : public simif_t std::vector wb_q; std::vector st_q; + std::vector rocc_insn_q; + std::vector rocc_result_q; + std::vector rocc_rs1_q; + std::vector rocc_rs2_q; + bool rocc_exists; + std::map, uint64_t> readonly_cache; bool mmio_valid; @@ -181,6 +206,31 @@ class tile_t { context_t stq_context; }; +/* Begin RoCC header file */ +class generic_t : public extension_t +{ + public: + generic_t(chipyard_simif_t* s) { + simif = s; + } + + const char* name() { return "generic" ; } + + reg_t custom0(rocc_insn_t insn, reg_t xs1, reg_t xs2); + reg_t custom1(rocc_insn_t insn, reg_t xs1, reg_t xs2); + reg_t custom2(rocc_insn_t insn, reg_t xs1, reg_t xs2); + reg_t custom3(rocc_insn_t insn, reg_t xs1, reg_t xs2); + + virtual std::vector get_instructions(); + virtual std::vector get_disasms(); + + void reset() {}; + + protected: + chipyard_simif_t* simif; +}; +/* End RoCC header file */ + context_t *host; std::map tiles; std::ostream sout(nullptr); @@ -204,6 +254,7 @@ extern "C" void spike_tile(int hartid, char* isa, long long int ipc, long long int cycle, long long int* insns_retired, + unsigned char has_rocc, char debug, char mtip, char msip, char meip, @@ -285,7 +336,16 @@ extern "C" void spike_tile(int hartid, char* isa, unsigned char* tcm_d_valid, unsigned char tcm_d_ready, - long long int* tcm_d_data + long long int* tcm_d_data, + + unsigned char rocc_request_ready, + unsigned char* rocc_request_valid, + int* rocc_request_insn, + int* rocc_request_rs1, + int* rocc_request_rs2, + unsigned char rocc_response_valid, + long long int rocc_response_rd, + long long int rocc_response_result ) { if (!host) { @@ -312,6 +372,11 @@ extern "C" void spike_tile(int hartid, char* isa, sout); simif->harts[hartid] = p; + std::function extension; + generic_t* my_generic_extension = new generic_t(simif); + p->register_extension(my_generic_extension); + simif->set_rocc_exists(has_rocc); + s_vpi_vlog_info vinfo; if (!vpi_get_vlog_info(&vinfo)) abort(); @@ -425,8 +490,83 @@ extern "C" void spike_tile(int hartid, char* isa, if (tcm_d_ready) { *tcm_d_valid = simif->tcm_d((uint64_t*)tcm_d_data); } + + *rocc_request_valid = 0; + if (rocc_request_ready) { + *rocc_request_valid = simif->rocc_handshake((rocc_insn_t*) rocc_request_insn, (reg_t*) rocc_request_rs1, (reg_t*) rocc_request_rs2); + } + + if (rocc_response_valid) { + simif->push_rocc_result(rocc_response_result); + } +} + +/* Begin RoCC Section */ +reg_t generic_t::custom0(rocc_insn_t insn, reg_t xs1, reg_t xs2) { + bool has_rocc = simif->get_rocc_exists(); + if (!has_rocc) { + printf("Accelerator not instantiated, are you using the right config?\n"); + exit(1); + } else { + simif->push_rocc_insn(insn, xs1, xs2); + return simif->get_rocc_result(); + } +} + +reg_t generic_t::custom1(rocc_insn_t insn, reg_t xs1, reg_t xs2) { + bool has_rocc = simif->get_rocc_exists(); + if (!has_rocc) { + printf("Accelerator not instantiated, are you using the right config?\n"); + exit(1); + } else { + simif->push_rocc_insn(insn, xs1, xs2); + return simif->get_rocc_result(); + } +} + +reg_t generic_t::custom2(rocc_insn_t insn, reg_t xs1, reg_t xs2) { + bool has_rocc = simif->get_rocc_exists(); + if (!has_rocc) { + printf("Accelerator not instantiated, are you using the right config?\n"); + exit(1); + } else { + simif->push_rocc_insn(insn, xs1, xs2); + return simif->get_rocc_result(); + } +} + +reg_t generic_t::custom3(rocc_insn_t insn, reg_t xs1, reg_t xs2) { + bool has_rocc = simif->get_rocc_exists(); + if (!has_rocc) { + printf("Accelerator not instantiated, are you using the right config?\n"); + exit(1); + } else { + simif->push_rocc_insn(insn, xs1, xs2); + return simif->get_rocc_result(); + } +} + +define_custom_func(generic_t, "generic", generic_custom0, custom0); +define_custom_func(generic_t, "generic", generic_custom1, custom1); +define_custom_func(generic_t, "generic", generic_custom2, custom2); +define_custom_func(generic_t, "generic", generic_custom3, custom3); + +std::vector generic_t::get_instructions() +{ + std::vector insns; + push_custom_insn(insns, ROCC_OPCODE0, ROCC_OPCODE_MASK, ILLEGAL_INSN_FUNC, generic_custom0); + push_custom_insn(insns, ROCC_OPCODE1, ROCC_OPCODE_MASK, ILLEGAL_INSN_FUNC, generic_custom1); + push_custom_insn(insns, ROCC_OPCODE2, ROCC_OPCODE_MASK, ILLEGAL_INSN_FUNC, generic_custom2); + push_custom_insn(insns, ROCC_OPCODE3, ROCC_OPCODE_MASK, ILLEGAL_INSN_FUNC, generic_custom3); + return insns; } +std::vector generic_t::get_disasms() +{ + std::vector insns; + return insns; +} +/*End RoCC Section*/ chipyard_simif_t::chipyard_simif_t(size_t icache_ways, size_t icache_sets, @@ -1055,6 +1195,49 @@ bool chipyard_simif_t::tcm_d(uint64_t* data) { return true; } +bool chipyard_simif_t::rocc_handshake(rocc_insn_t* insn, reg_t* rs1, reg_t* rs2) { + if (rocc_insn_q.empty()) { + return false; + } + *insn = rocc_insn_q[0]; + *rs1 = rocc_rs1_q[0]; + *rs2 = rocc_rs2_q[0]; + + rocc_insn_q.erase(rocc_insn_q.begin()); + rocc_rs1_q.erase(rocc_rs1_q.begin()); + rocc_rs2_q.erase(rocc_rs2_q.begin()); + return true; +} + +void chipyard_simif_t::push_rocc_insn(rocc_insn_t insn, reg_t rs1, reg_t rs2) { + rocc_insn_q.push_back(insn); + rocc_rs1_q.push_back(rs1); + rocc_rs2_q.push_back(rs2); + + host->switch_to(); +} + +void chipyard_simif_t::push_rocc_result(long long int result) { + rocc_result_q.push_back(result); +} + +long long int chipyard_simif_t::get_rocc_result() { + while (rocc_result_q.size() == 0) { + host->switch_to(); + } + long long int result = rocc_result_q.front(); + rocc_result_q.erase(rocc_result_q.begin()); + return result; +} + +void chipyard_simif_t::set_rocc_exists(bool exists) { + rocc_exists = exists; +} + +bool chipyard_simif_t::get_rocc_exists() { + return rocc_exists; +} + void chipyard_simif_t::loadmem(size_t base, const char* fname) { class loadmem_memif_t : public memif_t { public: diff --git a/generators/chipyard/src/main/resources/vsrc/spiketile.v b/generators/chipyard/src/main/resources/vsrc/spiketile.v index 8921f68fb4..869ff54031 100644 --- a/generators/chipyard/src/main/resources/vsrc/spiketile.v +++ b/generators/chipyard/src/main/resources/vsrc/spiketile.v @@ -19,6 +19,7 @@ import "DPI-C" function void spike_tile(input int hartid, input longint ipc, input longint cycle, output longint insns_retired, + input bit has_rocc, input bit debug, input bit mtip, @@ -102,7 +103,16 @@ import "DPI-C" function void spike_tile(input int hartid, output bit tcm_d_valid, input bit tcm_d_ready, - output longint tcm_d_data + output longint tcm_d_data, + + input bit rocc_request_ready, + output bit rocc_request_valid, + output longint rocc_request_insn, + output longint rocc_request_rs1, + output longint rocc_request_rs2, + input bit rocc_response_valid, + input longint rocc_response_rd, + input longint rocc_response_result ); @@ -128,6 +138,7 @@ module SpikeBlackBox #( input [63:0] ipc, input [63:0] cycle, output [63:0] insns_retired, + input has_rocc, input debug, input mtip, @@ -211,12 +222,25 @@ module SpikeBlackBox #( output tcm_d_valid, input tcm_d_ready, - output [63:0] tcm_d_data + output [63:0] tcm_d_data, + + + input rocc_request_ready, + output rocc_request_valid, + output [63:0] rocc_request_insn, + output [63:0] rocc_request_rs1, + output [63:0] rocc_request_rs2, + + input rocc_response_valid, + input [63:0] rocc_response_rd, + input [63:0] rocc_response_result ); longint __insns_retired; reg [63:0] __insns_retired_reg; + wire __has_rocc; + wire __icache_a_ready; bit __icache_a_valid; longint __icache_a_address; @@ -290,7 +314,19 @@ module SpikeBlackBox #( reg __tcm_d_valid_reg; reg [63:0] __tcm_d_data_reg; + wire __rocc_request_ready; + bit __rocc_request_valid; + longint __rocc_request_insn; + longint __rocc_request_rs1; + longint __rocc_request_rs2; + reg __rocc_request_valid_reg; + reg [63:0] __rocc_request_insn_reg; + reg [63:0] __rocc_request_rs1_reg; + reg [63:0] __rocc_request_rs2_reg; + wire __rocc_response_valid; + longint __rocc_response_rd; + longint __rocc_response_result; always @(posedge clock) begin if (reset) begin @@ -359,6 +395,7 @@ module SpikeBlackBox #( __tcm_d_valid_reg <= 1'b0; __tcm_d_data = 64'h0; __tcm_d_data_reg <= 64'h0; + spike_tile_reset(HARTID); end else begin spike_tile(HARTID, ISA, PMPREGIONS, @@ -367,6 +404,7 @@ module SpikeBlackBox #( ICACHE_SOURCEIDS, DCACHE_SOURCEIDS, TCM_BASE, TCM_SIZE, reset_vector, ipc, cycle, __insns_retired, + __has_rocc, debug, mtip, msip, meip, seip, __icache_a_ready, __icache_a_valid, __icache_a_address, __icache_a_sourceid, @@ -391,7 +429,10 @@ module SpikeBlackBox #( mmio_d_valid, mmio_d_data, tcm_a_valid, tcm_a_address, tcm_a_data, tcm_a_mask, tcm_a_opcode, tcm_a_size, - __tcm_d_valid, __tcm_d_ready, __tcm_d_data + __tcm_d_valid, __tcm_d_ready, __tcm_d_data, + + __rocc_request_ready, __rocc_request_valid, __rocc_request_insn, __rocc_request_rs1, __rocc_request_rs2, + __rocc_response_valid, rocc_response_rd, __rocc_response_result ); __insns_retired_reg <= __insns_retired; @@ -430,6 +471,11 @@ module SpikeBlackBox #( __tcm_d_valid_reg <= __tcm_d_valid; __tcm_d_data_reg <= __tcm_d_data; + __rocc_request_valid_reg <= __rocc_request_valid; + __rocc_request_insn_reg <= __rocc_request_insn; + __rocc_request_rs1_reg <= __rocc_request_rs1; + __rocc_request_rs2_reg <= __rocc_request_rs2; + end end // always @ (posedge clock) assign insns_retired = __insns_retired_reg; @@ -473,4 +519,14 @@ module SpikeBlackBox #( assign tcm_d_data = __tcm_d_data_reg; assign __tcm_d_ready = tcm_d_ready; + assign __has_rocc = has_rocc; + assign rocc_request_valid = __rocc_request_valid_reg; + assign rocc_request_insn = __rocc_request_insn_reg; + assign rocc_request_rs1 = __rocc_request_rs1_reg; + assign rocc_request_rs2 = __rocc_request_rs2_reg; + assign __rocc_request_ready = rocc_request_ready; + assign __rocc_response_valid = rocc_response_valid; + assign __rocc_response_rd = rocc_response_rd; + assign __rocc_response_result = rocc_response_result; + endmodule; diff --git a/generators/chipyard/src/main/scala/SpikeTile.scala b/generators/chipyard/src/main/scala/SpikeTile.scala index 97866c50f2..19f4a70505 100644 --- a/generators/chipyard/src/main/scala/SpikeTile.scala +++ b/generators/chipyard/src/main/scala/SpikeTile.scala @@ -65,7 +65,7 @@ case class SpikeCoreParams() extends CoreParams { val useConditionalZero = false override def vLen = 128 - override def vMemDataBits = 128 + override def vMemDataBits = 64 //128 } case class SpikeTileAttachParams( @@ -176,6 +176,9 @@ class SpikeTile( tlMasterXbar.node := TLWidthWidget(8) := TLBuffer() := mmioNode override lazy val module = new SpikeTileModuleImp(this) + val rocc_sequence = p(BuildRoCC).map(_(p)) + val has_rocc = rocc_sequence.nonEmpty + val rocc_module = if (has_rocc) rocc_sequence.head else null } class SpikeBlackBox( @@ -193,7 +196,8 @@ class SpikeBlackBox( executable_regions: String, tcm_base: BigInt, tcm_size: BigInt, - use_dtm: Boolean) extends BlackBox(Map( + use_dtm: Boolean, + ) extends BlackBox(Map( "HARTID" -> IntParam(hartId), "ISA" -> StringParam(isa), "PMPREGIONS" -> IntParam(pmpregions), @@ -218,6 +222,7 @@ class SpikeBlackBox( val ipc = Input(UInt(64.W)) val cycle = Input(UInt(64.W)) val insns_retired = Output(UInt(64.W)) + val has_rocc = Input(Bool()) val debug = Input(Bool()) val mtip = Input(Bool()) @@ -303,6 +308,21 @@ class SpikeBlackBox( val data = Output(UInt(64.W)) } } + + val rocc = new Bundle { + val request = new Bundle { + val ready = Input(Bool()) + val valid = Output(Bool()) + val insn = Output(UInt(64.W)) + val rs1 = Output(UInt(64.W)) + val rs2 = Output(UInt(64.W)) + } + val response = new Bundle { + val valid = Input(Bool()) + val rd = Input(UInt(64.W)) + val result = Input(UInt(64.W)) + } + } }) addResource("/vsrc/spiketile.v") addResource("/csrc/spiketile.cc") @@ -347,6 +367,7 @@ class SpikeTileModuleImp(outer: SpikeTile) extends BaseTileModuleImp(outer) { outer.spikeTileParams.tcmParams.map(_.size).getOrElse(0), useDTM )) + spike.io.has_rocc := outer.has_rocc.asBool spike.io.clock := clock.asBool val cycle = RegInit(0.U(64.W)) cycle := cycle + 1.U @@ -466,6 +487,109 @@ class SpikeTileModuleImp(outer: SpikeTile) extends BaseTileModuleImp(outer) { tcm_tl.d.valid := spike.io.tcm.d.valid tcm_tl.d.bits.data := spike.io.tcm.d.data } + + /* Begin RoCC Section */ + if (outer.has_rocc) { + val to_rocc_enq_bits = IO(new Bundle{ + val rs2 = UInt(64.W) + val rs1 = UInt(64.W) + val insn = UInt(64.W) + }) + + val to_rocc_q = Module(new Queue(UInt(192.W), 1, flow=true, pipe=true)) + spike.io.rocc.request.ready := to_rocc_q.io.enq.ready && to_rocc_q.io.count === 0.U + to_rocc_q.io.enq.valid := spike.io.rocc.request.valid + to_rocc_enq_bits.insn := spike.io.rocc.request.insn + to_rocc_enq_bits.rs1 := spike.io.rocc.request.rs1 + to_rocc_enq_bits.rs2 := spike.io.rocc.request.rs2 + to_rocc_q.io.enq.bits := to_rocc_enq_bits.asUInt + + outer.rocc_module.module.io.cmd.valid := to_rocc_q.io.deq.valid + to_rocc_q.io.deq.ready := outer.rocc_module.module.io.cmd.ready + + val inst = Wire(new RoCCInstruction()) + inst.funct := to_rocc_q.io.deq.bits(31,25) + inst.rs2 := to_rocc_q.io.deq.bits(24,20) + inst.rs1 := to_rocc_q.io.deq.bits(19,15) + inst.xd := to_rocc_q.io.deq.bits(14) + inst.xs1 := to_rocc_q.io.deq.bits(13) + inst.xs2 := to_rocc_q.io.deq.bits(12) + inst.rd := to_rocc_q.io.deq.bits(11,7) + inst.opcode := to_rocc_q.io.deq.bits(6,0) + + val cmd = Wire(new RoCCCommand()) + cmd.inst := inst + cmd.rs1 := to_rocc_q.io.deq.bits(127,64) + cmd.rs2 := to_rocc_q.io.deq.bits(191,128) + cmd.status := DontCare + outer.rocc_module.module.io.cmd.bits := cmd + dontTouch(outer.rocc_module.module.io) + + //Instantiate unused signals, will probably be used as interface develops further. + outer.rocc_module.module.io.mem.req.ready := false.B + outer.rocc_module.module.io.mem.s2_nack := false.B + outer.rocc_module.module.io.mem.s2_uncached := false.B + outer.rocc_module.module.io.mem.s2_paddr := 0.U + outer.rocc_module.module.io.mem.resp.valid := false.B + outer.rocc_module.module.io.mem.resp.bits := DontCare + outer.rocc_module.module.io.mem.replay_next := false.B + outer.rocc_module.module.io.mem.s2_xcpt.ma.ld := false.B + outer.rocc_module.module.io.mem.s2_xcpt.ma.st := false.B + outer.rocc_module.module.io.mem.s2_xcpt.pf.ld := false.B + outer.rocc_module.module.io.mem.s2_xcpt.pf.st := false.B + outer.rocc_module.module.io.mem.s2_xcpt.ae.ld := false.B + outer.rocc_module.module.io.mem.s2_xcpt.ae.st := false.B + outer.rocc_module.module.io.mem.s2_xcpt.gf.ld := false.B + outer.rocc_module.module.io.mem.s2_xcpt.gf.st := false.B + outer.rocc_module.module.io.mem.s2_gpa := 0.U + outer.rocc_module.module.io.mem.ordered := false.B + outer.rocc_module.module.io.mem.perf.acquire := false.B + outer.rocc_module.module.io.mem.perf.release := false.B + outer.rocc_module.module.io.mem.perf.grant := false.B + outer.rocc_module.module.io.exception := false.B + outer.rocc_module.module.io.mem.clock_enabled := true.B + outer.rocc_module.module.io.mem.perf.storeBufferEmptyAfterStore := false.B + outer.rocc_module.module.io.mem.perf.storeBufferEmptyAfterLoad := false.B + outer.rocc_module.module.io.mem.perf.canAcceptLoadThenLoad := false.B + outer.rocc_module.module.io.mem.perf.canAcceptStoreThenLoad := false.B + outer.rocc_module.module.io.mem.perf.canAcceptStoreThenRMW := false.B + outer.rocc_module.module.io.mem.s2_nack_cause_raw := 0.U + outer.rocc_module.module.io.mem.s2_gpa_is_pte := false.B + outer.rocc_module.module.io.mem.perf.tlbMiss := false.B + outer.rocc_module.module.io.mem.perf.blocked := false.B + + outer.rocc_module.module.io.fpu_req.ready := false.B + outer.rocc_module.module.io.fpu_resp.valid := false.B + outer.rocc_module.module.io.fpu_resp.bits := DontCare + + val from_rocc_enq_bits = IO(new Bundle { + val rd = UInt(64.W) + val resp = UInt(64.W) + }) + + val from_rocc_q = Module(new Queue(UInt(128.W), 1, flow=true, pipe=true)) //rd and result stitched together + outer.rocc_module.module.io.resp.ready := from_rocc_q.io.enq.ready && from_rocc_q.io.count === 0.U + from_rocc_q.io.enq.valid := outer.rocc_module.module.io.resp.valid + + from_rocc_enq_bits.rd := outer.rocc_module.module.io.resp.bits.rd + from_rocc_enq_bits.resp := outer.rocc_module.module.io.resp.bits.data + from_rocc_q.io.enq.bits := from_rocc_enq_bits.asUInt + spike.io.rocc.response.valid := false.B + from_rocc_q.io.deq.ready := true.B + spike.io.rocc.response.rd := from_rocc_q.io.deq.bits(127,64) + spike.io.rocc.response.result := 0.U + + when (from_rocc_q.io.deq.fire) { + spike.io.rocc.response.valid := true.B + spike.io.rocc.response.result := from_rocc_q.io.deq.bits(63,0) + } + } else { + spike.io.rocc.request.ready := false.B + spike.io.rocc.response.valid := false.B + spike.io.rocc.response.result := 0.U + spike.io.rocc.response.rd := 0.U + } + /* End RoCC Section */ } class WithNSpikeCores(n: Int = 1, tileParams: SpikeTileParams = SpikeTileParams() @@ -497,3 +621,15 @@ class WithSpikeTCM extends Config((site, here, up) => { case ExtMem => None case SubsystemBankedCoherenceKey => up(SubsystemBankedCoherenceKey).copy(nBanks = 0) }) + +/** + * Config fragment to enable different RoCCs, work in progress + */ +class WithAccumRoCC extends Config((site, here, up) => { + case BuildRoCC => List( + (p: Parameters) => { + val accumulator = LazyModule(new AccumulatorExample(OpcodeSet.custom0, n = 4)(p)) + accumulator + } + ) +})