Skip to content

Commit 068dd7a

Browse files
committed
Cleanup DCA implementation
1 parent 3d4586b commit 068dd7a

File tree

17 files changed

+761
-789
lines changed

17 files changed

+761
-789
lines changed

Bender.yml

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@ export_include_dirs:
3333
- hw/reqrsp_interface/include
3434
- hw/mem_interface/include
3535
- hw/tcdm_interface/include
36+
- hw/dca_interface/include
3637
- hw/snitch/include
38+
- hw/snitch_fp_ss/include
3739
- hw/snitch_ssr/include
3840
- hw/generated
3941

@@ -91,6 +93,10 @@ sources:
9193
- hw/tcdm_interface/test/reqrsp_to_tcdm_tb.sv
9294
- hw/tcdm_interface/test/tcdm_mux_tb.sv
9395

96+
# dca_interface
97+
- files:
98+
- hw/dca_interface/src/dca_fork.sv
99+
94100
# snitch
95101
- files:
96102
# Level 0
@@ -152,20 +158,25 @@ sources:
152158
- hw/snitch_ssr/test/tb_simple_ssr.sv
153159
- hw/snitch_ssr/test/tb_simple_ssr_streamer.sv
154160

161+
# snitch_fp_ss
162+
- files:
163+
# Level 0
164+
- hw/snitch_fp_ss/src/snitch_fpu.sv
165+
- hw/snitch_fp_ss/src/snitch_sequencer.sv
166+
# Level 1
167+
- hw/snitch_fp_ss/src/snitch_fp_ss.sv
168+
155169
# snitch_cluster
156170
- files:
157171
# Level 0
158172
- hw/snitch_cluster/src/snitch_amo_shim.sv
159173
- hw/snitch_cluster/src/snitch_cluster_peripheral/snitch_cluster_peripheral_reg_pkg.sv
160174
- hw/snitch_cluster/src/snitch_cluster_peripheral/snitch_cluster_peripheral_reg.sv
161175
- hw/snitch_cluster/src/snitch_cluster_peripheral/snitch_cluster_peripheral.sv
162-
- hw/snitch_cluster/src/snitch_fpu.sv
163-
- hw/snitch_cluster/src/snitch_sequencer.sv
164176
- hw/snitch_cluster/src/snitch_tcdm_fc_interconnect.sv
165177
- hw/bootrom/snitch_bootrom.sv
166178
# Level 1
167179
- hw/snitch_cluster/src/snitch_barrier.sv
168-
- hw/snitch_cluster/src/snitch_fp_ss.sv
169180
- hw/snitch_cluster/src/snitch_shared_muldiv.sv
170181
- hw/snitch_cluster/src/snitch_tcdm_interconnect.sv
171182
# Level 2
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Copyright 2025 ETH Zurich and University of Bologna.
2+
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
3+
// SPDX-License-Identifier: SHL-0.51
4+
5+
// Author: Luca Colagrande <[email protected]>
6+
7+
`ifndef DCA_INTERFACE_ASSIGN_SVH_
`define DCA_INTERFACE_ASSIGN_SVH_

// Field-by-field connection helpers for DCA request and response structs.
//
// In both macros `__opt_as` is pasted verbatim in front of every statement
// (e.g. `assign`), `__lhs` is the struct being driven and `__rhs` the struct
// being read. Every generated statement already ends in a semicolon.

// Drives all request-side signals of `__lhs` from `__rhs`: the `q_valid` and
// `p_ready` handshake signals plus every field of the `q` payload.
`define DCA_REQRSP_ASSIGN_REQ(__opt_as, __lhs, __rhs)     \
  __opt_as __lhs.q_valid        = __rhs.q_valid;          \
  __opt_as __lhs.p_ready        = __rhs.p_ready;          \
  __opt_as __lhs.q.operands     = __rhs.q.operands;       \
  __opt_as __lhs.q.rnd_mode     = __rhs.q.rnd_mode;       \
  __opt_as __lhs.q.op           = __rhs.q.op;             \
  __opt_as __lhs.q.op_mod       = __rhs.q.op_mod;         \
  __opt_as __lhs.q.src_fmt      = __rhs.q.src_fmt;        \
  __opt_as __lhs.q.dst_fmt      = __rhs.q.dst_fmt;        \
  __opt_as __lhs.q.int_fmt      = __rhs.q.int_fmt;        \
  __opt_as __lhs.q.vectorial_op = __rhs.q.vectorial_op;

// Drives all response-side signals of `__lhs` from `__rhs`: the `p_valid` and
// `q_ready` handshake signals plus every field of the `p` payload.
`define DCA_REQRSP_ASSIGN_RSP(__opt_as, __lhs, __rhs) \
  __opt_as __lhs.p_valid  = __rhs.p_valid;            \
  __opt_as __lhs.q_ready  = __rhs.q_ready;            \
  __opt_as __lhs.p.status = __rhs.p.status;           \
  __opt_as __lhs.p.result = __rhs.p.result;

`endif // DCA_INTERFACE_ASSIGN_SVH_
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
`ifndef DCA_INTERFACE_TYPEDEF_SVH_
`define DCA_INTERFACE_TYPEDEF_SVH_

`include "reqrsp_interface/typedef.svh"

// Anonymous packed struct for the DCA request payload: three operands of
// `__dw` bits each, followed by the operation flags typed by `fpnew_pkg`.
`define DCA_REQ_CHAN_STRUCT(__dw)             \
  struct packed {                             \
    logic [2:0][__dw-1:0]   operands;         \
    fpnew_pkg::roundmode_e  rnd_mode;         \
    fpnew_pkg::operation_e  op;               \
    logic                   op_mod;           \
    fpnew_pkg::fp_format_e  src_fmt;          \
    fpnew_pkg::fp_format_e  dst_fmt;          \
    fpnew_pkg::int_format_e int_fmt;          \
    logic                   vectorial_op;     \
  }

// Anonymous packed struct for the DCA response payload: a status word and a
// `__dw`-bit result.
`define DCA_RSP_CHAN_STRUCT(__dw)   \
  struct packed {                   \
    fpnew_pkg::status_t status;     \
    logic [__dw-1:0]    result;     \
  }

// Request struct: the DCA request payload wrapped by the generic
// request/response macro from `reqrsp_interface/typedef.svh`.
`define DCA_REQ_STRUCT(__dw) \
  `GENERIC_REQRSP_REQ_STRUCT(`DCA_REQ_CHAN_STRUCT(__dw))

// Response struct: the DCA response payload wrapped by the generic
// request/response macro from `reqrsp_interface/typedef.svh`.
`define DCA_RSP_STRUCT(__dw) \
  `GENERIC_REQRSP_RSP_STRUCT(`DCA_RSP_CHAN_STRUCT(__dw))

// Declares `<__name>_req_chan_t` as the request payload type.
`define DCA_TYPEDEF_REQ_CHAN_T(__name, __dw) \
  typedef `DCA_REQ_CHAN_STRUCT(__dw) __name``_req_chan_t;

// Declares `<__name>_rsp_chan_t` as the response payload type.
`define DCA_TYPEDEF_RSP_CHAN_T(__name, __dw) \
  typedef `DCA_RSP_CHAN_STRUCT(__dw) __name``_rsp_chan_t;

// Declares both payload types, then hands them to the generic typedef macro
// to declare the remaining types for prefix `__name`.
`define DCA_TYPEDEF_ALL(__name, __dw)  \
  `DCA_TYPEDEF_REQ_CHAN_T(__name, __dw) \
  `DCA_TYPEDEF_RSP_CHAN_T(__name, __dw) \
  `GENERIC_REQRSP_TYPEDEF_ALL(__name, __name``_req_chan_t, __name``_rsp_chan_t)

`endif // DCA_INTERFACE_TYPEDEF_SVH_

hw/dca_interface/src/dca_fork.sv

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// Copyright 2025 ETH Zurich and University of Bologna.
2+
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
3+
// SPDX-License-Identifier: SHL-0.51
4+
5+
// Author: Luca Colagrande <[email protected]>
6+
7+
`include "dca_interface/typedef.svh"
8+
9+
// Forks a wide Direct Compute Access (DCA) request to multiple lanes,
// operating in SIMD fashion.
//
// - The request handshake is forked to all lanes by `stream_fork`; the
//   response handshakes of all lanes are joined by `stream_join`.
// - The operation flags are broadcast unchanged to every lane.
// - Lane `i` receives bits `[LaneDataWidth*i +: LaneDataWidth]` of each wide
//   operand and drives the same slice of the wide result.
// - The wide response status is the bitwise OR of the status of all lanes.
//
// Parameters:
//   LaneDataWidth  Width in bits of one lane's operands and result.
//   NumLanes       Number of lanes the wide request is split across.
// Ports:
//   slv_req_i / slv_rsp_o  Wide request/response (LaneDataWidth * NumLanes bits).
//   mst_req_o / mst_rsp_i  Per-lane requests/responses (LaneDataWidth bits each).
module dca_fork #(
  parameter int unsigned LaneDataWidth = 64,
  parameter int unsigned NumLanes = 8,
  // Derived parameters
  localparam int unsigned DataWidth = LaneDataWidth * NumLanes,
  localparam type dca_req_t = `DCA_REQ_STRUCT(DataWidth),
  localparam type dca_rsp_t = `DCA_RSP_STRUCT(DataWidth),
  localparam type dca_lane_req_t = `DCA_REQ_STRUCT(LaneDataWidth),
  localparam type dca_lane_rsp_t = `DCA_RSP_STRUCT(LaneDataWidth)
) (
  input  logic                         clk_i,
  input  logic                         rst_ni,
  input  dca_req_t                     slv_req_i,
  output dca_rsp_t                     slv_rsp_o,
  output dca_lane_req_t [NumLanes-1:0] mst_req_o,
  input  dca_lane_rsp_t [NumLanes-1:0] mst_rsp_i
);

  // Per-lane handshake signals, flattened into vectors for the fork/join cells
  logic [NumLanes-1:0] flat_q_valids;
  logic [NumLanes-1:0] flat_q_readies;
  logic [NumLanes-1:0] flat_p_valids;
  logic [NumLanes-1:0] flat_p_readies;

  // Fork the DCA request to all lanes
  stream_fork #(
    .N_OUP(NumLanes)
  ) i_dca_fork_fpu (
    .clk_i  (clk_i),
    .rst_ni (rst_ni),
    .valid_i(slv_req_i.q_valid),
    .ready_o(slv_rsp_o.q_ready),
    .valid_o(flat_q_valids),
    .ready_i(flat_q_readies)
  );

  // Join the DCA responses from all lanes
  stream_join #(
    .N_INP(NumLanes)
  ) i_dca_join_fpu (
    .inp_valid_i(flat_p_valids),
    .inp_ready_o(flat_p_readies),
    // Fixed: the response port is named `slv_rsp_o` (was `slv_resp_o`)
    .oup_valid_o(slv_rsp_o.p_valid),
    .oup_ready_i(slv_req_i.p_ready)
  );

  for (genvar i = 0; i < NumLanes; i++) begin : gen_lane
    // The same operation flags are sent to all lanes
    assign mst_req_o[i].q.rnd_mode     = slv_req_i.q.rnd_mode;
    assign mst_req_o[i].q.op           = slv_req_i.q.op;
    assign mst_req_o[i].q.op_mod       = slv_req_i.q.op_mod;
    assign mst_req_o[i].q.src_fmt      = slv_req_i.q.src_fmt;
    assign mst_req_o[i].q.dst_fmt      = slv_req_i.q.dst_fmt;
    assign mst_req_o[i].q.int_fmt      = slv_req_i.q.int_fmt;
    assign mst_req_o[i].q.vectorial_op = slv_req_i.q.vectorial_op;
    // Data is split across lanes, to perform SIMD operation (both operands and result).
    // Fixed: each lane owns a LaneDataWidth-wide slice; slicing with the full DataWidth
    // indexed outside both the lane-side and the wide-side vectors.
    assign mst_req_o[i].q.operands[2] = slv_req_i.q.operands[2][LaneDataWidth*i+:LaneDataWidth];
    assign mst_req_o[i].q.operands[1] = slv_req_i.q.operands[1][LaneDataWidth*i+:LaneDataWidth];
    assign mst_req_o[i].q.operands[0] = slv_req_i.q.operands[0][LaneDataWidth*i+:LaneDataWidth];
    assign slv_rsp_o.p.result[LaneDataWidth*i+:LaneDataWidth] = mst_rsp_i[i].p.result;
    // Connect the handshake signals
    assign mst_req_o[i].q_valid = flat_q_valids[i];
    assign mst_req_o[i].p_ready = flat_p_readies[i];
    assign flat_q_readies[i]    = mst_rsp_i[i].q_ready;
    assign flat_p_valids[i]     = mst_rsp_i[i].p_valid;
  end

  // OR-reduce the status bits from all lanes.
  // The reduction is accumulated in a local variable so that `slv_rsp_o` is
  // driven by continuous assignments only.
  // Fixed: the field is named `status` (was `dca_status`) and the loop now
  // includes the last lane (was `i < NumLanes-1`).
  // TODO(colluca): double-check that a bitwise OR is the intended reduction
  fpnew_pkg::status_t status_or;

  always_comb begin
    status_or = '0;
    for (int unsigned i = 0; i < NumLanes; i++) begin
      status_or |= mst_rsp_i[i].p.status;
    end
  end

  assign slv_rsp_o.p.status = status_or;

endmodule

hw/snitch/src/snitch_pkg.sv

Lines changed: 29 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -358,45 +358,40 @@ package snitch_pkg;
358358

359359
// DCA trace record: a packed struct holding one `longint` per traced value.
// `print_dca_trace` formats a record of this type into a string.
typedef struct packed {
360360
longint source;
361-
longint dca_in_hs; // Handshake to indicate DCA Data in
362-
longint dca_out_hs; // Handshake to indicate DCA Data out
363-
longint dca_in_op_code; // OPS-Code of the FPU (@FPNEW Doku)
364-
longint dca_in_op_mode; // OP-Mode of the FPU (@FPNEW Doku)
365-
longint dca_in_rnd_mode; // Round-Mode of the FPU (@FPNEW Doku)
366-
longint dca_in_vector_mode; // Vector-Mode of the FPU (@FPNEW Doku)
367-
longint dca_in_op_0; // First Operand of the FPU
368-
longint dca_in_op_1; // Second Operand of the FPU
369-
longint dca_in_op_2; // Third Operand of the FPU
370-
longint dca_in_src_fmt; // Input SRC format (@FPNEW Doku)
371-
longint dca_in_dst_fmt; // Output SRC format (@FPNEW Doku)
372-
longint dca_in_int_fmt; // Intermidiate format (@FPNEW Doku)
373-
longint dca_in_tag; // Unique input Tag
374-
longint dca_out_tag; // Unique output Tag
375-
longint dca_out_status; // Status of the FPU (@FPNEW Doku)
376-
longint dca_out_result; // Result of the FPU
361+
longint req_hs;
362+
longint rsp_hs;
363+
longint operand0;
364+
longint operand1;
365+
longint operand2;
366+
longint rnd_mode;
367+
longint op;
368+
longint op_mod;
369+
longint src_fmt;
370+
longint dst_fmt;
371+
longint int_fmt;
372+
longint vectorial_op;
373+
longint tag;
374+
longint status;
375+
longint result;
377376
} dca_trace_port_t;
378-
// All Dokumentation with (@FPNEW Doku) can be found here:
379-
// https://github.com/openhwgroup/cvfpu/tree/master/docs
380377

381378
// Formats a DCA trace record as a string: an opening `{`, one
// `'<key>': 0x<hex value>, ` entry per printed field, and a closing `}`.
// Returns the assembled string.
// NOTE(review): in the added (`+`) lines the `tag` field of `dca_trace_port_t`
// has no entry, whereas the removed lines printed `in_tag` and `out_tag` --
// confirm the omission is intentional.
function automatic string print_dca_trace(dca_trace_port_t dca_trace);
382379
string extras_str = "{";
383380
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "source", dca_trace.source);
384-
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dca_in_hs", dca_trace.dca_in_hs);
385-
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dca_out_hs", dca_trace.dca_out_hs);
386-
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "op_code", dca_trace.dca_in_op_code);
387-
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "op_mode", dca_trace.dca_in_op_mode);
388-
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "rnd_mode", dca_trace.dca_in_rnd_mode);
389-
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "vector_mode", dca_trace.dca_in_vector_mode);
390-
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "op_0", dca_trace.dca_in_op_0);
391-
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "op_1", dca_trace.dca_in_op_1);
392-
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "op_2", dca_trace.dca_in_op_2);
393-
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "src_format", dca_trace.dca_in_src_fmt);
394-
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dst_format", dca_trace.dca_in_dst_fmt);
395-
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "int_format", dca_trace.dca_in_int_fmt);
396-
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "in_tag", dca_trace.dca_in_tag);
397-
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "status", dca_trace.dca_out_status);
398-
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "result", dca_trace.dca_out_result);
399-
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "out_tag", dca_trace.dca_out_tag);
381+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dca_req_hs", dca_trace.req_hs);
382+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dca_rsp_hs", dca_trace.rsp_hs);
383+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dca_op_code", dca_trace.op);
384+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dca_op_mode", dca_trace.op_mod);
385+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dca_rnd_mode", dca_trace.rnd_mode);
386+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dca_vector_mode", dca_trace.vectorial_op);
387+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dca_op_0", dca_trace.operand0);
388+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dca_op_1", dca_trace.operand1);
389+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dca_op_2", dca_trace.operand2);
390+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dca_src_format", dca_trace.src_fmt);
391+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dca_dst_format", dca_trace.dst_fmt);
392+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dca_int_format", dca_trace.int_fmt);
393+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dca_status", dca_trace.status);
394+
extras_str = $sformatf("%s'%s': 0x%0x, ", extras_str, "dca_result", dca_trace.result);
400395
extras_str = $sformatf("%s}", extras_str);
401396
return extras_str;
402397
endfunction

0 commit comments

Comments
 (0)