Skip to content

Commit 119805a

Browse files
Merge branch 'master' into tensor-core
2 parents 5cf6797 + bc765d1 commit 119805a

File tree

8 files changed

+2222
-2
lines changed

8 files changed

+2222
-2
lines changed

hw/rtl/Vortex_hbm.sv

Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
// Copyright © 2019-2023
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
`include "VX_define.vh"
15+
16+
// Multi-port top level: instantiates `NUM_CLUSTERS VX_cluster instances, routes
// their memory buses through a VX_cache_wrap_l3 instance, and exposes the
// resulting `NUM_MEM_PORTS memory buses as flat per-port request/response
// signal arrays. DCR writes are broadcast to every cluster.
module Vortex_hbm import VX_gpu_pkg::*; (
    `SCOPE_IO_DECL

    // Clock
    input wire                              clk,
    input wire                              reset,

    // Memory request
    output wire                             mem_req_valid [`NUM_MEM_PORTS],
    output wire                             mem_req_rw [`NUM_MEM_PORTS],
    output wire [`VX_MEM_BYTEEN_WIDTH-1:0]  mem_req_byteen [`NUM_MEM_PORTS],
    output wire [`VX_MEM_ADDR_WIDTH-1:0]    mem_req_addr [`NUM_MEM_PORTS],
    output wire [`VX_MEM_DATA_WIDTH-1:0]    mem_req_data [`NUM_MEM_PORTS],
    output wire [`VX_MEM_TAG_WIDTH-1:0]     mem_req_tag [`NUM_MEM_PORTS],
    input  wire                             mem_req_ready [`NUM_MEM_PORTS],

    // Memory response
    input wire                              mem_rsp_valid [`NUM_MEM_PORTS],
    input wire [`VX_MEM_DATA_WIDTH-1:0]     mem_rsp_data [`NUM_MEM_PORTS],
    input wire [`VX_MEM_TAG_WIDTH-1:0]      mem_rsp_tag [`NUM_MEM_PORTS],
    output wire                             mem_rsp_ready [`NUM_MEM_PORTS],

    // DCR write request
    input  wire                             dcr_wr_valid,
    input  wire [`VX_DCR_ADDR_WIDTH-1:0]    dcr_wr_addr,
    input  wire [`VX_DCR_DATA_WIDTH-1:0]    dcr_wr_data,

    // Status
    output wire                             busy
);

`ifdef SCOPE
    localparam scope_cluster = 0;
    `SCOPE_IO_SWITCH (`NUM_CLUSTERS);
`endif

`ifdef PERF_ENABLE
    // Only the l3cache and mem fields are produced in this module (see below);
    // the remaining fields are explicitly driven to 'x here.
    VX_mem_perf_if mem_perf_if();
    assign mem_perf_if.icache  = 'x;
    assign mem_perf_if.dcache  = 'x;
    assign mem_perf_if.l2cache = 'x;
    assign mem_perf_if.lmem    = 'x;
`endif

    // Cluster-side buses (inputs of the L3 wrapper)
    VX_mem_bus_if #(
        .DATA_SIZE (`L2_LINE_SIZE),
        .TAG_WIDTH (L2_MEM_TAG_WIDTH)
    ) per_cluster_mem_bus_if[`NUM_CLUSTERS]();

    // Memory-side buses (outputs of the L3 wrapper), one per memory port
    VX_mem_bus_if #(
        .DATA_SIZE (`L3_LINE_SIZE),
        .TAG_WIDTH (L3_MEM_TAG_WIDTH)
    ) mem_bus_if[`NUM_MEM_PORTS]();

    `RESET_RELAY (l3_reset, reset);

    VX_cache_wrap_l3 #(
        .INSTANCE_ID    ("l3cache"),
        .CACHE_SIZE     (`L3_CACHE_SIZE),
        .LINE_SIZE      (`L3_LINE_SIZE),
        .NUM_BANKS      (`L3_NUM_BANKS),
        .NUM_WAYS       (`L3_NUM_WAYS),
        .WORD_SIZE      (L3_WORD_SIZE),
        .NUM_MEM_PORTS  (`NUM_MEM_PORTS),
        .NUM_REQS       (L3_NUM_REQS),
        .CRSQ_SIZE      (`L3_CRSQ_SIZE),
        .MSHR_SIZE      (`L3_MSHR_SIZE),
        .MRSQ_SIZE      (`L3_MRSQ_SIZE),
        .MREQ_SIZE      (`L3_WRITEBACK ? `L3_MSHR_SIZE : `L3_MREQ_SIZE),
        .TAG_WIDTH      (L2_MEM_TAG_WIDTH),
        .WRITE_ENABLE   (1),
        .WRITEBACK      (`L3_WRITEBACK),
        .DIRTY_BYTES    (`L3_WRITEBACK),
        .UUID_WIDTH     (`UUID_WIDTH),
        .CORE_OUT_BUF   (2),
        .MEM_OUT_BUF    (2),
        .NC_ENABLE      (1),
        .PASSTHRU       (!`L3_ENABLED)
    ) l3cache (
        .clk            (clk),
        .reset          (l3_reset),

    `ifdef PERF_ENABLE
        .cache_perf     (mem_perf_if.l3cache),
    `endif

        .core_bus_if    (per_cluster_mem_bus_if),
        .mem_bus_if     (mem_bus_if)
    );

    // Per-port handshake-complete strobes (valid && ready)
    wire mem_req_fire[`NUM_MEM_PORTS-1:0];
    wire mem_rsp_fire[`NUM_MEM_PORTS-1:0];

    // Unpack each memory bus interface onto the flat top-level port arrays
    for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin
        assign mem_req_valid[i] = mem_bus_if[i].req_valid;
        assign mem_req_rw[i]    = mem_bus_if[i].req_data.rw;
        assign mem_req_byteen[i]= mem_bus_if[i].req_data.byteen;
        assign mem_req_addr[i]  = mem_bus_if[i].req_data.addr;
        assign mem_req_data[i]  = mem_bus_if[i].req_data.data;
        assign mem_req_tag[i]   = mem_bus_if[i].req_data.tag;
        assign mem_bus_if[i].req_ready = mem_req_ready[i];
        `UNUSED_VAR (mem_bus_if[i].req_data.atype)

        assign mem_bus_if[i].rsp_valid = mem_rsp_valid[i];
        assign mem_bus_if[i].rsp_data.data = mem_rsp_data[i];
        assign mem_bus_if[i].rsp_data.tag = mem_rsp_tag[i];
        assign mem_rsp_ready[i] = mem_bus_if[i].rsp_ready;

        assign mem_req_fire[i] = mem_req_valid[i] && mem_req_ready[i];
        assign mem_rsp_fire[i] = mem_rsp_valid[i] && mem_rsp_ready[i];
        `UNUSED_VAR (mem_req_fire[i])
        `UNUSED_VAR (mem_rsp_fire[i])
    end

    // DCR write channel shared by all clusters
    VX_dcr_bus_if dcr_bus_if();
    assign dcr_bus_if.write_valid = dcr_wr_valid;
    assign dcr_bus_if.write_addr  = dcr_wr_addr;
    assign dcr_bus_if.write_data  = dcr_wr_data;

    wire [`NUM_CLUSTERS-1:0] per_cluster_busy;

    // Generate all clusters
    for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters

        `RESET_RELAY (cluster_reset, reset);

        VX_dcr_bus_if cluster_dcr_bus_if();
        `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1));

        VX_cluster #(
            .CLUSTER_ID  (cluster_id),
            .INSTANCE_ID ($sformatf("cluster%0d", cluster_id))
        ) cluster (
            `SCOPE_IO_BIND (scope_cluster + cluster_id)

            .clk         (clk),
            .reset       (cluster_reset),

        `ifdef PERF_ENABLE
            .mem_perf_if (mem_perf_if),
        `endif

            .dcr_bus_if  (cluster_dcr_bus_if),

            .mem_bus_if  (per_cluster_mem_bus_if[cluster_id]),

            .busy        (per_cluster_busy[cluster_id])
        );
    end

    // Busy while any cluster reports busy
    `BUFFER_EX(busy, (| per_cluster_busy), 1'b1, (`NUM_CLUSTERS > 1));

`ifdef PERF_ENABLE

    reg [`PERF_CTR_BITS-1:0] perf_mem_pending_reads;
    mem_perf_t mem_perf;

    wire mem_rd_req_fire[`NUM_MEM_PORTS-1:0];
    wire mem_wr_req_fire[`NUM_MEM_PORTS-1:0];

    for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin
        assign mem_rd_req_fire[i] = mem_req_fire[i] && ~mem_bus_if[i].req_data.rw;
        assign mem_wr_req_fire[i] = mem_req_fire[i] && mem_bus_if[i].req_data.rw;
    end

    // Number of read requests, write requests and responses that fire in the
    // current cycle, summed over all memory ports. Summing combinationally
    // lets every counter below be updated from exactly one clocked process:
    // a per-port always block would multiply-drive the register, and a
    // non-blocking assignment repeated in a loop keeps only its last iteration.
    reg [`PERF_CTR_BITS-1:0] perf_mem_reads_per_cycle;
    reg [`PERF_CTR_BITS-1:0] perf_mem_writes_per_cycle;
    reg [`PERF_CTR_BITS-1:0] perf_mem_rsps_per_cycle;

    always @(*) begin
        perf_mem_reads_per_cycle  = '0;
        perf_mem_writes_per_cycle = '0;
        perf_mem_rsps_per_cycle   = '0;
        for (int i = 0; i < `NUM_MEM_PORTS; ++i) begin
            perf_mem_reads_per_cycle  = perf_mem_reads_per_cycle  + `PERF_CTR_BITS'(mem_rd_req_fire[i]);
            perf_mem_writes_per_cycle = perf_mem_writes_per_cycle + `PERF_CTR_BITS'(mem_wr_req_fire[i]);
            perf_mem_rsps_per_cycle   = perf_mem_rsps_per_cycle   + `PERF_CTR_BITS'(mem_rsp_fire[i]);
        end
    end

    // Outstanding reads: +1 per read request fired, -1 per response fired.
    // Modular unsigned arithmetic gives the same result as the signed delta.
    always @(posedge clk) begin
        if (reset) begin
            perf_mem_pending_reads <= '0;
        end else begin
            perf_mem_pending_reads <= perf_mem_pending_reads
                                    + perf_mem_reads_per_cycle
                                    - perf_mem_rsps_per_cycle;
        end
    end

    always @(posedge clk) begin
        if (reset) begin
            mem_perf <= '0;
        end else begin
            mem_perf.reads   <= mem_perf.reads   + perf_mem_reads_per_cycle;
            mem_perf.writes  <= mem_perf.writes  + perf_mem_writes_per_cycle;
            // Accumulates the number of outstanding reads every cycle
            mem_perf.latency <= mem_perf.latency + perf_mem_pending_reads;
        end
    end
    assign mem_perf_if.mem = mem_perf;

`endif

`ifdef DBG_TRACE_MEM
    // Trace every request/response handshake on every memory port
    always @(posedge clk) begin
        for (int i = 0; i < `NUM_MEM_PORTS; ++i) begin
            if (mem_req_fire[i]) begin
                if (mem_req_rw[i])
                    `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h, bank=%d\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], mem_req_data[i], i));
                else
                    `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h, bank=%d\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], i));
            end
            if (mem_rsp_fire[i]) begin
                `TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag[i], mem_rsp_data[i]));
            end
        end
    end
`endif

`ifdef SIMULATION
    always @(posedge clk) begin
        $fflush(); // flush stdout buffer
    end
`endif

endmodule

hw/rtl/cache/VX_cache_bypass.sv

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,9 @@ module VX_cache_bypass #(
247247
assign is_mem_rsp_nc = 1'b0;
248248
end
249249

250+
`IGNORE_UNUSED_BEGIN
250251
wire [(MEM_TAG_OUT_WIDTH - NC_ENABLE)-1:0] mem_rsp_tag_id_nc;
252+
`IGNORE_UNUSED_END
251253

252254
VX_bits_remove #(
253255
.N (MEM_TAG_OUT_WIDTH),

0 commit comments

Comments
 (0)