// Copyright © 2019-2023
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

`include "VX_define.vh"

// Top-level wrapper exposing `NUM_MEM_PORTS independent memory request/response
// ports. It instantiates:
//   - one VX_cache_wrap_l3 instance sitting between the per-cluster memory
//     buses and the external memory ports,
//   - `NUM_CLUSTERS VX_cluster instances, each with its own relayed reset and
//     its own (optionally buffered) copy of the DCR write bus.
// `busy` is derived from the OR of all per-cluster busy flags.
// When PERF_ENABLE is defined, memory read/write/latency counters are
// accumulated over ALL memory ports and published on mem_perf_if.mem.
module Vortex_hbm import VX_gpu_pkg::*; (
    `SCOPE_IO_DECL

    // Clock
    input wire                              clk,
    input wire                              reset,

    // Memory request (one entry per memory port)
    output wire                             mem_req_valid [`NUM_MEM_PORTS],
    output wire                             mem_req_rw [`NUM_MEM_PORTS],
    output wire [`VX_MEM_BYTEEN_WIDTH-1:0]  mem_req_byteen [`NUM_MEM_PORTS],
    output wire [`VX_MEM_ADDR_WIDTH-1:0]    mem_req_addr [`NUM_MEM_PORTS],
    output wire [`VX_MEM_DATA_WIDTH-1:0]    mem_req_data [`NUM_MEM_PORTS],
    output wire [`VX_MEM_TAG_WIDTH-1:0]     mem_req_tag [`NUM_MEM_PORTS],
    input  wire                             mem_req_ready [`NUM_MEM_PORTS],

    // Memory response (one entry per memory port)
    input wire                              mem_rsp_valid [`NUM_MEM_PORTS],
    input wire [`VX_MEM_DATA_WIDTH-1:0]     mem_rsp_data [`NUM_MEM_PORTS],
    input wire [`VX_MEM_TAG_WIDTH-1:0]      mem_rsp_tag [`NUM_MEM_PORTS],
    output wire                             mem_rsp_ready [`NUM_MEM_PORTS],

    // DCR write request
    input  wire                             dcr_wr_valid,
    input  wire [`VX_DCR_ADDR_WIDTH-1:0]    dcr_wr_addr,
    input  wire [`VX_DCR_DATA_WIDTH-1:0]    dcr_wr_data,

    // Status
    output wire                             busy
);

`ifdef SCOPE
    localparam scope_cluster = 0;
    `SCOPE_IO_SWITCH (`NUM_CLUSTERS);
`endif

`ifdef PERF_ENABLE
    VX_mem_perf_if mem_perf_if();
    // These counter groups are not produced at this level; they are driven to 'x here.
    // The l3cache group is connected to the L3 instance below and the mem group
    // is driven by the counters at the end of this module.
    assign mem_perf_if.icache  = 'x;
    assign mem_perf_if.dcache  = 'x;
    assign mem_perf_if.l2cache = 'x;
    assign mem_perf_if.lmem    = 'x;
`endif

    // Cluster-side memory buses (inputs of the L3 wrapper)
    VX_mem_bus_if #(
        .DATA_SIZE (`L2_LINE_SIZE),
        .TAG_WIDTH (L2_MEM_TAG_WIDTH)
    ) per_cluster_mem_bus_if[`NUM_CLUSTERS]();

    // Memory-side buses (outputs of the L3 wrapper), one per external memory port
    VX_mem_bus_if #(
        .DATA_SIZE (`L3_LINE_SIZE),
        .TAG_WIDTH (L3_MEM_TAG_WIDTH)
    ) mem_bus_if[`NUM_MEM_PORTS]();

    `RESET_RELAY (l3_reset, reset);

    VX_cache_wrap_l3 #(
        .INSTANCE_ID    ("l3cache"),
        .CACHE_SIZE     (`L3_CACHE_SIZE),
        .LINE_SIZE      (`L3_LINE_SIZE),
        .NUM_BANKS      (`L3_NUM_BANKS),
        .NUM_WAYS       (`L3_NUM_WAYS),
        .WORD_SIZE      (L3_WORD_SIZE),
        .NUM_MEM_PORTS  (`NUM_MEM_PORTS),
        .NUM_REQS       (L3_NUM_REQS),
        .CRSQ_SIZE      (`L3_CRSQ_SIZE),
        .MSHR_SIZE      (`L3_MSHR_SIZE),
        .MRSQ_SIZE      (`L3_MRSQ_SIZE),
        .MREQ_SIZE      (`L3_WRITEBACK ? `L3_MSHR_SIZE : `L3_MREQ_SIZE),
        .TAG_WIDTH      (L2_MEM_TAG_WIDTH),
        .WRITE_ENABLE   (1),
        .WRITEBACK      (`L3_WRITEBACK),
        .DIRTY_BYTES    (`L3_WRITEBACK),
        .UUID_WIDTH     (`UUID_WIDTH),
        .CORE_OUT_BUF   (2),
        .MEM_OUT_BUF    (2),
        .NC_ENABLE      (1),
        .PASSTHRU       (!`L3_ENABLED)
    ) l3cache (
        .clk            (clk),
        .reset          (l3_reset),

    `ifdef PERF_ENABLE
        .cache_perf     (mem_perf_if.l3cache),
    `endif

        .core_bus_if    (per_cluster_mem_bus_if),
        .mem_bus_if     (mem_bus_if)
    );

    // Per-port handshake-completed flags (valid && ready)
    wire mem_req_fire[`NUM_MEM_PORTS-1:0];
    wire mem_rsp_fire[`NUM_MEM_PORTS-1:0];

    // Unpack each memory-side bus interface onto the flat module ports
    for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin
        assign mem_req_valid[i] = mem_bus_if[i].req_valid;
        assign mem_req_rw[i]    = mem_bus_if[i].req_data.rw;
        assign mem_req_byteen[i]= mem_bus_if[i].req_data.byteen;
        assign mem_req_addr[i]  = mem_bus_if[i].req_data.addr;
        assign mem_req_data[i]  = mem_bus_if[i].req_data.data;
        assign mem_req_tag[i]   = mem_bus_if[i].req_data.tag;
        assign mem_bus_if[i].req_ready = mem_req_ready[i];
        `UNUSED_VAR (mem_bus_if[i].req_data.atype)

        assign mem_bus_if[i].rsp_valid     = mem_rsp_valid[i];
        assign mem_bus_if[i].rsp_data.data = mem_rsp_data[i];
        assign mem_bus_if[i].rsp_data.tag  = mem_rsp_tag[i];
        assign mem_rsp_ready[i] = mem_bus_if[i].rsp_ready;

        assign mem_req_fire[i] = mem_req_valid[i] && mem_req_ready[i];
        assign mem_rsp_fire[i] = mem_rsp_valid[i] && mem_rsp_ready[i];
        // Only consumed by the PERF_ENABLE / DBG_TRACE_MEM sections below
        `UNUSED_VAR (mem_req_fire[i])
        `UNUSED_VAR (mem_rsp_fire[i])
    end

    // DCR write bus shared by all clusters
    VX_dcr_bus_if dcr_bus_if();
    assign dcr_bus_if.write_valid = dcr_wr_valid;
    assign dcr_bus_if.write_addr  = dcr_wr_addr;
    assign dcr_bus_if.write_data  = dcr_wr_data;

    wire [`NUM_CLUSTERS-1:0] per_cluster_busy;

    // Generate all clusters
    for (genvar cluster_id = 0; cluster_id < `NUM_CLUSTERS; ++cluster_id) begin : clusters

        `RESET_RELAY (cluster_reset, reset);

        VX_dcr_bus_if cluster_dcr_bus_if();
        `BUFFER_DCR_BUS_IF (cluster_dcr_bus_if, dcr_bus_if, (`NUM_CLUSTERS > 1));

        VX_cluster #(
            .CLUSTER_ID  (cluster_id),
            .INSTANCE_ID ($sformatf("cluster%0d", cluster_id))
        ) cluster (
            `SCOPE_IO_BIND (scope_cluster + cluster_id)

            .clk        (clk),
            .reset      (cluster_reset),

        `ifdef PERF_ENABLE
            .mem_perf_if (mem_perf_if),
        `endif

            .dcr_bus_if (cluster_dcr_bus_if),

            .mem_bus_if (per_cluster_mem_bus_if[cluster_id]),

            .busy       (per_cluster_busy[cluster_id])
        );
    end

    `BUFFER_EX(busy, (| per_cluster_busy), 1'b1, (`NUM_CLUSTERS > 1));

`ifdef PERF_ENABLE

    // Width able to hold a count of 0..`NUM_MEM_PORTS simultaneous events
    localparam MEM_PORTS_CTR_W = $clog2(`NUM_MEM_PORTS + 1);

    reg [`PERF_CTR_BITS-1:0] perf_mem_pending_reads;
    mem_perf_t mem_perf;

    // Per-port read / write request fires
    wire mem_rd_req_fire[`NUM_MEM_PORTS-1:0];
    wire mem_wr_req_fire[`NUM_MEM_PORTS-1:0];

    for (genvar i = 0; i < `NUM_MEM_PORTS; ++i) begin
        assign mem_rd_req_fire[i] = mem_req_fire[i] && ~mem_bus_if[i].req_data.rw;
        assign mem_wr_req_fire[i] = mem_req_fire[i] && mem_bus_if[i].req_data.rw;
    end

    // Number of events occurring across all memory ports in the current cycle.
    // Blocking assignments are used so that every port contributes to the sum.
    reg [MEM_PORTS_CTR_W-1:0] mem_rd_req_cnt;
    reg [MEM_PORTS_CTR_W-1:0] mem_wr_req_cnt;
    reg [MEM_PORTS_CTR_W-1:0] mem_rd_rsp_cnt;

    always @(*) begin
        mem_rd_req_cnt = '0;
        mem_wr_req_cnt = '0;
        mem_rd_rsp_cnt = '0;
        for (int i = 0; i < `NUM_MEM_PORTS; ++i) begin
            mem_rd_req_cnt = mem_rd_req_cnt + MEM_PORTS_CTR_W'(mem_rd_req_fire[i]);
            mem_wr_req_cnt = mem_wr_req_cnt + MEM_PORTS_CTR_W'(mem_wr_req_fire[i]);
            mem_rd_rsp_cnt = mem_rd_rsp_cnt + MEM_PORTS_CTR_W'(mem_rsp_fire[i]);
        end
    end

    // Outstanding reads = issued read requests - received responses, over all ports.
    // A single process drives the register: the previous per-port generate loop
    // created one driver per port for the same register, and the per-port
    // non-blocking updates of reads/writes kept only the last port's increment.
    always @(posedge clk) begin
        if (reset) begin
            perf_mem_pending_reads <= '0;
        end else begin
            perf_mem_pending_reads <= perf_mem_pending_reads
                                    + `PERF_CTR_BITS'(mem_rd_req_cnt)
                                    - `PERF_CTR_BITS'(mem_rd_rsp_cnt);
        end
    end

    always @(posedge clk) begin
        if (reset) begin
            mem_perf <= '0;
        end else begin
            mem_perf.reads   <= mem_perf.reads   + `PERF_CTR_BITS'(mem_rd_req_cnt);
            mem_perf.writes  <= mem_perf.writes  + `PERF_CTR_BITS'(mem_wr_req_cnt);
            // Accumulating the outstanding-read count every cycle
            mem_perf.latency <= mem_perf.latency + perf_mem_pending_reads;
        end
    end
    assign mem_perf_if.mem = mem_perf;

`endif

`ifdef DBG_TRACE_MEM
    // Trace every completed memory request / response handshake on each port
    always @(posedge clk) begin
        for (int i = 0; i < `NUM_MEM_PORTS; ++i) begin
            if (mem_req_fire[i]) begin
                if (mem_req_rw[i])
                    `TRACE(1, ("%d: MEM Wr Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h data=0x%0h, bank=%d\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], mem_req_data[i], i));
                else
                    `TRACE(1, ("%d: MEM Rd Req: addr=0x%0h, tag=0x%0h, byteen=0x%0h, bank=%d\n", $time, `TO_FULL_ADDR(mem_req_addr[i]), mem_req_tag[i], mem_req_byteen[i], i));
            end
            if (mem_rsp_fire[i]) begin
                `TRACE(1, ("%d: MEM Rd Rsp: tag=0x%0h, data=0x%0h\n", $time, mem_rsp_tag[i], mem_rsp_data[i]));
            end
        end
    end
`endif

`ifdef SIMULATION
    always @(posedge clk) begin
        $fflush(); // flush stdout buffer
    end
`endif

endmodule