Skip to content

Commit ea467b5

Browse files
committed
Add FPGA Optimized Register File Version
Add a register file, optimized for synthesis on FPGAs supporting distributed RAM. The register file features two RAM blocks each with 1 sync-write and 3 async-read ports. To achieve the behavior of a 2 sync-write / 3 async-read register file, the read access is arbitrated depending on which block was last written to. For this purpose an additional array of *NUM_TOT_WORDS* 1-bit registers is introduced. Savings for FPGA synthesis are achieved by: - Replacing an array of FFs with distributed RAM. Example: 31 32-bit registers as FFs occupy 992 FFs, or 446 LUTs on Xilinx Artix-7 FPGAs. The equivalent storage capacity using distributed RAM is implemented by 36 RAM32M primitives (inferred from generic HDL), or 144 distributed RAM enabled LUTs, and 31 FFs for block selection (16 LUTs). - The distributed RAM primitives have the read-address decoders already integrated. This saves three 32-bit 32 to 1 multiplexers at the read ports. - Since both write ports unconditionally write to their respective RAM blocks, the multiplexing of the write ports is also saved. That is 32 32-bit 2 to 1 multiplexers. Concrete Savings: (synthesized for Xilinx Artix-7 FPGA) - without FPU reg file: baseline: 7347 LUTs, 2508 FFs optimized: 5722 LUTs, 1541 FFs ------------------------------- difference: -1625 LUTs (-22.1%) -967 FFs (-38.6%) - with FPU reg file: baseline: 13160 LUTs, 4027 FFs optimized: 10257 LUTs, 2062 FFs ------------------------------- difference: -3353 LUTs (-24.6%) -1965 FFs (-48.8%) Signed-off-by: ganoam <[email protected]>
1 parent 1607d8b commit ea467b5

File tree

3 files changed

+183
-2
lines changed

3 files changed

+183
-2
lines changed

Bender.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,10 @@ sources:
3838
- target: asic
3939
files:
4040
- rtl/cv32e40p_register_file_latch.sv
41-
- target: not(asic)
41+
- target: fpga
42+
files:
43+
- rtl/cv32e40p_register_file_fpga.sv
44+
- target: rtl
4245
files:
4346
- rtl/cv32e40p_register_file_ff.sv
4447
- target: rtl

rtl/cv32e40p_register_file_fpga.sv

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
2+
// Copyright 2018 ETH Zurich and University of Bologna.
3+
// Copyright and related rights are licensed under the Solderpad Hardware
4+
// License, Version 0.51 (the "License"); you may not use this file except in
5+
// compliance with the License. You may obtain a copy of the License at
6+
// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
7+
// or agreed to in writing, software, hardware and materials distributed under
8+
// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
9+
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
10+
// specific language governing permissions and limitations under the License.
11+
12+
////////////////////////////////////////////////////////////////////////////////
13+
// Engineer: Francesco Conti - [email protected] //
14+
// //
15+
// Additional contributions by: //
16+
// Michael Gautschi - [email protected] //
17+
// Davide Schiavone - [email protected] //
18+
// Noam Gallmann - [email protected] //
19+
// //
20+
// Design Name: RISC-V register file //
21+
// Project Name: RI5CY //
22+
// Language: SystemVerilog //
23+
// //
24+
// Description: Register file with 31x 32 bit wide registers. Register 0 //
25+
// is fixed to 0. This register file is optimized for FPGAs //
26+
// featuring distributed RAM-enabled logic cells. //
27+
// Also supports the fp-register file now if FPU=1 //
28+
// If PULP_ZFINX is 1, floating point operations take values //
29+
// from the X register file //
30+
// //
31+
////////////////////////////////////////////////////////////////////////////////
32+
33+
module cv32e40p_register_file
#(
    // Width of the read/write address ports. Half of the address space
    // (2**(ADDR_WIDTH-1) words) is used for the integer registers; the other
    // half holds the floating-point registers when FPU != 0 and PULP_ZFINX == 0.
    // Must be at least 2 so that the derived RAM index is at least one bit wide.
    parameter ADDR_WIDTH    = 5,
    // Width of one register in bits.
    parameter DATA_WIDTH    = 32,
    // Non-zero: floating-point register support is enabled.
    parameter FPU           = 0,
    // Non-zero: floating-point operands live in the integer registers, so no
    // additional floating-point storage is allocated.
    parameter PULP_ZFINX    = 0
)
(
    // Clock and Reset
    input  logic                   clk,
    input  logic                   rst_n,         // active-low, sampled synchronously

    input  logic                   scan_cg_en_i,  // not referenced by this implementation

    //Read port R1
    input  logic [ADDR_WIDTH-1:0]  raddr_a_i,
    output logic [DATA_WIDTH-1:0]  rdata_a_o,

    //Read port R2
    input  logic [ADDR_WIDTH-1:0]  raddr_b_i,
    output logic [DATA_WIDTH-1:0]  rdata_b_o,

    //Read port R3
    input  logic [ADDR_WIDTH-1:0]  raddr_c_i,
    output logic [DATA_WIDTH-1:0]  rdata_c_o,

    // Write port W1
    input  logic [ADDR_WIDTH-1:0]  waddr_a_i,
    input  logic [DATA_WIDTH-1:0]  wdata_a_i,
    input  logic                   we_a_i,

    // Write port W2
    input  logic [ADDR_WIDTH-1:0]  waddr_b_i,
    input  logic [DATA_WIDTH-1:0]  wdata_b_i,
    input  logic                   we_b_i
);

  // The register values are stored in two separate RAM blocks each featuring 1 sync-write and
  // 3 async-read ports. Write port W1 only ever writes block A, write port W2 only ever writes
  // block B. A set of 1-bit flip-flops keeps track of which RAM block contains the valid entry
  // for each register.
  // The integer register file occupies the lower half of the address space (addresses 0-31 for
  // ADDR_WIDTH = 6). If enabled, the floating-point registers are located in the upper half
  // (addresses 32-63 for ADDR_WIDTH = 6).

  // number of integer registers
  localparam int unsigned NUM_WORDS     = 2**(ADDR_WIDTH-1);
  // number of floating point registers
  localparam int unsigned NUM_FP_WORDS  = 2**(ADDR_WIDTH-1);
  // total number of words stored in each RAM block
  localparam int unsigned NUM_TOT_WORDS = FPU ? ( PULP_ZFINX ? NUM_WORDS : NUM_WORDS + NUM_FP_WORDS ) : NUM_WORDS;
  // Number of address bits needed to index one RAM block. Deriving this from NUM_TOT_WORDS
  // (instead of hard-coding [5:0] / [4:0]) keeps every index within the bounds of the address
  // ports for any ADDR_WIDTH and keeps it consistent with the storage that is allocated.
  localparam int unsigned IDX_WIDTH     = $clog2(NUM_TOT_WORDS);

  // integer and floating-point register file
  // distributed RAM blocks
  logic [DATA_WIDTH-1:0]      mem_a [NUM_TOT_WORDS];
  logic [DATA_WIDTH-1:0]      mem_b [NUM_TOT_WORDS];

  // distributed RAM block selectors (0: block A holds the valid value, 1: block B)
  logic [NUM_TOT_WORDS-1:0]   mem_block_sel;
  logic [NUM_TOT_WORDS-1:0]   mem_block_sel_q;

  // write enable signals for all registers
  logic [NUM_TOT_WORDS-1:0]   we_a_dec;
  logic [NUM_TOT_WORDS-1:0]   we_b_dec;

  // RAM write strobes: asserted only when the write address selects an implemented word
  logic                       we_a_valid;
  logic                       we_b_valid;

  // RAM block indices (low IDX_WIDTH bits of the respective address port)
  logic [IDX_WIDTH-1:0]       ridx_a;
  logic [IDX_WIDTH-1:0]       ridx_b;
  logic [IDX_WIDTH-1:0]       ridx_c;
  logic [IDX_WIDTH-1:0]       widx_a;
  logic [IDX_WIDTH-1:0]       widx_b;

  assign ridx_a = raddr_a_i[IDX_WIDTH-1:0];
  assign ridx_b = raddr_b_i[IDX_WIDTH-1:0];
  assign ridx_c = raddr_c_i[IDX_WIDTH-1:0];
  assign widx_a = waddr_a_i[IDX_WIDTH-1:0];
  assign widx_b = waddr_b_i[IDX_WIDTH-1:0];

  //-----------------------------------------------------------------------------
  //-- READ : Read address decoder RAD
  //-----------------------------------------------------------------------------

  // Read from the block corresponding to the write port that last wrote to the corresponding
  // address. Address 0 always reads as zero, independent of the RAM contents.
  assign rdata_a_o = (raddr_a_i == '0) ? '0 :
                     mem_block_sel_q[ridx_a] ? mem_b[ridx_a] : mem_a[ridx_a];
  assign rdata_b_o = (raddr_b_i == '0) ? '0 :
                     mem_block_sel_q[ridx_b] ? mem_b[ridx_b] : mem_a[ridx_b];
  assign rdata_c_o = (raddr_c_i == '0) ? '0 :
                     mem_block_sel_q[ridx_c] ? mem_b[ridx_c] : mem_a[ridx_c];

  //-----------------------------------------------------------------------------
  //-- WRITE : Write Address Decoder (WAD)
  //-----------------------------------------------------------------------------

  // One-hot decode of write port W1. The full address is compared, so an address beyond
  // NUM_TOT_WORDS asserts no bit.
  always_comb begin : we_a_decoder
    for (int i = 0; i < NUM_TOT_WORDS; i++) begin
      we_a_dec[i] = (waddr_a_i == i) ? we_a_i : 1'b0;
    end
  end

  // One-hot decode of write port W2, same scheme as W1.
  always_comb begin : we_b_decoder
    for (int i = 0; i < NUM_TOT_WORDS; i++) begin
      we_b_dec[i] = (waddr_b_i == i) ? we_b_i : 1'b0;
    end
  end

  // Tie the RAM write strobes to the decoders so the RAM contents and the block selectors
  // can never disagree about whether a write took place.
  assign we_a_valid = |we_a_dec;
  assign we_b_valid = |we_b_dec;

  // update block selector:
  // signal mem_block_sel records where the current valid value is stored.
  // if port a and b try to write to the same address simultaneously, write port b has priority.
  always_comb begin : block_sel_update
    // entry 0 is never consulted for data because address 0 reads as constant zero
    mem_block_sel[0] = 1'b0;
    for (int i = 1; i < NUM_TOT_WORDS; i++) begin
      if (we_b_dec[i] == 1'b1) begin
        mem_block_sel[i] = 1'b1;
      end else if (we_a_dec[i] == 1'b1) begin
        mem_block_sel[i] = 1'b0;
      end else begin
        mem_block_sel[i] = mem_block_sel_q[i];
      end
    end
  end

  // block selector flops
  // Synchronous reset: after reset every register is served from block A. The RAM blocks
  // themselves have no reset.
  always_ff @(posedge clk) begin : block_sel_regs
    if (rst_n == 1'b0) begin
      mem_block_sel_q <= '0;
    end else begin
      mem_block_sel_q <= mem_block_sel;
    end
  end

  // RAM block A: written exclusively by write port W1
  always_ff @(posedge clk) begin : regs_a
    if (we_a_valid) begin
      mem_a[widx_a] <= wdata_a_i;
    end
  end

  // RAM block B: written exclusively by write port W2
  always_ff @(posedge clk) begin : regs_b
    if (we_b_valid) begin
      mem_b[widx_b] <= wdata_b_i;
    end
  end

endmodule

src_files.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ cv32e40p_regfile_fpga:
9393
]
9494
files: [
9595
./rtl/cv32e40p_register_file_test_wrap.sv,
96-
./rtl/cv32e40p_register_file_ff.sv,
96+
./rtl/cv32e40p_register_file_fpga.sv,
9797
]
9898

9999
tb_riscv:

0 commit comments

Comments
 (0)