Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Bender.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ dependencies:
axi: { git: "https://github.com/pulp-platform/axi.git", version: 0.31.0 }
common_cells:
{ git: "https://github.com/pulp-platform/common_cells", version: 1.23.0 }
fpnew: { git: "https://github.com/openhwgroup/cvfpu.git", rev: 2c79477 } # branch: develop
fpnew: { git: "https://github.com/openhwgroup/cvfpu.git", rev: 58ca3c3 } # branch: develop
tech_cells_generic:
{ git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.13 }

Expand Down
3 changes: 3 additions & 0 deletions core/cva6.sv
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,7 @@ module cva6
logic [CVA6Cfg.XLEN-1:0] fpu_result_ex_id;
logic fpu_valid_ex_id;
exception_t fpu_exception_ex_id;
logic fpu_early_valid_ex_id;
// ALU2
logic [CVA6Cfg.NrIssuePorts-1:0] alu2_valid_id_ex;
// Accelerator
Expand Down Expand Up @@ -890,6 +891,7 @@ module cva6
.fpu_valid_o (fpu_valid_id_ex),
.fpu_fmt_o (fpu_fmt_id_ex),
.fpu_rm_o (fpu_rm_id_ex),
.fpu_early_valid_i (fpu_early_valid_ex_id),
// ALU2
.alu2_valid_o (alu2_valid_id_ex),
// CSR
Expand Down Expand Up @@ -1022,6 +1024,7 @@ module cva6
.fpu_result_o (fpu_result_ex_id),
.fpu_valid_o (fpu_valid_ex_id),
.fpu_exception_o (fpu_exception_ex_id),
.fpu_early_valid_o (fpu_early_valid_ex_id),
// ALU2
.alu2_valid_i (alu2_valid_id_ex),
.amo_valid_commit_i (amo_valid_commit),
Expand Down
5 changes: 4 additions & 1 deletion core/ex_stage.sv
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ module ex_stage
output logic fpu_valid_o,
// FPU exception - ISSUE_STAGE
output exception_t fpu_exception_o,
// FPU early valid - ISSUE_STAGE
output logic fpu_early_valid_o,
// ALU2 instruction is valid - ISSUE_STAGE
input logic [CVA6Cfg.NrIssuePorts-1:0] alu2_valid_i,
// CVXIF instruction is valid - ISSUE_STAGE
Expand Down Expand Up @@ -465,7 +467,8 @@ module ex_stage
.fpu_trans_id_o(fpu_trans_id),
.result_o(fpu_result),
.fpu_valid_o(fpu_valid),
.fpu_exception_o
.fpu_exception_o,
.fpu_early_valid_o
);
end else begin : no_fpu_gen
assign fpu_ready_o = '0;
Expand Down
6 changes: 4 additions & 2 deletions core/fpu_wrap.sv
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ module fpu_wrap
output logic [CVA6Cfg.TRANS_ID_BITS-1:0] fpu_trans_id_o,
output logic [ CVA6Cfg.FLen-1:0] result_o,
output logic fpu_valid_o,
output exception_t fpu_exception_o
output exception_t fpu_exception_o,
output logic fpu_early_valid_o
);

// this is a workaround
Expand Down Expand Up @@ -553,7 +554,8 @@ module fpu_wrap
.tag_o (fpu_trans_id_o),
.out_valid_o (fpu_out_valid),
.out_ready_i (fpu_out_ready),
.busy_o ( /* unused */)
.busy_o ( /* unused */),
.early_valid_o (fpu_early_valid_o)
);

// Pack status flag into exception cause, tval ignored in wb, exception is always invalid
Expand Down
1 change: 0 additions & 1 deletion core/include/config_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,6 @@ package config_pkg;
assert (Cfg.NrExecuteRegionRules <= NrMaxRules);
assert (Cfg.NrCachedRegionRules <= NrMaxRules);
assert (Cfg.NrPMPEntries <= 64);
assert (!(Cfg.SuperscalarEn && Cfg.RVF));
assert (Cfg.FETCH_WIDTH == 32 || Cfg.FETCH_WIDTH == 64)
else $fatal(1, "[frontend] fetch width != not supported");
// Disabling MIP.MSIP and MIE.MSIE in Hypervisor and Supervisor mode is not supported
Expand Down
77 changes: 39 additions & 38 deletions core/issue_read_operands.sv
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ module issue_read_operands
output logic [CVA6Cfg.NrIssuePorts-1:0] mult_valid_o,
// FPU FU is ready - EX_STAGE
input logic fpu_ready_i,
// FPU FU will perform a writeback in the next cycle - EX_STAGE
input logic fpu_early_valid_i,
// FPU FU is valid - EX_STAGE
output logic [CVA6Cfg.NrIssuePorts-1:0] fpu_valid_o,
// FPU fmt field - EX_STAGE
Expand Down Expand Up @@ -152,10 +154,10 @@ module issue_read_operands
rs3_len_t operand_c_fpr;
// output flipflop (ID <-> EX)
fu_data_t [CVA6Cfg.NrIssuePorts-1:0] fu_data_n, fu_data_q;
logic [CVA6Cfg.VLEN-1:0] pc_n;
logic is_compressed_instr_n;
branchpredict_sbe_t branch_predict_n;
logic [CVA6Cfg.XLEN-1:0] imm_forward_rs3;
logic [ CVA6Cfg.VLEN-1:0] pc_n;
logic is_compressed_instr_n;
branchpredict_sbe_t branch_predict_n;
logic [CVA6Cfg.NrIssuePorts-1:0][CVA6Cfg.XLEN-1:0] imm_forward_rs3;

logic [CVA6Cfg.NrIssuePorts-1:0] alu_valid_n, alu_valid_q;
logic [CVA6Cfg.NrIssuePorts-1:0] aes_valid_n, aes_valid_q;
Expand Down Expand Up @@ -215,8 +217,10 @@ module issue_read_operands
logic [CVA6Cfg.NrIssuePorts-1:0] forward_rs1, forward_rs2, forward_rs3;

// original instruction
riscv::instruction_t orig_instr;
assign orig_instr = riscv::instruction_t'(orig_instr_i[0]);
riscv::instruction_t [CVA6Cfg.NrIssuePorts-1:0] orig_instr;
for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin
assign orig_instr[i] = riscv::instruction_t'(orig_instr_i[i]);
end

// ALU-ALU bypass signals
alu_bypass_t alu_bypass, alu_bypass_n, alu_bypass_q;
Expand Down Expand Up @@ -309,10 +313,11 @@ module issue_read_operands
end

if (CVA6Cfg.SuperscalarEn) begin
// When a bypass is possible, an instruction uses `alu2` only when `alu` is already busy,
// in all other scenarios `alu2` is preferred over `alu`, unless it is busy
// When a bypass is possible or an FPU instruction is present on the second issue port,
// an instruction uses `alu2` only when `alu` is already busy.
// In all other scenarios `alu2` is preferred over `alu`, unless `alu2` is busy.
for (genvar i = 0; i < 2; i++) begin
assign use_alu2[i] = is_alu_bypass ? fus_busy[i].alu : !fus_busy[i].alu2;
assign use_alu2[i] = is_alu_bypass || (issue_instr_i[1].fu inside {FPU, FPU_VEC}) ? fus_busy[i].alu : !fus_busy[i].alu2;
end
end else begin
assign use_alu2 = '0;
Expand Down Expand Up @@ -347,7 +352,6 @@ module issue_read_operands
if (CVA6Cfg.FpPresent && !fpu_ready_i) begin
fus_busy[0].fpu = 1'b1;
fus_busy[0].fpu_vec = 1'b1;
if (CVA6Cfg.SuperscalarEn) fus_busy[0].alu2 = 1'b1;
end

if (!lsu_ready_i) begin
Expand All @@ -356,6 +360,11 @@ module issue_read_operands
end

if (CVA6Cfg.SuperscalarEn) begin

if (fpu_early_valid_i) begin
fus_busy[0].alu2 = 1'b1;
end

fus_busy[1] = fus_busy[0];

// Never issue CSR instruction on second issue port.
Expand Down Expand Up @@ -390,10 +399,6 @@ module issue_read_operands
ALU: begin
if (use_alu2[0]) begin
fus_busy[1].alu2 = 1'b1;
// TODO is there a minimum float execution time?
// If so we could issue FPU & ALU2 the same cycle
fus_busy[1].fpu = 1'b1;
fus_busy[1].fpu_vec = 1'b1;
end else begin
fus_busy[1].alu = 1'b1;
fus_busy[1].ctrl_flow = 1'b1;
Expand All @@ -408,10 +413,18 @@ module issue_read_operands
FPU, FPU_VEC: begin
fus_busy[1].fpu = 1'b1;
fus_busy[1].fpu_vec = 1'b1;
if (issue_instr_i[1].op inside {[FLD : FSB]}) begin
fus_busy[1].load = 1'b1;
fus_busy[1].store = 1'b1;
end
end
LOAD, STORE: begin
fus_busy[1].load = 1'b1;
fus_busy[1].store = 1'b1;
if (issue_instr_i[0].op inside {[FLD : FSB]}) begin
fus_busy[1].fpu = 1'b1;
fus_busy[1].fpu_vec = 1'b1;
end
end
CVXIF: ;
default: ;
Expand Down Expand Up @@ -658,10 +671,12 @@ module issue_read_operands
end

// third operand from fp regfile or gp regfile if NR_RGPR_PORTS == 3
if (OPERANDS_PER_INSTR == 3) begin : gen_gp_rs3
assign imm_forward_rs3 = rs3_res[0];
end else begin : gen_fp_rs3
assign imm_forward_rs3 = {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, rs3_res[0]};
for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin
if (OPERANDS_PER_INSTR == 3) begin : gen_gp_rs3
assign imm_forward_rs3[i] = rs3_res[i];
end else begin : gen_fp_rs3
assign imm_forward_rs3[i] = {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, rs3_res[i]};
end
end

// Forwarding/Output MUX
Expand Down Expand Up @@ -696,7 +711,7 @@ module issue_read_operands
fu_data_n[i].operand_b = rs2_res[i];
end
if ((CVA6Cfg.FpPresent || (CVA6Cfg.CvxifEn && OPERANDS_PER_INSTR == 3)) && forward_rs3[i]) begin
fu_data_n[i].imm = imm_forward_rs3;
fu_data_n[i].imm = imm_forward_rs3[i];
end

// use the PC as operand a
Expand Down Expand Up @@ -760,12 +775,12 @@ module issue_read_operands
default: begin
if (issue_instr_i[i].fu == FPU && CVA6Cfg.FpPresent) begin
fpu_valid_n[i] = 1'b1;
fpu_fmt_n = orig_instr.rftype.fmt; // fmt bits from instruction
fpu_rm_n = orig_instr.rftype.rm; // rm bits from instruction
fpu_fmt_n = orig_instr[i].rftype.fmt; // fmt bits from instruction
fpu_rm_n = orig_instr[i].rftype.rm; // rm bits from instruction
end else if (issue_instr_i[i].fu == FPU_VEC && CVA6Cfg.FpPresent) begin
fpu_valid_n[i] = 1'b1;
fpu_fmt_n = orig_instr.rvftype.vfmt; // vfmt bits from instruction
fpu_rm_n = {2'b0, orig_instr.rvftype.repl}; // repl bit from instruction
fpu_fmt_n = orig_instr[i].rvftype.vfmt; // vfmt bits from instruction
fpu_rm_n = {2'b0, orig_instr[i].rvftype.repl}; // repl bit from instruction
end
end
endcase
Expand Down Expand Up @@ -948,7 +963,7 @@ module issue_read_operands
};

if (CVA6Cfg.SuperscalarEn) begin
if (!(issue_instr_i[0].fu inside {FPU, FPU_VEC})) begin
if (!(issue_instr_i[0].fu inside {FPU, FPU_VEC} || issue_instr_i[0].op inside {[FLD:FSB]})) begin
fp_raddr_pack = {
issue_instr_i[1].result[4:0], issue_instr_i[1].rs2[4:0], issue_instr_i[1].rs1[4:0]
};
Expand Down Expand Up @@ -1101,20 +1116,6 @@ module issue_read_operands
);
end

// FPU does not declare that it will return a result the subsequent cycle so
// it is not possible for issue stage to know when ALU2 can be used if there
// is an FPU. As there are discussions to change the FPU, I did not explore
// its architecture to create this "FPU returns next cycle" signal. Also, a
// "lookahead" optimization should be added to be performant with FPU: when
// issue port 2 is issuing to FPU, issue port 1 should issue to ALU1 instead
// of ALU2 so that FPU is not busy. However, if FPU has a minimum execution
// time of 2 cycles, it is possible to simply not raise fus_busy[1].alu2.
initial begin
assert (!(CVA6Cfg.SuperscalarEn && CVA6Cfg.FpPresent))
else
$fatal(1, "FPU is not yet supported in superscalar CVA6, see comments above this assertion.");
end

for (genvar i = 0; i < CVA6Cfg.NrIssuePorts; i++) begin
assert property (@(posedge clk_i) (branch_valid_q) |-> (!$isunknown(
fu_data_q[i].operand_a
Expand Down
3 changes: 3 additions & 0 deletions core/issue_stage.sv
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ module issue_stage
output logic [1:0] fpu_fmt_o,
// FPU rm field - EX_STAGE
output logic [2:0] fpu_rm_o,
// FPU early valid - EX_STAGE
input logic fpu_early_valid_i,
// ALU2 FU is valid - EX_STAGE
output logic [CVA6Cfg.NrIssuePorts-1:0] alu2_valid_o,
// CSR is valid - EX_STAGE
Expand Down Expand Up @@ -283,6 +285,7 @@ module issue_stage
.fpu_valid_o,
.fpu_fmt_o,
.fpu_rm_o,
.fpu_early_valid_i,
.alu2_valid_o,
.csr_valid_o,
.cvxif_valid_o (xfu_valid_o),
Expand Down