Skip to content

Commit 00a7336

Browse files
committed
Missing one gemv conversion.
1 parent edf2e59 commit 00a7336

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

kernel/riscv64/sbgemv_t_vector.c

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -60,8 +60,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *
6060
BLASLONG ix = 0, iy = 0;
6161
#if defined(HFLOAT16)
6262
_Float16 *a_ptr = (_Float16 *)(a);
63+
_Float16 *x_ptr = (_Float16 *)(x);
6364
#else
6465
__bf16 *a_ptr = (__bf16 *)(a);
66+
__bf16 *x_ptr = (__bf16 *)(x);
6567
#endif
6668
FLOAT temp;
6769

@@ -83,15 +85,15 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *
8385
#endif
8486
for (k = 0; k < m/gvl; k++) {
8587
va = VLEV_IFLOAT(&a_ptr[j], gvl);
86-
vx = VLEV_IFLOAT(&x[j], gvl);
88+
vx = VLEV_IFLOAT(&x_ptr[j], gvl);
8789
vr = VFMACCVV_FLOAT(vz, va, vx, gvl); // could vfmacc here and reduce outside loop
8890
v_res = VFREDSUM_FLOAT(vr, v_res, gvl); // but that reordering diverges far enough from scalar path to make tests fail
8991
j += gvl;
9092
}
9193
if (j < m) {
9294
gvl = VSETVL(m-j);
9395
va = VLEV_IFLOAT(&a_ptr[j], gvl);
94-
vx = VLEV_IFLOAT(&x[j], gvl);
96+
vx = VLEV_IFLOAT(&x_ptr[j], gvl);
9597
vr = VFMACCVV_FLOAT(vz, va, vx, gvl);
9698
v_res = VFREDSUM_FLOAT(vr, v_res, gvl);
9799
}
@@ -113,7 +115,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *
113115
#endif
114116
for (k = 0; k < m/gvl; k++) {
115117
va = VLEV_IFLOAT(&a_ptr[j], gvl);
116-
vx = VLSEV_IFLOAT(&x[ix], stride_x, gvl);
118+
vx = VLSEV_IFLOAT(&x_ptr[ix], stride_x, gvl);
117119
vr = VFMACCVV_FLOAT(vz, va, vx, gvl);
118120
v_res = VFREDSUM_FLOAT(vr, v_res, gvl);
119121
j += gvl;
@@ -122,7 +124,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *
122124
if (j < m) {
123125
gvl = VSETVL(m-j);
124126
va = VLEV_IFLOAT(&a_ptr[j], gvl);
125-
vx = VLSEV_IFLOAT(&x[ix], stride_x, gvl);
127+
vx = VLSEV_IFLOAT(&x_ptr[ix], stride_x, gvl);
126128
vr = VFMACCVV_FLOAT(vz, va, vx, gvl);
127129
v_res = VFREDSUM_FLOAT(vr, v_res, gvl);
128130
}

0 commit comments

Comments (0)