@@ -60,8 +60,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *
6060 BLASLONG ix = 0 , iy = 0 ;
6161#if defined(HFLOAT16 )
6262 _Float16 * a_ptr = (_Float16 * )(a );
63+ _Float16 * x_ptr = (_Float16 * )(x );
6364#else
6465 __bf16 * a_ptr = (__bf16 * )(a );
66+ __bf16 * x_ptr = (__bf16 * )(x );
6567#endif
6668 FLOAT temp ;
6769
@@ -83,15 +85,15 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *
8385#endif
8486 for (k = 0 ; k < m /gvl ; k ++ ) {
8587 va = VLEV_IFLOAT (& a_ptr [j ], gvl );
86- vx = VLEV_IFLOAT (& x [j ], gvl );
88+ vx = VLEV_IFLOAT (& x_ptr [j ], gvl );
8789 vr = VFMACCVV_FLOAT (vz , va , vx , gvl ); // could vfmacc here and reduce outside loop
8890 v_res = VFREDSUM_FLOAT (vr , v_res , gvl ); // but that reordering diverges far enough from scalar path to make tests fail
8991 j += gvl ;
9092 }
9193 if (j < m ) {
9294 gvl = VSETVL (m - j );
9395 va = VLEV_IFLOAT (& a_ptr [j ], gvl );
94- vx = VLEV_IFLOAT (& x [j ], gvl );
96+ vx = VLEV_IFLOAT (& x_ptr [j ], gvl );
9597 vr = VFMACCVV_FLOAT (vz , va , vx , gvl );
9698 v_res = VFREDSUM_FLOAT (vr , v_res , gvl );
9799 }
@@ -113,7 +115,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *
113115#endif
114116 for (k = 0 ; k < m /gvl ; k ++ ) {
115117 va = VLEV_IFLOAT (& a_ptr [j ], gvl );
116- vx = VLSEV_IFLOAT (& x [ix ], stride_x , gvl );
118+ vx = VLSEV_IFLOAT (& x_ptr [ix ], stride_x , gvl );
117119 vr = VFMACCVV_FLOAT (vz , va , vx , gvl );
118120 v_res = VFREDSUM_FLOAT (vr , v_res , gvl );
119121 j += gvl ;
@@ -122,7 +124,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *
122124 if (j < m ) {
123125 gvl = VSETVL (m - j );
124126 va = VLEV_IFLOAT (& a_ptr [j ], gvl );
125- vx = VLSEV_IFLOAT (& x [ix ], stride_x , gvl );
127+ vx = VLSEV_IFLOAT (& x_ptr [ix ], stride_x , gvl );
126128 vr = VFMACCVV_FLOAT (vz , va , vx , gvl );
127129 v_res = VFREDSUM_FLOAT (vr , v_res , gvl );
128130 }
0 commit comments