Skip to content

Commit 7e07c91

Browse files
committed
+add Base implementation of class SynetConvolution16bNhwcSpecV2 (Forward, part 1).
1 parent 229ee99 commit 7e07c91

File tree

2 files changed

+13
-10
lines changed

2 files changed

+13
-10
lines changed

src/Simd/SimdBaseSynetConvolution16bNhwcSpecV2.cpp

Lines changed: 6 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -194,10 +194,7 @@ namespace Simd
194194
{
195195
size_t macroD = Simd::Min(p.dstC, mad + a.macroD) - mad;
196196
const uint16_t* weight = _weight.data + mad * a.K;
197-
// for (size_t mac = 0, mao = 0; mac < a.srcC; mac += a.macroC, mao += a.macroO)
198-
// {
199-
// size_t macroC = Simd::Min(a.srcC, mac + a.macroC) - mac;
200-
// size_t nK = DivHi(macroC, a.microC) * a.kA;
197+
201198
// for (size_t dyBeg = 0, dyN = 0; dyBeg < dstH; dyN++)
202199
// {
203200
// size_t dyEnd = Simd::Min(dyBeg + a.macroH, dstH);
@@ -236,12 +233,11 @@ namespace Simd
236233
// }
237234
// dyBeg = dyEnd;
238235
// }
239-
// weight += macroC * a.kA * a.F;
240-
// }
241-
// bias += macroD;
242-
// if (p.activation == ::SimdConvolutionActivationPrelu)
243-
// params += macroD;
244-
// dst += macroD * _elemD;
236+
237+
bias += macroD;
238+
if (p.activation == ::SimdConvolutionActivationPrelu)
239+
params += macroD;
240+
dst += macroD * _elemD;
245241
}
246242
}
247243

src/Test/TestSynetConvolution16b.cpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -530,6 +530,13 @@ namespace Test
530530
//result = result && SynetConvolution16bForwardAutoTest(eps, Param(1, 512, 32, 32, 512, _3, _1, _1, _1, _1, 1, aPr, tT, b16, b16), c, f1, f2);
531531
//result = result && SynetConvolution16bForwardAutoTest(eps, Param(1, 1024, 32, 32, 1024, _3, _1, _1, _1, _1, 1, aPr, tT, b16, b16), c, f1, f2);
532532
#endif
533+
#if 1
534+
//result = result && SynetConvolution16bForwardAutoTest(eps, Param(1, 64, 64, 64, 64, _3, _1, _1, _1, _1, 1, aPr, tT, b16, b16), c, f1, f2);
535+
result = result && SynetConvolution16bForwardAutoTest(eps, Param(1, 128, 32, 32, 1024, _3, _1, _1, _1, _1, 1, aPr, tT, b16, b16), c, f1, f2);
536+
result = result && SynetConvolution16bForwardAutoTest(eps, Param(1, 256, 16, 16, 256, _3, _1, _1, _1, _1, 1, aPr, tT, b16, b16), c, f1, f2);
537+
//result = result && SynetConvolution16bForwardAutoTest(eps, Param(1, 512, 32, 32, 512, _3, _1, _1, _1, _1, 1, aPr, tT, b16, b16), c, f1, f2);
538+
//result = result && SynetConvolution16bForwardAutoTest(eps, Param(1, 1024, 32, 32, 1024, _3, _1, _1, _1, _1, 1, aPr, tT, b16, b16), c, f1, f2);
539+
#endif
533540
#else
534541
result = result && SynetConvolution16bForwardAutoTest(eps, Param(1, 16, 320, 320, 32, _2, _1, _1, _0, _1, 1, aId, tT, b16, f32), c, f1, f2);
535542
#endif

0 commit comments

Comments
 (0)