@@ -38,6 +38,9 @@ genindex_executable_t::genindex_executable_t(std::shared_ptr<op_t> &op,
3838 output_dims_[i] = output_lt.dims [i];
3939 output_strides_[i] = output_lt.layout .strides [i];
4040 }
41+ info_ = std::string (dnnl_engine_kind2str (
42+ static_cast <dnnl_engine_kind_t >(p_engine.get_kind ())))
43+ + " ," + op->str ();
4144#if DNNL_GPU_RUNTIME != DNNL_RUNTIME_NONE \
4245 && DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL
4346 if (p_engine.get_kind () == engine::kind::gpu) {
@@ -59,7 +62,7 @@ genindex_executable_t::genindex_executable_t(std::shared_ptr<op_t> &op,
5962#endif
6063}
6164
62- void genindex_executable_t::execute (const stream &stream,
65+ void genindex_executable_t::execute_impl (const stream &stream,
6366 const std::unordered_map<int , memory> &args) const {
6467 const auto &it_dst = args.find (DNNL_ARG_DST);
6568 if (it_dst == args.end ()) return ;
@@ -79,8 +82,24 @@ void genindex_executable_t::execute(const stream &stream,
7982 stream.get ()->after_exec_hook ();
8083}
8184
85+ void genindex_executable_t::execute (const stream &stream,
86+ const std::unordered_map<int , memory> &args) const {
87+ if (get_verbose (dnnl::impl::verbose_t ::exec_profile,
88+ dnnl::impl::component_t ::graph)) {
89+ stream.get ()->wait ();
90+ double start_ms = dnnl::impl::get_msec ();
91+ execute_impl (stream, args);
92+ stream.get ()->wait ();
93+ double duration_ms = dnnl::impl::get_msec () - start_ms;
94+ VPROF (start_ms, graph, exec, VERBOSE_profile, info_.c_str (),
95+ duration_ms);
96+ } else {
97+ execute_impl (stream, args);
98+ }
99+ }
100+
82101#ifdef DNNL_WITH_SYCL
83- ::sycl::event genindex_executable_t::execute_sycl (const stream &stream,
102+ ::sycl::event genindex_executable_t::execute_sycl_impl (const stream &stream,
84103 const std::unordered_map<int , memory> &args,
85104 const std::vector<::sycl::event> &deps) const {
86105 if (stream.get_engine ().get_kind () == engine::kind::cpu) {
@@ -127,10 +146,28 @@ ::sycl::event genindex_executable_t::execute_sycl(const stream &stream,
127146 throw std::runtime_error (" Unimplement" );
128147#endif
129148}
149+
150+ ::sycl::event genindex_executable_t::execute_sycl (const stream &stream,
151+ const std::unordered_map<int , memory> &args,
152+ const std::vector<::sycl::event> &deps) const {
153+ if (get_verbose (dnnl::impl::verbose_t ::exec_profile,
154+ dnnl::impl::component_t ::graph)) {
155+ stream.get ()->wait ();
156+ double start_ms = dnnl::impl::get_msec ();
157+ execute_sycl_impl (stream, args, deps);
158+ stream.get ()->wait ();
159+ double duration_ms = dnnl::impl::get_msec () - start_ms;
160+ VPROF (start_ms, graph, exec, VERBOSE_profile, info_.c_str (),
161+ duration_ms);
162+ return {}; // no event returned in profiling mode
163+ } else {
164+ return execute_sycl_impl (stream, args, deps);
165+ }
166+ }
130167#endif
131168
132169#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL
133- cl_event genindex_executable_t::execute_ocl (const stream &stream,
170+ cl_event genindex_executable_t::execute_ocl_impl (const stream &stream,
134171 const std::unordered_map<int , memory> &args,
135172 const std::vector<cl_event> &deps) const {
136173#if DNNL_GPU_VENDOR == DNNL_VENDOR_INTEL
@@ -174,6 +211,24 @@ cl_event genindex_executable_t::execute_ocl(const stream &stream,
174211 throw std::runtime_error (" Unimplement" );
175212#endif
176213}
214+
215+ cl_event genindex_executable_t::execute_ocl (const stream &stream,
216+ const std::unordered_map<int , memory> &args,
217+ const std::vector<cl_event> &deps) const {
218+ if (get_verbose (dnnl::impl::verbose_t ::exec_profile,
219+ dnnl::impl::component_t ::graph)) {
220+ stream.get ()->wait ();
221+ double start_ms = dnnl::impl::get_msec ();
222+ execute_ocl_impl (stream, args, deps);
223+ stream.get ()->wait ();
224+ double duration_ms = dnnl::impl::get_msec () - start_ms;
225+ VPROF (start_ms, graph, exec, VERBOSE_profile, info_.c_str (),
226+ duration_ms);
227+ return nullptr ; // no event returned in profiling mode
228+ } else {
229+ return execute_ocl_impl (stream, args, deps);
230+ }
231+ }
177232#endif
178233
179234arg_indices_t genindex_executable_t::get_arg_indices (const op_t *op) {
0 commit comments