@@ -765,6 +765,7 @@ class ImageDecoder : public StatelessOperator<Backend> {
765765
766766 // The image descriptors are created in parallel, in block-wise fashion.
767767 auto init_desc_task = [&](int start_sample, int end_sample) {
768+ DomainTimeRange tr (" Create images " + std::to_string (start_sample) + " .." + std::to_string (end_sample), DomainTimeRange::kOrange );
768769 for (int orig_idx = start_sample; orig_idx < end_sample; orig_idx++) {
769770 auto &st = *state_[orig_idx];
770771 if (use_cache && st.load_from_cache ) {
@@ -799,19 +800,24 @@ class ImageDecoder : public StatelessOperator<Backend> {
799800 } else {
800801 DomainTimeRange tr (" Create images" , DomainTimeRange::kOrange );
801802 // Many tasks? Run in thread pool.
802- // The first span of tasks is processed in the main operator thread.
803- for (int i = 1 ; i < ntasks; i++) {
804- int start = i * nsamples / ntasks;
805- int end = (i + 1 ) * nsamples / ntasks;
806- tp_->AddWork ([&, start, end](int ) {
803+ int block_idx = 0 ;
804+ atomic_idx_.store (0 );
805+ auto create_images_task = [&, nblocks](int tid) {
806+ int block_idx;
807+ while ((block_idx = atomic_idx_.fetch_add (1 )) < nblocks) {
808+ int64_t start = nsamples * block_idx / nblocks;
809+ int64_t end = nsamples * (block_idx + 1 ) / nblocks;
807810 init_desc_task (start, end);
808- });
811+ }
812+ };
813+
814+ for (int task_idx = 0 ; task_idx < ntasks - 1 ; task_idx++) {
815+ tp_->AddWork (create_images_task, -task_idx);
809816 }
810- // Start processing of subsequent segments...
811- tp_->RunAll (false );
812- // ...and process the 1st segment in this thread.
813- init_desc_task (0 , nsamples / ntasks);
814- tp_->WaitForWork ();
817+ assert (ntasks >= 2 );
818+ tp_->RunAll (false ); // start work but not wait
819+ create_images_task (-1 );
820+ tp_->WaitForWork (); // wait for the other threads
815821 }
816822
817823 bool any_need_processing = false ;
0 commit comments