Skip to content

compiler: fix missing parallel omp flag for nested #2932

Open
mloubout wants to merge 1 commit into
main from
patch-nested-parallel
Open

compiler: fix missing parallel omp flag for nested #2932
mloubout wants to merge 1 commit into
main from
patch-nested-parallel

Conversation

@mloubout
Copy link
Copy Markdown
Contributor

No description provided.

@codecov
Copy link
Copy Markdown

codecov Bot commented May 14, 2026

Codecov Report

✅ All modified and coverable lines are covered by tests.
✅ Project coverage is 83.35%. Comparing base (ddb2459) to head (13f9357).

Additional details and impacted files
@@           Coverage Diff           @@
##             main    #2932   +/-   ##
=======================================
  Coverage   83.35%   83.35%           
=======================================
  Files         248      248           
  Lines       51734    51734           
  Branches     4463     4463           
=======================================
+ Hits        43122    43124    +2     
+ Misses       7859     7858    -1     
+ Partials      753      752    -1     
Flag Coverage Δ
pytest-gpu-aomp-amdgpuX 68.70% <ø> (+0.01%) ⬆️
pytest-gpu-gcc- 78.04% <ø> (-0.02%) ⬇️
pytest-gpu-icx- 77.96% <ø> (-0.01%) ⬇️
pytest-gpu-nvc-nvidiaX 69.24% <ø> (ø)

Flags with carried forward coverage won't be shown. Click here to find out more.

☔ View full report in Codecov by Sentry.
📢 Have feedback on the report? Share it here.

🚀 New features to boost your workflow:
  • ❄️ Test Analytics: Detect flaky tests, report on failures, and find test suite problems.

@JDBetteridge
Copy link
Copy Markdown
Contributor

Hmmm, I'm still getting a fail with this branch...

@JDBetteridge
Copy link
Copy Markdown
Contributor

It's possible that it is unrelated

tests/test_dle.py::TestNodeParallelism::test_dynamic_nthreads

now passes, but

tests/test_dle.py::TestNodeParallelism::test_incr_perfect_sparse_outer

is failing here

@JDBetteridge
Copy link
Copy Markdown
Contributor

Generated code:

print(op.ccode)
/* Devito generated code for Operator `Kernel` */

#define _POSIX_C_SOURCE 200809L
#define START(S) struct timeval start_ ## S , end_ ## S ; gettimeofday(&start_ ## S , NULL);
#define STOP(S,T) gettimeofday(&end_ ## S, NULL); T->S += (double)(end_ ## S .tv_sec-start_ ## S.tv_sec)+(double)(end_ ## S .tv_usec-start_ ## S .tv_usec)/1000000;
#define MAX(a,b) (((a) > (b)) ? (a) : (b))

#include "stdlib.h"
#include "math.h"
#include "sys/time.h"
#include "omp.h"

/* Runtime descriptor for a Devito function's data: the raw buffer plus
 * per-dimension size/halo/offset metadata used to build the typed array
 * views inside Kernel (see the casts of ->size and ->data there). */
struct dataobj
{
  void *restrict data;      /* raw contiguous data buffer */
  int * size;               /* per-dimension extents; indexed as size[1..3] by Kernel */
  unsigned long nbytes;     /* total buffer size in bytes */
  unsigned long * npsize;   /* NOTE(review): presumably padded per-dimension sizes — confirm against Devito runtime */
  unsigned long * dsize;    /* NOTE(review): presumably domain (no-halo) sizes — confirm */
  int * hsize;              /* NOTE(review): presumably halo sizes per dimension — confirm */
  int * hofs;               /* NOTE(review): presumably halo offsets — confirm */
  int * oofs;               /* NOTE(review): presumably owned-region offsets — confirm */
  void * dmap;              /* opaque device mapping handle; unused in this kernel */
} ;

/* Per-section wall-clock accumulators, written by the STOP macro. */
struct profiler
{
  double section0;  /* elapsed seconds of the time loop's section0 */
} ;


/*
 * Sparse injection kernel: for each source point p_u, trilinearly spreads
 * its value into the 8 surrounding grid cells of u, accumulating with an
 * atomic update. The outer point loop is dynamically scheduled across
 * nthreads_nonaffine; the inner 2x2 interpolation sub-loop nest is itself
 * parallelized with nthreads_nested (the nested `parallel` flag this PR
 * restores).
 *
 * NOTE(review): the pasted listing declared BOTH the sparse-source array and
 * the grid array as `u` (and both parameters as `u_vec`), which is invalid C
 * (duplicate parameter name / redefinition). The sparse pair is renamed here
 * to `usrc_vec`/`usrc`; the actual Devito-generated names may differ —
 * confirm against the real op.ccode output.
 */
int Kernel(struct dataobj *restrict usrc_vec, struct dataobj *restrict u_vec, struct dataobj *restrict u_coords_vec, const int x_M, const int x_m, const int y_M, const int y_m, const int z_M, const int z_m, const float h_x, const float h_y, const float h_z, const float o_x, const float o_y, const float o_z, const int p_u_M, const int p_u_m, const int time_M, const int time_m, const int nthreads_nested, const int nthreads_nonaffine, struct profiler * timers)
{
  /* Typed, 64-byte-aligned array views over the raw data buffers. */
  float (*restrict usrc)[usrc_vec->size[1]] __attribute__ ((aligned (64))) = (float (*)[usrc_vec->size[1]]) usrc_vec->data;
  float (*restrict u)[u_vec->size[1]][u_vec->size[2]][u_vec->size[3]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]][u_vec->size[2]][u_vec->size[3]]) u_vec->data;
  float (*restrict u_coords)[u_coords_vec->size[1]] __attribute__ ((aligned (64))) = (float (*)[u_coords_vec->size[1]]) u_coords_vec->data;

  /* t0 cycles over the 2 time buffers of u. */
  for (int time = time_m, t0 = (time)%(2); time <= time_M; time += 1, t0 = (time)%(2))
  {
    START(section0)
    #pragma omp parallel num_threads(nthreads_nonaffine)
    {
      /* Chunk so each thread gets ~3 chunks of the point range (load balance). */
      int chunk_size = (int)(MAX(1, (int)((1.0/3.0)*(p_u_M - p_u_m + 1)/nthreads_nonaffine)));
      #pragma omp for schedule(dynamic,chunk_size)
      for (int p_u = p_u_m; p_u <= p_u_M; p_u += 1)
      {
        for (int rp_ux = 0; rp_ux <= 1; rp_ux += 1)
        {
          /* Nested parallel region over the remaining 2x2 corner offsets. */
          #pragma omp parallel for collapse(2) schedule(static,1) num_threads(nthreads_nested)
          for (int rp_uy = 0; rp_uy <= 1; rp_uy += 1)
          {
            for (int rp_uz = 0; rp_uz <= 1; rp_uz += 1)
            {
              /* Grid cell containing the source point, and fractional offsets. */
              int posx = (int)(floorf((-o_x + u_coords[p_u][0])/h_x));
              int posy = (int)(floorf((-o_y + u_coords[p_u][1])/h_y));
              int posz = (int)(floorf((-o_z + u_coords[p_u][2])/h_z));
              float px = -floorf((-o_x + u_coords[p_u][0])/h_x) + (-o_x + u_coords[p_u][0])/h_x;
              float py = -floorf((-o_y + u_coords[p_u][1])/h_y) + (-o_y + u_coords[p_u][1])/h_y;
              float pz = -floorf((-o_z + u_coords[p_u][2])/h_z) + (-o_z + u_coords[p_u][2])/h_z;
              /* Skip corners that fall outside the (halo-extended) domain. */
              if (rp_ux + posx >= x_m - 1 && rp_uy + posy >= y_m - 1 && rp_uz + posz >= z_m - 1 && rp_ux + posx <= x_M + 1 && rp_uy + posy <= y_M + 1 && rp_uz + posz <= z_M + 1)
              {
                /* Trilinear weight for this corner times the source value. */
                float r0 = (rp_ux*px + (1 - rp_ux)*(1 - px))*(rp_uy*py + (1 - rp_uy)*(1 - py))*(rp_uz*pz + (1 - rp_uz)*(1 - pz))*usrc[time][p_u];
                /* Atomic: distinct points may inject into the same cell. */
                #pragma omp atomic update
                u[t0][rp_ux + posx + 1][rp_uy + posy + 1][rp_uz + posz + 1] += r0;
              }
            }
          }
        }
      }
    }
    STOP(section0,timers)
  }

  return 0;
}

@mloubout
Copy link
Copy Markdown
Contributor Author

Ok so for this one, it makes sense that it fails on ppc since it checks the pragmas (which only ppc supports with the nested). So maybe a skipif('ppc'). Can you list all the tests that fail on ppc on top of this one?

Copy link
Copy Markdown
Contributor

@FabioLuporini FabioLuporini left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

incredible, but if it passes all tests -- and there are many -- I guess it's OK...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants