compiler: fix missing parallel omp flag for nested #2932
Conversation
Codecov Report: ✅ All modified and coverable lines are covered by tests. Additional details and impacted files: @@ Coverage Diff @@
## main #2932 +/- ##
=======================================
Coverage 83.35% 83.35%
=======================================
Files 248 248
Lines 51734 51734
Branches 4463 4463
=======================================
+ Hits 43122 43124 +2
+ Misses 7859 7858 -1
+ Partials 753 752 -1
Flags with carried forward coverage won't be shown. Click here to find out more. ☔ View full report in Codecov by Sentry. 🚀 New features to boost your workflow:
|
|
Hmmm, I'm still getting a failure with this branch... |
|
It's possible that it is unrelated: [test name lost in extraction] now passes, but [test name lost in extraction] is failing here |
|
Generated code: print(op.ccode)
/* Devito generated code for Operator `Kernel` */
#define _POSIX_C_SOURCE 200809L

/*
 * START(S): declares the timestamps start_S and end_S in the current scope
 * and records the current time of day into start_S.
 */
#define START(S)                        \
    struct timeval start_##S, end_##S;  \
    gettimeofday(&start_##S, NULL);

/*
 * STOP(S, T): records the current time of day into end_S and adds the time
 * elapsed since the matching START(S), in seconds, to the field T->S.
 * Must be expanded in a scope where START(S) is visible.
 */
#define STOP(S, T)                                                  \
    gettimeofday(&end_##S, NULL);                                   \
    T->S += (double)(end_##S.tv_sec - start_##S.tv_sec) +           \
            (double)(end_##S.tv_usec - start_##S.tv_usec) / 1000000;

/* MAX(a, b): larger of the two arguments; each argument may be evaluated twice. */
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#include "stdlib.h"
#include "math.h"
#include "sys/time.h"
#include "omp.h"
/*
 * Descriptor through which an array argument is handed to Kernel.
 * Only `data` and `size` are read by the Kernel in this listing; the
 * remaining fields are carried along but not accessed there.
 */
struct dataobj {
    void *restrict data;            /* raw storage; Kernel casts it to a multi-dimensional float array */
    int *size;                      /* size[i] is used by Kernel as the extent of dimension i */
    unsigned long nbytes;
    unsigned long *npsize, *dsize;
    int *hsize, *hofs, *oofs;
    void *dmap;
};
/* Timing accumulators; STOP(section0, timers) adds elapsed seconds to section0. */
struct profiler {
    double section0;
};
/*
 * Kernel: for every time step in [time_m, time_M] and every index p_u in
 * [p_u_m, p_u_M], adds a weighted copy of u[time][p_u] into the 2x2x2 block
 * of entries of the 4-D array u[t0][..][..][..] located from
 * u_coords[p_u][0..2], with t0 = time % 2.
 *
 * Parameters:
 *   u_vec (listed twice), u_coords_vec - dataobj descriptors; `data` is cast
 *       to a float array whose trailing extents come from `size[1..]`.
 *   x_m..z_M   - index bounds used by the in-range test (with one point of
 *                slack on each side).
 *   h_x..h_z   - divisors applied to the shifted coordinates (presumably the
 *                grid spacing - confirm).
 *   o_x..o_z   - offsets subtracted from the coordinates (presumably the
 *                grid origin - confirm).
 *   p_u_m, p_u_M       - inclusive range of the p_u loop.
 *   time_m, time_M     - inclusive range of the time loop.
 *   nthreads_nonaffine - num_threads of the outer parallel region.
 *   nthreads_nested    - num_threads of the inner (nested) parallel loop.
 *   timers     - receives the elapsed time of each step in timers->section0.
 *
 * Returns 0 unconditionally.
 *
 * NOTE(review): as pasted, this listing declares the parameter `u_vec` twice
 * and the local `u` twice (once 2-D, once 4-D); a C compiler rejects both as
 * redefinitions. Presumably two distinct objects ended up sharing the name
 * `u` - confirm against the operator that generated this code.
 */
int Kernel(struct dataobj *restrict u_vec, struct dataobj *restrict u_vec, struct dataobj *restrict u_coords_vec, const int x_M, const int x_m, const int y_M, const int y_m, const int z_M, const int z_m, const float h_x, const float h_y, const float h_z, const float o_x, const float o_y, const float o_z, const int p_u_M, const int p_u_m, const int time_M, const int time_m, const int nthreads_nested, const int nthreads_nonaffine, struct profiler * timers)
{
/* Typed views of the raw buffers; the first extent is left open and the rest come from size[]. */
float (*restrict u)[u_vec->size[1]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]]) u_vec->data;
float (*restrict u)[u_vec->size[1]][u_vec->size[2]][u_vec->size[3]] __attribute__ ((aligned (64))) = (float (*)[u_vec->size[1]][u_vec->size[2]][u_vec->size[3]]) u_vec->data;
float (*restrict u_coords)[u_coords_vec->size[1]] __attribute__ ((aligned (64))) = (float (*)[u_coords_vec->size[1]]) u_coords_vec->data;
/* t0 alternates between 0 and 1 and selects the first index of the 4-D array written below. */
for (int time = time_m, t0 = (time)%(2); time <= time_M; time += 1, t0 = (time)%(2))
{
/* Everything between START and STOP is timed into timers->section0. */
START(section0)
#pragma omp parallel num_threads(nthreads_nonaffine)
{
/* Dynamic-schedule chunk: one third of an even per-thread share of the p_u range, but at least 1. */
int chunk_size = (int)(MAX(1, (int)((1.0/3.0)*(p_u_M - p_u_m + 1)/nthreads_nonaffine)));
#pragma omp for schedule(dynamic,chunk_size)
for (int p_u = p_u_m; p_u <= p_u_M; p_u += 1)
{
/* rp_ux, rp_uy, rp_uz each take the values 0 and 1: the 2x2x2 offsets around (posx, posy, posz). */
for (int rp_ux = 0; rp_ux <= 1; rp_ux += 1)
{
/* Nested parallel region opened from inside the outer worksharing loop; the collapsed
   (rp_uy, rp_uz) space has 4 iterations. Whether extra threads are actually created
   depends on the OpenMP runtime's nested-parallelism settings. */
#pragma omp parallel for collapse(2) schedule(static,1) num_threads(nthreads_nested)
for (int rp_uy = 0; rp_uy <= 1; rp_uy += 1)
{
for (int rp_uz = 0; rp_uz <= 1; rp_uz += 1)
{
/* Integer part of (coordinate - offset) / divisor along each axis. */
int posx = (int)(floorf((-o_x + u_coords[p_u][0])/h_x));
int posy = (int)(floorf((-o_y + u_coords[p_u][1])/h_y));
int posz = (int)(floorf((-o_z + u_coords[p_u][2])/h_z));
/* Fractional part of the same quotient along each axis. */
float px = -floorf((-o_x + u_coords[p_u][0])/h_x) + (-o_x + u_coords[p_u][0])/h_x;
float py = -floorf((-o_y + u_coords[p_u][1])/h_y) + (-o_y + u_coords[p_u][1])/h_y;
float pz = -floorf((-o_z + u_coords[p_u][2])/h_z) + (-o_z + u_coords[p_u][2])/h_z;
/* Skip targets more than one point outside [x_m, x_M] x [y_m, y_M] x [z_m, z_M]. */
if (rp_ux + posx >= x_m - 1 && rp_uy + posy >= y_m - 1 && rp_uz + posz >= z_m - 1 && rp_ux + posx <= x_M + 1 && rp_uy + posy <= y_M + 1 && rp_uz + posz <= z_M + 1)
{
/* Per-axis weight is p when the offset is 1 and (1 - p) when it is 0; the three weights multiply the source value. */
float r0 = (rp_ux*px + (1 - rp_ux)*(1 - px))*(rp_uy*py + (1 - rp_uy)*(1 - py))*(rp_uz*pz + (1 - rp_uz)*(1 - pz))*u[time][p_u];
/* Atomic add: the target index depends on coordinate data, so concurrent iterations may hit the same entry.
   The +1 on each index presumably accounts for one point of padding - confirm. */
#pragma omp atomic update
u[t0][rp_ux + posx + 1][rp_uy + posy + 1][rp_uz + posz + 1] += r0;
}
}
}
}
}
}
STOP(section0,timers)
}
return 0;
} |
|
Ok so for this one, it makes sense that it fails on ppc, since it checks the pragmas (which only ppc supports with nested parallelism). So maybe add a skipif('ppc'). Can you list all the tests that fail on ppc on top of this one? |
FabioLuporini
left a comment
There was a problem hiding this comment.
incredible, but if it passes all tests -- and there are many -- I guess it's OK...
No description provided.