Skip to content

Commit cd1c07a

Browse files
committed
miscellaneous fixes
- use pthread instread of omp in multi_monitor.cpp test - fix path for hipcc - use papi@master as default version of papi - print the test name in run script
1 parent d151772 commit cd1c07a

File tree

5 files changed

+146
-113
lines changed

5 files changed

+146
-113
lines changed

validation_tests/papi-rocm/Makefile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
PAPI_ROCM_ROOT ?= $(ROCM_ROOT) #/path/to/rocm/
22

3-
CC = $(PAPI_ROCM_ROOT)/bin/hipcc
4-
CXX = $(PAPI_ROCM_ROOT)/bin/hipcc
5-
CXXFLAGS += -g -O2 -fopenmp
3+
CC = $(HIP_ROOT)/bin/hipcc
4+
CXX = $(HIP_ROOT)/bin/hipcc
5+
CXXFLAGS += -g -O2
66
CPPFLAGS += -I$(PAPI_ROCM_ROOT)/include -I$(PAPI_ROOT)/include
7-
LDFLAGS += -L$(PAPI_ROOT)/lib -lpapi -fopenmp
7+
LDFLAGS += -L$(PAPI_ROOT)/lib -lpapi
88

99
ALL: single_monitor multi_monitor overflow
1010

validation_tests/papi-rocm/compile.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33
set -e
44
set -x
55

6-
make PAPIROOT=$PAPI_ROOT
6+
make PAPIROOT=$PAPI_ROOT

validation_tests/papi-rocm/multi_monitor.cpp

Lines changed: 130 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,119 @@
22
#include <stdlib.h>
33
#include <string.h>
44
#include <unistd.h>
5-
#include <omp.h>
5+
#include <pthread.h>
66
#include "papi.h"
77
#include "matmul.h"
88

9+
#define NUM_EVENTS 2
10+
const char *events[NUM_EVENTS] = {
11+
"rocm:::SQ_WAVES",
12+
"rocm:::SQ_WAVES_RESTORED",
13+
};
14+
15+
typedef struct {
16+
int num_thread;
17+
} thread_arg_t;
18+
19+
void *run(void *arg)
20+
{
21+
int eventset = PAPI_NULL;
22+
int papi_errno = PAPI_create_eventset(&eventset);
23+
if (papi_errno != PAPI_OK) {
24+
fprintf(stderr, "ERROR: PAPI_create_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
25+
exit(EXIT_FAILURE);
26+
}
27+
28+
int thread_num = ((thread_arg_t *) arg)->num_thread;
29+
for (int j = 0; j < NUM_EVENTS; ++j) {
30+
char named_event[PAPI_MAX_STR_LEN] = { 0 };
31+
sprintf(named_event, "%s:device=%d", events[j], thread_num);
32+
papi_errno = PAPI_add_named_event(eventset, (const char *) named_event);
33+
if (papi_errno != PAPI_OK && papi_errno != PAPI_ENOEVNT) {
34+
fprintf(stderr, "ERROR: PAPI_add_named_event: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
35+
exit(EXIT_FAILURE);
36+
}
37+
}
38+
39+
papi_errno = PAPI_start(eventset);
40+
if (papi_errno != PAPI_OK) {
41+
fprintf(stderr, "ERROR: PAPI_start: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
42+
exit(EXIT_FAILURE);
43+
}
44+
45+
hipError_t hip_errno = hipSetDevice(thread_num);
46+
if (hip_errno != hipSuccess) {
47+
fprintf(stderr, "ERROR: hipSetDevice: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
48+
exit(EXIT_FAILURE);
49+
}
50+
51+
hipStream_t stream;
52+
hip_errno = hipStreamCreate(&stream);
53+
if (hip_errno != hipSuccess) {
54+
fprintf(stderr, "ERROR: hipStreamCreate: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
55+
exit(EXIT_FAILURE);
56+
}
57+
58+
void *handle;
59+
int matmul_errno;
60+
matmul_errno = matmul_init(&handle);
61+
if (matmul_errno != MATMUL_SUCCESS) {
62+
fprintf(stderr, "ERROR: matmul_init: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
63+
exit(EXIT_FAILURE);
64+
}
65+
66+
matmul_errno = matmul_run(handle, stream);
67+
if (matmul_errno != MATMUL_SUCCESS) {
68+
fprintf(stderr, "ERROR: matmul_run: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
69+
exit(EXIT_FAILURE);
70+
}
71+
72+
hip_errno = hipStreamSynchronize(stream);
73+
if (hip_errno != hipSuccess) {
74+
fprintf(stderr, "ERROR: hipStreamSynchronize: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
75+
exit(EXIT_FAILURE);
76+
}
77+
78+
hip_errno = hipStreamDestroy(stream);
79+
if (hip_errno != hipSuccess) {
80+
fprintf(stderr, "ERROR: hipStreamDestroy: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
81+
exit(EXIT_FAILURE);
82+
}
83+
84+
matmul_errno = matmul_finalize(&handle);
85+
if (matmul_errno != MATMUL_SUCCESS) {
86+
fprintf(stderr, "ERROR: matmul_finalize: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
87+
exit(EXIT_FAILURE);
88+
}
89+
90+
long long counters[NUM_EVENTS] = { 0 };
91+
papi_errno = PAPI_stop(eventset, counters);
92+
if (papi_errno != PAPI_OK) {
93+
fprintf(stderr, "ERROR: PAPI_stop: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
94+
exit(EXIT_FAILURE);
95+
}
96+
97+
for (int i = 0; i < NUM_EVENTS; ++i) {
98+
fprintf(stdout, "[tid:%d] %s:device=%d : %lld\n",
99+
thread_num, events[i], thread_num,
100+
counters[i]);
101+
}
102+
103+
papi_errno = PAPI_cleanup_eventset(eventset);
104+
if (papi_errno != PAPI_OK) {
105+
fprintf(stderr, "ERROR: PAPI_cleanup_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
106+
exit(EXIT_FAILURE);
107+
}
108+
109+
papi_errno = PAPI_destroy_eventset(&eventset);
110+
if (papi_errno != PAPI_OK) {
111+
fprintf(stderr, "ERROR: PAPI_destroy_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
112+
exit(EXIT_FAILURE);
113+
}
114+
115+
pthread_exit(NULL);
116+
}
117+
9118
int main(int argc, char *argv[])
10119
{
11120
int papi_errno;
@@ -30,7 +139,7 @@ int main(int argc, char *argv[])
30139
exit(EXIT_FAILURE);
31140
}
32141

33-
papi_errno = PAPI_thread_init((unsigned long (*)(void)) omp_get_thread_num);
142+
papi_errno = PAPI_thread_init((unsigned long (*)(void)) pthread_self);
34143
if (papi_errno != PAPI_OK) {
35144
fprintf(stderr, "ERROR: PAPI_thread_init: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
36145
exit(EXIT_FAILURE);
@@ -44,112 +153,33 @@ int main(int argc, char *argv[])
44153
}
45154

46155
num_threads = (num_threads < num_devices) ? num_threads : num_devices;
47-
omp_set_num_threads(num_threads);
48156
fprintf(stdout, "Run rocm test with %d threads\n", num_threads);
49157

50-
#define NUM_EVENTS 2
51-
const char *events[NUM_EVENTS] = {
52-
"rocm:::SQ_WAVES",
53-
"rocm:::SQ_WAVES_RESTORED",
54-
};
55-
56-
#pragma omp parallel
57-
{
58-
int eventset = PAPI_NULL;
59-
papi_errno = PAPI_create_eventset(&eventset);
60-
if (papi_errno != PAPI_OK) {
61-
fprintf(stderr, "ERROR: PAPI_create_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
62-
exit(EXIT_FAILURE);
63-
}
64-
65-
int thread_num = omp_get_thread_num();
66-
for (int j = 0; j < NUM_EVENTS; ++j) {
67-
char named_event[PAPI_MAX_STR_LEN] = { 0 };
68-
sprintf(named_event, "%s:device=%d", events[j], thread_num);
69-
papi_errno = PAPI_add_named_event(eventset, (const char *) named_event);
70-
if (papi_errno != PAPI_OK && papi_errno != PAPI_ENOEVNT) {
71-
fprintf(stderr, "ERROR: PAPI_add_named_event: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
72-
exit(EXIT_FAILURE);
73-
}
74-
}
75-
76-
papi_errno = PAPI_start(eventset);
77-
if (papi_errno != PAPI_OK) {
78-
fprintf(stderr, "ERROR: PAPI_start: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
79-
exit(EXIT_FAILURE);
80-
}
81-
82-
hip_errno = hipSetDevice(thread_num);
83-
if (hip_errno != hipSuccess) {
84-
fprintf(stderr, "ERROR: hipSetDevice: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
85-
exit(EXIT_FAILURE);
86-
}
87-
88-
hipStream_t stream;
89-
hip_errno = hipStreamCreate(&stream);
90-
if (hip_errno != hipSuccess) {
91-
fprintf(stderr, "ERROR: hipStreamCreate: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
92-
exit(EXIT_FAILURE);
93-
}
94-
95-
void *handle;
96-
int matmul_errno;
97-
matmul_errno = matmul_init(&handle);
98-
if (matmul_errno != MATMUL_SUCCESS) {
99-
fprintf(stderr, "ERROR: matmul_init: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
100-
exit(EXIT_FAILURE);
101-
}
102-
103-
matmul_errno = matmul_run(handle, stream);
104-
if (matmul_errno != MATMUL_SUCCESS) {
105-
fprintf(stderr, "ERROR: matmul_run: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
106-
exit(EXIT_FAILURE);
107-
}
108-
109-
hip_errno = hipStreamSynchronize(stream);
110-
if (hip_errno != hipSuccess) {
111-
fprintf(stderr, "ERROR: hipStreamSynchronize: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
112-
exit(EXIT_FAILURE);
113-
}
114-
115-
hip_errno = hipStreamDestroy(stream);
116-
if (hip_errno != hipSuccess) {
117-
fprintf(stderr, "ERROR: hipStreamDestroy: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
118-
exit(EXIT_FAILURE);
119-
}
120-
121-
matmul_errno = matmul_finalize(&handle);
122-
if (matmul_errno != MATMUL_SUCCESS) {
123-
fprintf(stderr, "ERROR: matmul_finalize: %d: %s\n", PAPI_EMISC, PAPI_strerror(PAPI_EMISC));
124-
exit(EXIT_FAILURE);
125-
}
158+
pthread_t *thread = (pthread_t *)malloc(num_threads * sizeof(*thread));
159+
if (thread == NULL) {
160+
return EXIT_FAILURE;
161+
}
126162

127-
long long counters[NUM_EVENTS] = { 0 };
128-
papi_errno = PAPI_stop(eventset, counters);
129-
if (papi_errno != PAPI_OK) {
130-
fprintf(stderr, "ERROR: PAPI_stop: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
131-
exit(EXIT_FAILURE);
132-
}
163+
thread_arg_t *arg = (thread_arg_t *)malloc(num_threads * sizeof(*arg));
164+
if (arg == NULL) {
165+
return EXIT_FAILURE;
166+
}
133167

134-
for (int i = 0; i < NUM_EVENTS; ++i) {
135-
fprintf(stdout, "[tid:%d] %s:device=%d : %lld\n",
136-
omp_get_thread_num(), events[i], thread_num,
137-
counters[i]);
138-
}
168+
pthread_attr_t attr;
169+
pthread_attr_init(&attr);
170+
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
139171

140-
papi_errno = PAPI_cleanup_eventset(eventset);
141-
if (papi_errno != PAPI_OK) {
142-
fprintf(stderr, "ERROR: PAPI_cleanup_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
143-
exit(EXIT_FAILURE);
144-
}
172+
for (int i = 0; i < num_threads; ++i) {
173+
arg[i].num_thread = i;
174+
pthread_create(&thread[i], &attr, run, &arg[i]);
175+
}
145176

146-
papi_errno = PAPI_destroy_eventset(&eventset);
147-
if (papi_errno != PAPI_OK) {
148-
fprintf(stderr, "ERROR: PAPI_destroy_eventset: %d: %s\n", papi_errno, PAPI_strerror(papi_errno));
149-
exit(EXIT_FAILURE);
150-
}
177+
for (int i = 0; i < num_threads; ++i) {
178+
pthread_join(thread[i], NULL);
151179
}
152180

181+
free(thread);
182+
free(arg);
153183
PAPI_shutdown();
154184

155185
return EXIT_SUCCESS;
Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
#!/bin/bash
22

3+
HSA_ROOT=$PAPI_ROCM_ROOT
4+
PAPI_ROOT="$(dirname $(dirname `which papi_component_avail`))"
5+
36
echo "=== Run rocm test in sampling mode"
4-
ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./single_monitor
5-
ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./multi_monitor --threads=1
6-
ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./multi_monitor --threads=2
7-
ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./overflow
7+
echo "single monitor" && ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$HSA_ROOT/lib:$LD_LIBRARY_PATH ./single_monitor && echo ""
8+
echo "multi monitor" && ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$HSA_ROOT/lib:$LD_LIBRARY_PATH ./multi_monitor --threads=1 && echo ""
9+
echo "multi monitor" && ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$HSA_ROOT/lib:$LD_LIBRARY_PATH ./multi_monitor --threads=2 && echo ""
10+
echo "overflow" && ROCP_HSA_INTERCEPT=0 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$HSA_ROOT/lib:$LD_LIBRARY_PATH ./overflow && echo ""
811

912
echo "=== Run rocm test in intercept mode"
10-
ROCP_HSA_INTERCEPT=1 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./single_monitor
11-
ROCP_HSA_INTERCEPT=1 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./multi_monitor --threads=1
12-
ROCP_HSA_INTERCEPT=1 LD_LIBRARY_PATH=$PAPI_ROOT/lib ./multi_monitor --threads=2
13+
echo "single monitor" && ROCP_HSA_INTERCEPT=1 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$HSA_ROOT/lib:$LD_LIBRARY_PATH ./single_monitor && echo ""
14+
echo "multi monitor" && ROCP_HSA_INTERCEPT=1 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$HSA_ROOT/lib:$LD_LIBRARY_PATH ./multi_monitor --threads=1 && echo ""
15+
echo "multi monitor" && ROCP_HSA_INTERCEPT=1 LD_LIBRARY_PATH=$PAPI_ROOT/lib:$HSA_ROOT/lib:$LD_LIBRARY_PATH ./multi_monitor --threads=2 && echo ""
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
#!/bin/bash
22
. ../../setup.sh
3-
spackLoadUnique papi+rocm@6.0.0.2:
3+
spackLoadUnique papi+rocm@master:

0 commit comments

Comments
 (0)