Skip to content

Commit 6cc9698

Browse files
authored
Merge pull request #347 from fjtrujy/fix_pg
Fix gprof profiling to work with both PBP and PRX executables
2 parents 0f2f391 + 324c580 commit 6cc9698

File tree

5 files changed

+262
-23
lines changed

5 files changed

+262
-23
lines changed

src/prof/prof.c

Lines changed: 59 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,17 @@ static int initialized = 0;
8181
extern int _ftext;
8282
extern int _etext;
8383

84+
/// _start is the entry point defined in both crt0.c and crt0_prx.c
85+
extern int _start;
86+
87+
/// module_start is only defined in PRX startup code (crt0_prx.c) as an alias for _start
88+
/// Using weak reference allows us to detect PRX vs PBP at runtime
89+
/// We also verify module_start == _start to handle the case where a PBP defines its own module_start
90+
extern int module_start __attribute__((weak));
91+
92+
/// Relocation offset (runtime_address - link_address): the page-aligned load base for PRX, 0 for PBP
93+
static unsigned int reloc_offset;
94+
8495
/* forward declarations */
8596
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
8697
void __gprof_cleanup(void);
@@ -104,20 +115,44 @@ static void initialize()
104115
memset(&gp, '\0', sizeof(gp));
105116
gp.state = GMON_PROF_ON;
106117

107-
/* For PRX modules, the code is relocated at load time.
108-
&_ftext and &_etext give us runtime (relocated) addresses.
109-
Link-time addresses start at 0 for PSP executables.
110-
111-
Runtime: &_ftext = relocated_base, &_etext = relocated_base + text_size
112-
Link-time: _ftext = 0, _etext = text_size
113-
114-
So: lowpc_link = 0
115-
highpc_link = &_etext - &_ftext (which equals text_size)
118+
/* Detect PRX vs PBP using the module_start symbol.
119+
In crt0_prx.c, module_start is defined as an alias for _start,
120+
so they share the same address. We check both conditions:
121+
1. module_start exists (not NULL)
122+
2. module_start and _start are at the same address
123+
This handles edge cases where a PBP defines its own module_start function.
124+
125+
For PRX (relocatable modules):
126+
- Linked at address 0 (or small offset like 0x20 for C++)
127+
- Relocated at load time to a PAGE-ALIGNED address (e.g., 0x08804000)
128+
- reloc_offset = page-aligned load base
129+
- link_addr = runtime_addr - reloc_offset
130+
131+
For PBP (standard executables):
132+
- Linked at address 0x08804000 + offset (typically _ftext = 0x0880403c)
133+
- Loaded at same address (no relocation)
134+
- reloc_offset = 0
135+
- link_addr = runtime_addr
116136
*/
117137
gp.lowpc = (unsigned int)&_ftext;
118138
gp.highpc = (unsigned int)&_etext;
119-
gp.lowpc_link = 0;
120-
gp.highpc_link = (unsigned int)&_etext - (unsigned int)&_ftext;
139+
140+
unsigned int runtime_base = (unsigned int)&_ftext & 0x0FFFFFFF;
141+
142+
if (&module_start != NULL && (void *)&module_start == (void *)&_start) {
143+
/* PRX: code was relocated from near-zero to runtime address.
144+
The load base is page-aligned, so we can compute it by masking
145+
off the page offset. The link-time _ftext is the page offset. */
146+
unsigned int page_offset = runtime_base & 0xFFF;
147+
reloc_offset = runtime_base - page_offset; /* Page-aligned load base */
148+
gp.lowpc_link = page_offset; /* Link-time _ftext */
149+
gp.highpc_link = (gp.highpc - gp.lowpc) + page_offset;
150+
} else {
151+
/* PBP: no relocation, addresses match ELF */
152+
reloc_offset = 0;
153+
gp.lowpc_link = runtime_base;
154+
gp.highpc_link = (unsigned int)&_etext & 0x0FFFFFFF;
155+
}
121156
gp.textsize = gp.highpc - gp.lowpc;
122157
gp.hashfraction = HISTFRACTION;
123158

@@ -278,16 +313,17 @@ void __mcount(unsigned int frompc, unsigned int selfpc)
278313
return;
279314
}
280315

281-
/* Mask upper bits and convert to link-time addresses.
282-
Link-time addresses = runtime addresses - gp.lowpc (since lowpc_link = 0) */
283-
frompc = (frompc & 0x0FFFFFFF) - gp.lowpc;
284-
selfpc = (selfpc & 0x0FFFFFFF) - gp.lowpc;
316+
/* Mask upper bits to normalize cached/uncached addresses,
317+
then subtract relocation offset to get link-time addresses. */
318+
frompc = (frompc & 0x0FFFFFFF) - reloc_offset;
319+
selfpc = (selfpc & 0x0FFFFFFF) - reloc_offset;
285320

286-
/* Check if within text section (using link-time range) */
287-
if (frompc <= gp.highpc_link)
321+
/* Check if within text section (using link-time addresses) */
322+
if (frompc >= gp.lowpc_link && frompc < gp.highpc_link)
288323
{
289324
gp.pc = selfpc;
290-
e = frompc / gp.hashfraction;
325+
/* Arc index is based on offset within text section */
326+
e = (frompc - gp.lowpc_link) / gp.hashfraction;
291327
arc = gp.arcs + e;
292328
arc->frompc = frompc;
293329
arc->selfpc = selfpc;
@@ -300,14 +336,15 @@ void __mcount(unsigned int frompc, unsigned int selfpc)
300336
__attribute__((__no_instrument_function__, __no_profile_instrument_function__))
301337
static SceUInt timer_handler(SceUID uid, SceKernelSysClock *requested, SceKernelSysClock *actual, void *common)
302338
{
303-
unsigned int frompc = gp.pc; /* Already in link-time address */
339+
unsigned int pc = gp.pc; /* Already converted to link-time address by __mcount */
304340

305341
if (gp.state == GMON_PROF_ON)
306342
{
307-
/* Check if within text section (using link-time range) */
308-
if (frompc <= gp.highpc_link)
343+
/* Check if within text section (using link-time addresses) */
344+
if (pc >= gp.lowpc_link && pc < gp.highpc_link)
309345
{
310-
int e = frompc / gp.hashfraction;
346+
/* Sample index is based on offset within text section */
347+
int e = (pc - gp.lowpc_link) / gp.hashfraction;
311348
gp.samples[e]++;
312349
}
313350
}

src/samples/gprof/basic/Makefile.sample

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
TARGET = gprofbasic
22
OBJS = main.o
33

4-
INCDIR =
4+
INCDIR =
55
CFLAGS = -O2 -Wall -pg -g
66
CXXFLAGS = $(CFLAGS) -fno-exceptions -fno-rtti
77
ASFLAGS = $(CFLAGS)
88

99
LIBDIR =
1010
LDFLAGS = -pg -g
1111

12+
BUILD_PRX = 1
13+
PSP_FW_VERSION = 600
14+
1215
EXTRA_TARGETS = EBOOT.PBP
1316
PSP_EBOOT_TITLE = GProf Basic Example
1417

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
TARGET = gprofbasic_cpp
2+
OBJS = main.o
3+
4+
INCDIR =
5+
CFLAGS = -O2 -Wall -pg -g
6+
CXXFLAGS = $(CFLAGS) -fno-exceptions -fno-rtti
7+
ASFLAGS = $(CFLAGS)
8+
9+
LIBDIR =
10+
LDFLAGS = -pg -g
11+
LIBS = -lstdc++
12+
13+
BUILD_PRX = 1
14+
PSP_FW_VERSION = 600
15+
16+
EXTRA_TARGETS = EBOOT.PBP
17+
PSP_EBOOT_TITLE = GProf Basic C++ Example
18+
19+
PSPSDK=$(shell psp-config --pspsdk-path)
20+
include $(PSPSDK)/lib/build.mak
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
Sample program to show how to use the `gprof` feature with C++.
2+
3+
The requirements are simple: adding the `-g -pg` flags to both `CFLAGS` and `LDFLAGS` is enough to make profiling work out of the box. For C++ programs, you also need to link with `-lstdc++`.
4+
5+
This example demonstrates:
6+
- Object-oriented programming with a `PrimeCalculator` class
7+
- C++ standard library features (`std::vector`, `std::cout`)
8+
- Profiling C++ code with gprof
9+
10+
First, run your program; once it exits, a `gmon.out` file is automatically generated in the current working directory.
11+
12+
In order to inspect the content of the generated file you need to use the `psp-gprof` binary.
13+
14+
For instance, using the following syntax:
15+
```
16+
psp-gprof -b {binary.elf} gmon.out
17+
```
18+
19+
like:
20+
```
21+
psp-gprof -b gprofbasic_cpp.elf gmon.out
22+
```
23+
24+
25+
It will show something like:
26+
```
27+
Flat profile:
28+
29+
Each sample counts as 0.001 seconds.
30+
% cumulative self self total
31+
time seconds seconds calls ms/call ms/call name
32+
95.98 0.17 0.17 104728 0.00 0.00 PrimeCalculator::isPrime(int) const
33+
4.02 0.17 0.01 1 7.00 7.00 dummyFunction()
34+
0.00 0.17 0.00 1 0.00 174.00 main
35+
0.00 0.17 0.00 1 0.00 167.00 PrimeCalculator::sumOfSquareRoots(int) const
36+
37+
38+
Call graph
39+
40+
41+
granularity: each sample hit covers 2 byte(s) for 0.57% of 0.17 seconds
42+
43+
index % time self children called name
44+
0.00 0.17 1/1 _main [2]
45+
[1] 100.0 0.00 0.17 1 main [1]
46+
0.00 0.17 1/1 PrimeCalculator::sumOfSquareRoots(int) const [4]
47+
0.01 0.00 1/1 dummyFunction() [5]
48+
-----------------------------------------------
49+
<spontaneous>
50+
[2] 100.0 0.00 0.17 _main [2]
51+
0.00 0.17 1/1 main [1]
52+
-----------------------------------------------
53+
0.17 0.00 104728/104728 PrimeCalculator::sumOfSquareRoots(int) const [4]
54+
[3] 96.0 0.17 0.00 104728 PrimeCalculator::isPrime(int) const [3]
55+
-----------------------------------------------
56+
0.00 0.17 1/1 main [1]
57+
[4] 96.0 0.00 0.17 1 PrimeCalculator::sumOfSquareRoots(int) const [4]
58+
0.17 0.00 104728/104728 PrimeCalculator::isPrime(int) const [3]
59+
-----------------------------------------------
60+
0.01 0.00 1/1 main [1]
61+
[5] 4.0 0.01 0.00 1 dummyFunction() [5]
62+
-----------------------------------------------
63+
64+
65+
Index by function name
66+
67+
[5] dummyFunction() [1] main
68+
[3] PrimeCalculator::isPrime(int) const
69+
[4] PrimeCalculator::sumOfSquareRoots(int) const
70+
```
71+
72+
Cheers
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
/*
2+
* PSP Software Development Kit - https://github.com/pspdev
3+
* -----------------------------------------------------------------------
4+
* Licensed under the BSD license, see LICENSE in PSPSDK root for details.
5+
*
6+
* main.cpp - A basic C++ example for checking the GProf profiler.
7+
*
8+
* Copyright (c) 2024 Francisco Javier Trujillo Mata - fjtrujy@gmail.com
9+
*
10+
*/
11+
#include <cstdlib>
12+
#include <cstring>
13+
#include <cstdio>
14+
#include <cmath>
15+
#include <iostream>
16+
#include <vector>
17+
18+
#include <pspmoduleinfo.h>
19+
#include <pspthreadman.h>
20+
21+
PSP_MODULE_INFO("GProf Basic C++ Example", 0, 1, 1);
22+
PSP_MAIN_THREAD_ATTR(THREAD_ATTR_USER | THREAD_ATTR_VFPU);
23+
24+
/**
 * Stateless helper for prime-number computations.
 *
 * Every method is const and depends only on its arguments.
 */
class PrimeCalculator
{
public:
    /**
     * Check whether a number is prime using 6k +/- 1 trial division.
     *
     * @param num Value to test; anything <= 1 is reported as not prime.
     * @return true when num is prime, false otherwise.
     */
    bool isPrime(int num) const
    {
        if (num <= 1)
            return false;
        if (num <= 3)
            return true;
        if (num % 2 == 0 || num % 3 == 0)
            return false;

        // `i <= num / i` is the overflow-free form of `i * i <= num`: the
        // product exceeds INT_MAX (undefined behaviour for signed int) once
        // num gets close to INT_MAX, e.g. for the prime 2147483647.
        for (int i = 5; i <= num / i; i += 6)
        {
            if (num % i == 0 || num % (i + 2) == 0)
                return false;
        }
        return true;
    }

    /**
     * Compute the sum of the square roots of the first N prime numbers.
     *
     * @param N How many primes to accumulate; N <= 0 yields 0.0.
     * @return The accumulated sum as a double.
     */
    double sumOfSquareRoots(int N) const
    {
        int count = 0;
        int num = 2;
        double sum = 0.0;

        while (count < N)
        {
            if (isPrime(num))
            {
                sum += std::sqrt(static_cast<double>(num));
                count++;
            }
            num++;
        }
        return sum;
    }

    /**
     * Collect the first N prime numbers in ascending order.
     *
     * @param N How many primes to return; N <= 0 yields an empty vector.
     * @return Vector holding exactly N primes (empty when N <= 0).
     */
    std::vector<int> getFirstNPrimes(int N) const
    {
        std::vector<int> primes;

        // A negative N cast to an unsigned size would become a huge target
        // and the search loop would effectively never finish.
        if (N <= 0)
            return primes;

        const std::size_t wanted = static_cast<std::size_t>(N);
        primes.reserve(wanted); // final size is known: allocate once

        for (int num = 2; primes.size() < wanted; num++)
        {
            if (isPrime(num))
            {
                primes.push_back(num);
            }
        }
        return primes;
    }
};
81+
82+
/// Writes 10000 dots to std::cout, then ends the line (std::endl also flushes the stream).
void dummyFunction()
{
    int remaining = 10000;
    while (remaining-- > 0)
    {
        std::cout << ".";
    }
    std::cout << std::endl;
}
90+
91+
int main(int argc, char *argv[])
92+
{
93+
std::cout << "Hello from C++!" << std::endl;
94+
95+
dummyFunction();
96+
97+
PrimeCalculator calculator;
98+
int N = 10000; // Large number of primes to compute
99+
100+
double sum = calculator.sumOfSquareRoots(N);
101+
std::cout << "Sum of square roots of the first " << N
102+
<< " prime numbers is " << sum << std::endl;
103+
104+
std::cout << "Goodbye from C++!" << std::endl;
105+
106+
return 0;
107+
}

0 commit comments

Comments
 (0)