Skip to content

Commit 40ab75f

Browse files
nevansnpe9
authored and committed
Integrate performance testing into main tree for release
This is a combination of a bunch of small fixes to get the performance monitoring infrastructure ready for release. I've squashed the changes because they shouldn't pollute the commit log. Update README.performance-monitoring to use a markdown TOC Change README.performance-monitoring to README.performance-monitoring.md so it's recognized as markdown. change .travis.yml to use performance flags get performance compiling with C++ es make sure bool plays nicely with both c++ and regular c get rid of a straggler from the rebase add logging.h and performance.h to Makefile.am add first draft of performance option and ensure that performance profiling does not break a regular make check enabled and disabled with travis.yml making both causes travis.yml to time out fix OSX structure compile silliness and add a test case test performance infrastructure really automake, really? ignore performance test case
1 parent f07dff6 commit 40ab75f

File tree

11 files changed

+168
-59
lines changed

11 files changed

+168
-59
lines changed

.travis.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
language: c
2-
script: ./autogen.sh && ./configure && make && make check
2+
script: ./autogen.sh && ./configure --enable-performance-monitoring && make && make check

README.performance-monitoring renamed to README.performance-monitoring.md

Lines changed: 32 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,31 @@
1-
<div id="table-of-contents">
2-
<h2>Table of Contents</h2>
3-
<div id="text-table-of-contents">
4-
<ul>
5-
<li><a href="#sec-1">1. Introduction</a></li>
6-
<li><a href="#sec-2">2. Quick note on memory management</a></li>
7-
<li><a href="#sec-3">3. Tutorial</a>
8-
<ul>
9-
<li><a href="#sec-3-1">3.1. Quickstart</a></li>
10-
<li><a href="#sec-3-2">3.2. Setup Functions</a>
11-
<ul>
12-
<li><a href="#sec-3-2-1">3.2.1. Internal measurements</a></li>
13-
<li><a href="#sec-3-2-2">3.2.2. State group management</a></li>
14-
</ul>
15-
</li>
16-
<li><a href="#sec-3-3">3.3. Runtime Functions</a>
17-
<ul>
18-
<li><a href="#sec-3-3-1">3.3.1. <code>void qtperf_enter_state(qtperfdata_t* data, qtperfid_t state_id)</code></a></li>
19-
<li><a href="#sec-3-3-2">3.3.2. <code>const char* qtperf_state_name(qtstategroup_t* group, qtperfid_t state_id)</code></a></li>
20-
<li><a href="#sec-3-3-3">3.3.3. <code>void qtperf_start()</code></a></li>
21-
<li><a href="#sec-3-3-4">3.3.4. <code>void qtperf_stop()</code></a></li>
22-
</ul>
23-
</li>
24-
<li><a href="#sec-3-4">3.4. Reporting and data access functions</a>
25-
<ul>
26-
<li><a href="#sec-3-4-1">3.4.1. <code>void qtperf_print_results()</code></a></li>
27-
<li><a href="#sec-3-4-2">3.4.2. <code>void qtperf_print_delimited(qtstategroup_t* group, const char* delim, bool print_headers, const char* prefix)</code></a></li>
28-
<li><a href="#sec-3-4-3">3.4.3. <code>void qtperf_print_perfdata(qtperfdata_t* perfdata, bool show_states_with_zero_time)</code></a></li>
29-
<li><a href="#sec-3-4-4">3.4.4. <code>void qtperf_print_group(qtstategroup_t* group)</code></a></li>
30-
<li><a href="#sec-3-4-5">3.4.5. <code>void qtperf_print_perfdata(qtperfdata_t* data, bool show_states_with_zero_time)</code></a></li>
31-
<li><a href="#sec-3-4-6">3.4.6. <code>qtperfcounter_t qtperf_total_group_time(qtstategroup_t* group)</code></a></li>
32-
<li><a href="#sec-3-4-7">3.4.7. <code>qtperfcounter_t qtperf_total_time(qtperfdata_t* data)</code></a></li>
33-
</ul>
34-
</li>
35-
<li><a href="#sec-3-5">3.5. Iterators</a>
36-
<ul>
37-
<li><a href="#sec-3-5-1">3.5.1. <code>void qtperf_iter_begin(qtperf_iterator** iter)</code></a></li>
38-
<li><a href="#sec-3-5-2">3.5.2. <code>qtperfdata_t* qtperf_iter_next(qtperf_iterator_t** iter)</code></a></li>
39-
<li><a href="#sec-3-5-3">3.5.3. <code>qtperfdata_t* qtperf_iter_deref(qtperf_iterator_t * iter)</code></a></li>
40-
<li><a href="#sec-3-5-4">3.5.4. <code>qtperf_iterator_t* qtperf_iter_end()</code></a></li>
41-
</ul>
42-
</li>
43-
<li><a href="#sec-3-6">3.6. Teardown</a></li>
44-
</ul>
45-
</li>
46-
</ul>
47-
</div>
48-
</div>
1+
# Table of contents
2+
1. [Introduction](#introduction)
3+
2. [Quick note on memory management](#quick-note-on-memory-management)
4+
3. [Tutorial](#tutorial)
5+
1. [Quickstart](#quickstart)
6+
2. [Setup Functions](#setup-functions)
7+
1. [Internal measurements](#internal-measurements)
8+
2. [State group management](#state-group-management)
9+
3. [Runtime Functions](#runtime-functions)
10+
1. `void qtperf_enter_state(qtperfdata_t* data, qtperfid_t state_id)`
11+
2. `const char* qtperf_state_name(qtstategroup_t* group, qtperfid_t state_id)`
12+
3. `void qtperf_start()`
13+
4. `void qtperf_stop()`
14+
4. [Reporting and data access functions](#reporting-and-data-access-functions)
15+
1. `void qtperf_print_results()`
16+
2. `void qtperf_print_delimited(qtstategroup_t* group, const char* delim, bool print_headers, const char* prefix)`
17+
3. `void qtperf_print_perfdata(qtperfdata_t* perfdata, bool show_states_with_zero_time)`
18+
4. `void qtperf_print_group(qtstategroup_t* group)`
19+
5. `void qtperf_print_perfdata(qtperfdata_t* data, bool show_states_with_zero_time)`
20+
6. `void qtperf_print_group(qtstategroup_t* group)`
21+
7. `qtperfcounter_t qtperf_total_group_time(qtstategroup_t* group)`
22+
8. `qtperfcounter_t qtperf_total_time(qtperfdata_t* data)`
23+
5. [Iterators](#iterators)
24+
1. `void qtperf_iter_begin(qtperf_iterator_t** iter)`
25+
2. `qtperfdata_t* qtperf_iter_next(qtperf_iterator_t** iter)`
26+
3. `qtperfdata_t* qtperf_iter_deref(qtperf_iterator_t * iter)`
27+
4. `qtperf_iterator_t* qtperf_iter_end()`
28+
6. [Teardown](#teardown)
4929

5030
# Introduction
5131

@@ -101,7 +81,7 @@ they are to be tracked.
10181

10282
Here is a quick example of a tiny program that makes use of the
10383
basic internal logging features of qtperf:
104-
84+
```
10585
#include<qthread/qthread.h>
10686
#include<qthread/performance.h>
10787
#include<qthread/logging.h>
@@ -149,9 +129,9 @@ basic internal logging features of qtperf:
149129
150130
return 0;
151131
}
152-
132+
```
153133
Here is a program to demonstrate how to set up and use a custom state group:
154-
134+
```
155135
#include<qthread/qthread.h>
156136
#include<qthread/performance.h>
157137
#include<qthread/logging.h>
@@ -292,7 +272,7 @@ Here is a program to demonstrate how to set up and use a custom state group:
292272
293273
return 0;
294274
}
295-
275+
```
296276
API
297277

298278
## Setup Functions

configure.ac

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,11 @@ AC_ARG_ENABLE([internal-spinlock],
155155
[AS_HELP_STRING([--disable-internal-spinlock],
156156
[avoid using the internal spinlock])])
157157

158+
AC_ARG_ENABLE([performance-monitoring],
159+
[AS_HELP_STRING([--enable-performance-monitoring],
160+
[enable performance monitoring infrastructure])])
161+
162+
158163
AC_ARG_ENABLE([debug],
159164
[AS_HELP_STRING([--enable-debug=[[areas]]],
160165
[prints out debugging information as programs
@@ -742,6 +747,12 @@ AC_CACHE_SAVE
742747
# Figure out if we need makecontext
743748
QTHREAD_PICK_CONTEXT_TYPE(qthread_makecontext_type)
744749

750+
AS_IF([test "x$enable_performance_monitoring" = "xyes"],
751+
[AC_DEFINE([QTHREAD_PERFORMANCE], [1], [Defined if performance monitoring support desired]),
752+
[disable_lazy_threadids=yes]
753+
[enable_picky=yes]
754+
])
755+
745756
AS_IF([test "x$with_gcd" = "xshift"],
746757
[AC_DEFINE([QTHREAD_SHIFT_GCD],[1],[use a shift-based gcd algorithm])])
747758

@@ -1060,6 +1071,8 @@ AS_IF([test "x$enable_multinode" = "xyes"],
10601071
LIBS="$LIBS $portals4_LIBS $portals4_runtime_LIBS"
10611072
])
10621073

1074+
1075+
10631076
AS_IF([test "x$enable_lf_febs" == "xyes"],
10641077
[AC_DEFINE([LOCK_FREE_FEBS], [1], [Define to use a lock-free hash table for FEB metadata.])],
10651078
[enable_lf_febs=no])
@@ -1093,6 +1106,7 @@ AM_CONDITIONAL([COMPILE_EUREKAS], [test "x$enable_eurekas" = "xyes"])
10931106
AM_CONDITIONAL([HAVE_GUARD_PAGES], [test "x$enable_guard_pages" = "xyes"])
10941107
AM_CONDITIONAL([HAVE_PROG_TIMELIMIT], [test "x$timelimit_path" != "x"])
10951108
AM_CONDITIONAL([COMPILE_MULTINODE], [test "$enable_multinode" = "yes"])
1109+
AM_CONDITIONAL([QTHREAD_PERFORMANCE], [test "$enable_performance_monitoring" = "yes"])
10961110
AM_CONDITIONAL([WANT_SINGLE_WORKER_SCHEDULER], [test "x$with_scheduler" = "xnemesis" -o "x$with_scheduler" = "xlifo" -o "x$with_scheduler" = "xmutexfifo" -o "x$with_scheduler" = "xmtsfifo" -o "x$with_scheduler" = "xmdlifo"])
10971111
AM_CONDITIONAL([COMPILE_OMP_BENCHMARKS], [test "x$have_openmp" = "xyes"])
10981112
AM_CONDITIONAL([COMPILE_TBB_BENCHMARKS], [test "x$have_tbb" = "xyes"])

include/qt_shepherd_innards.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#define QT_SHEPHERD_INNARDS_H
33

44
#ifdef QTHREAD_PERFORMANCE
5-
#include<qthread/performance.h>
5+
#include <qthread/performance.h>
66
#endif
77

88
/* System Pre-requisites */
@@ -48,7 +48,7 @@ struct qthread_worker_s {
4848
qthread_worker_id_t worker_id;
4949
qthread_worker_id_t packed_worker_id;
5050
#ifdef QTHREAD_PERFORMANCE
51-
qtperfdata_t* performance_data;
51+
struct qtperfdata_s* performance_data;
5252
#endif
5353
Q_ALIGNED(8) uint_fast8_t QTHREAD_CASLOCK(active);
5454
};

include/qthread/Makefile.am

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ pkginclude_HEADERS = \
3030
wavefront.h \
3131
loop_templates.hpp \
3232
loop_iter.hpp \
33+
performance.h \
34+
logging.h \
3335
loop.hpp
3436

3537
# These headers are generated by ./configure

include/qthread/performance.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@
3636
* \version 0.1
3737
*/
3838

39+
#ifdef __cplusplus
40+
extern "C" {
41+
#endif
42+
3943
#ifndef QT_PERFORMANCE_H
4044
#define QT_PERFORMANCE_H
4145
#include<stddef.h>
@@ -45,7 +49,9 @@
4549
# define PERFDBG 0
4650
#endif
4751

52+
#ifndef __cplusplus
4853
typedef unsigned char bool;
54+
#endif
4955
typedef size_t qtperfid_t;
5056
typedef unsigned long long qtperfcounter_t;
5157
typedef unsigned long qttimestamp_t;
@@ -680,4 +686,9 @@ bool qtperf_check_invariants(void);
680686
#endif // ifdef QTHREAD_PERFORMANCE
681687

682688

689+
#endif
690+
691+
/* Declarations of this file */
692+
#ifdef __cplusplus
693+
}
683694
#endif

src/performance.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,6 @@ qtperfdata_t* qtperf_create_perfdata(qtstategroup_t* state_group) {
159159
}
160160

161161
void qtperf_free_perfdata_internals(qtperfdata_t* perfdata) {
162-
qt_free(perfdata->perf_counters);
163-
perfdata->perf_counters = NULL;
164-
perfdata->state_group = NULL;
165162
// don't spin lock here, it will deadlock from qtperf_free_perf_list
166163
if(perfdata->piggybacks != NULL) {
167164
size_t index=0;

0 commit comments

Comments
 (0)