Skip to content

Commit 8fb017e

Browse files
authored
feat: adding a new high-precision benchmarking tool (for Linux only). (#343)
* Adding a new high-precision benchmarking tool (for Linux only). * Various fixes
1 parent 52714f7 commit 8fb017e

File tree

7 files changed

+313
-12
lines changed

7 files changed

+313
-12
lines changed

benchmarks/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,15 @@ target_link_libraries(percent_encode PRIVATE ada)
3232
target_include_directories(percent_encode PUBLIC "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>")
3333
target_include_directories(percent_encode PUBLIC "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/benchmarks>")
3434

35+
36+
# Only define the model_bench target when configuring for a Linux system.
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
37+
# The model_bench program requires accurate/low-overhead performance counters.
38+
# We only have such support under Linux.
39+
add_executable(model_bench model_bench.cpp)
40+
target_link_libraries(model_bench PRIVATE ada)
41+
# ADA_URL_FILE is read by model_bench.cpp as the default input path when no
# file is given on the command line.
target_compile_definitions(model_bench PRIVATE ADA_URL_FILE="${url-dataset_SOURCE_DIR}/out.txt")
42+
endif()
43+
3544
include(${PROJECT_SOURCE_DIR}/cmake/import.cmake)
3645

3746
set_off(BENCHMARK_ENABLE_TESTING)

benchmarks/model_bench.cpp

Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,280 @@
1+
#include <cstdlib>
2+
#include <filesystem>
3+
#include <fstream>
4+
#include <iostream>
5+
#include <memory>
6+
#include <sstream>
7+
8+
#include "ada.h"
9+
#include "performancecounters/event_counter.h"
10+
event_collector collector;
11+
12+
/**
 * Reports whether a file system entry exists at the given path.
 *
 * When the path does not exist, or its status cannot be determined, a
 * diagnostic line is written to standard output and false is returned.
 *
 * @param filename non-null, null-terminated path to probe
 * @return true if the path exists, false otherwise
 */
bool file_exists(const char *filename) {
  // Use the non-throwing overload: a path whose status cannot be queried is
  // reported as missing instead of raising std::filesystem::filesystem_error.
  std::error_code ec;
  if (std::filesystem::exists(std::filesystem::path{filename}, ec)) {
    return true;
  }
  std::cout << " file missing: " << filename << std::endl;
  return false;
}
22+
23+
/**
 * Reads the whole content of a file into a string.
 *
 * The file is consumed in fixed-size chunks. The stream is configured to
 * throw only on badbit; a file that cannot be opened yields an empty string.
 *
 * @param filename path of the file to read
 * @return the bytes read from the file
 */
std::string read_file(std::string filename) {
  constexpr std::size_t chunk_size = 4096;
  std::ifstream input(filename.c_str());
  input.exceptions(std::ios_base::badbit);

  std::string contents;
  std::string chunk(chunk_size, '\0');
  for (;;) {
    input.read(&chunk[0], chunk_size);
    // gcount() is the number of bytes actually delivered by the last read,
    // which is smaller than chunk_size for the final (partial) chunk.
    contents.append(chunk, 0, std::size_t(input.gcount()));
    if (!input) {
      break;
    }
  }
  return contents;
}
35+
36+
/**
 * Splits a text into lines and trims each line.
 *
 * Lines are separated by '\n'. Leading and trailing whitespace (as defined by
 * std::isspace) is removed from every line, and lines that are empty after
 * trimming are dropped.
 *
 * @param str the text to split
 * @return the non-empty, trimmed lines in their original order
 */
std::vector<std::string> split_string(const std::string &str) {
  // std::isspace has undefined behaviour for negative arguments other than
  // EOF, so bytes >= 0x80 must be converted to unsigned char first.
  const auto is_space = [](char c) {
    return std::isspace(static_cast<unsigned char>(c)) != 0;
  };

  std::vector<std::string> result;
  std::string_view remaining{str};
  while (!remaining.empty()) {
    const std::size_t newline = remaining.find('\n');
    std::string_view line = remaining.substr(0, newline);
    remaining.remove_prefix(newline == std::string_view::npos
                                ? remaining.size()
                                : newline + 1);
    // Some parsers like boost/url will refuse to parse a URL with trailing
    // whitespace.
    while (!line.empty() && is_space(line.back())) {
      line.remove_suffix(1);
    }
    while (!line.empty() && is_space(line.front())) {
      line.remove_prefix(1);
    }
    if (!line.empty()) {
      result.emplace_back(line);
    }
  }
  return result;
}
55+
56+
struct stat_numbers {
57+
std::string url_string{};
58+
std::string href{};
59+
ada::url_components components{};
60+
event_aggregate counters{};
61+
bool is_valid = true;
62+
bool has_port = false;
63+
bool has_credentials = false;
64+
bool has_fragment = false;
65+
bool has_search = false;
66+
};
67+
68+
/**
 * Counts the bytes of a string that are in the ASCII range (value below 128).
 *
 * @param s the string to inspect
 * @return number of bytes of s whose unsigned value is less than 128
 */
size_t count_ascii_bytes(const std::string &s) {
  size_t ascii_total = 0;
  for (size_t pos = 0; pos != s.size(); ++pos) {
    const uint8_t byte = uint8_t(s[pos]);
    ascii_total += (byte < 128) ? 1 : 0;
  }
  return ascii_total;
}
77+
78+
template <class result_type = ada::url_aggregator>
79+
std::vector<stat_numbers> collect_values(
80+
const std::vector<std::string> &url_examples, size_t trials) {
81+
std::vector<stat_numbers> numbers(url_examples.size());
82+
for (size_t i = 0; i < url_examples.size(); i++) {
83+
numbers[i].url_string = url_examples[i];
84+
ada::result<result_type> url = ada::parse<result_type>(url_examples[i]);
85+
if (url) {
86+
numbers[i].is_valid = true;
87+
numbers[i].href = url->get_href();
88+
numbers[i].components = url->get_components();
89+
numbers[i].has_port = url->has_port();
90+
numbers[i].has_credentials = url->has_credentials();
91+
numbers[i].has_fragment = url->has_hash();
92+
numbers[i].has_search = url->has_search();
93+
} else {
94+
numbers[i].is_valid = false;
95+
}
96+
}
97+
volatile size_t href_size = 0;
98+
for (size_t i = 0; i < trials; i++) {
99+
for (stat_numbers &n : numbers) {
100+
std::atomic_thread_fence(std::memory_order_acquire);
101+
collector.start();
102+
ada::result<result_type> url = ada::parse<result_type>(n.url_string);
103+
if (url) {
104+
href_size += url->get_href().size();
105+
}
106+
std::atomic_thread_fence(std::memory_order_release);
107+
event_count allocate_count = collector.end();
108+
n.counters << allocate_count;
109+
}
110+
}
111+
return numbers;
112+
}
113+
114+
// Path of the URL list used when no file is given on the command line. The
// build may provide it through the ADA_URL_FILE definition; without that
// definition there is no default.
#ifndef ADA_URL_FILE
const char *default_file = nullptr;
#else
const char *default_file = ADA_URL_FILE;
#endif
119+
120+
std::vector<std::string> init_data(const char *input = default_file) {
121+
std::vector<std::string> input_urls;
122+
if (input == nullptr) {
123+
return input_urls;
124+
}
125+
126+
if (!file_exists(input)) {
127+
std::cout << "File not found !" << input << std::endl;
128+
return input_urls;
129+
} else {
130+
std::cout << "# Loading " << input << std::endl;
131+
input_urls = split_string(read_file(input));
132+
}
133+
return input_urls;
134+
}
135+
136+
void print(const stat_numbers &n) {
137+
std::cout << std::setw(15) << n.url_string.size() << ",";
138+
std::cout << std::setw(15) << n.counters.best.cycles() << "," << std::setw(15)
139+
<< size_t(n.counters.cycles()) << ",";
140+
std::cout << std::setw(15) << n.counters.best.instructions() << ","
141+
<< std::setw(15) << n.counters.instructions() << ",";
142+
std::cout << std::setw(15) << n.is_valid << ",";
143+
144+
// hash size
145+
146+
std::cout << std::setw(15) << n.href.size() << ",";
147+
size_t end = n.href.size();
148+
if (n.components.hash_start != ada::url_components::omitted) {
149+
std::cout << std::setw(15) << (end - n.components.hash_start) << ",";
150+
end = n.components.hash_start;
151+
} else {
152+
std::cout << std::setw(15) << 0 << ",";
153+
}
154+
// search size
155+
if (n.components.search_start != ada::url_components::omitted) {
156+
std::cout << std::setw(15) << (end - n.components.search_start) << ",";
157+
end = n.components.search_start;
158+
} else {
159+
std::cout << std::setw(15) << 0 << ",";
160+
}
161+
// path size
162+
std::cout << std::setw(15) << (end - n.components.pathname_start) << ",";
163+
end = n.components.pathname_start;
164+
// port size
165+
std::cout << std::setw(15) << (end - n.components.host_end) << ",";
166+
end = n.components.host_end;
167+
// host size
168+
std::cout << std::setw(15) << (end - n.components.host_start) << ",";
169+
end = n.components.host_start;
170+
// user/pass size
171+
std::cout << std::setw(15) << (end - n.components.protocol_end) << ",";
172+
end = n.components.protocol_end;
173+
// protocol type
174+
ada::result<ada::url_aggregator> url =
175+
ada::parse<ada::url_aggregator>(n.url_string);
176+
if (url) {
177+
std::cout << std::setw(15) << int(url->type);
178+
} else {
179+
std::cout << std::setw(15) << -1;
180+
}
181+
std::cout << ",";
182+
std::cout << std::setw(15) << n.has_port << ",";
183+
std::cout << std::setw(15) << n.has_credentials << ",";
184+
std::cout << std::setw(15) << n.has_fragment << ",";
185+
std::cout << std::setw(15) << n.has_search << ",";
186+
std::cout << std::setw(15)
187+
<< (n.url_string.size() - count_ascii_bytes(n.url_string)) << ",";
188+
std::cout << std::setw(15) << (n.href.size() - count_ascii_bytes(n.href))
189+
<< ",";
190+
std::cout << std::setw(15)
191+
<< (count_ascii_bytes(n.url_string) == n.url_string.size()) << ",";
192+
std::cout << std::setw(15) << (n.href == n.url_string);
193+
}
194+
void print(const std::vector<stat_numbers> numbers) {
195+
std::cout << std::setw(15) << "input_size"
196+
<< ",";
197+
std::cout << std::setw(15) << "best_cycles"
198+
<< ",";
199+
std::cout << std::setw(15) << "mean_cycles"
200+
<< ",";
201+
std::cout << std::setw(15) << "best_instr"
202+
<< ",";
203+
std::cout << std::setw(15) << "mean_instr"
204+
<< ",";
205+
std::cout << std::setw(15) << "is_valid"
206+
<< ",";
207+
std::cout << std::setw(15) << "href_size"
208+
<< ",";
209+
std::cout << std::setw(15) << "hash_size"
210+
<< ",";
211+
std::cout << std::setw(15) << "search_size"
212+
<< ",";
213+
std::cout << std::setw(15) << "path_size"
214+
<< ",";
215+
std::cout << std::setw(15) << "port_size"
216+
<< ",";
217+
std::cout << std::setw(15) << "host_size"
218+
<< ",";
219+
std::cout << std::setw(15) << "credential_size"
220+
<< ",";
221+
std::cout << std::setw(15) << "protocol_type"
222+
<< ",";
223+
std::cout << std::setw(15) << "has_port"
224+
<< ",";
225+
std::cout << std::setw(15) << "has_authority"
226+
<< ",";
227+
std::cout << std::setw(15) << "has_fragment"
228+
<< ",";
229+
std::cout << std::setw(15) << "has_search"
230+
<< ",";
231+
std::cout << std::setw(15) << "non_ascii_bytes"
232+
<< ",";
233+
std::cout << std::setw(15) << "href_non_ascii_bytes"
234+
<< ",";
235+
std::cout << std::setw(15) << "is_ascii"
236+
<< ",";
237+
std::cout << std::setw(15) << "input_is_href";
238+
239+
std::cout << std::endl;
240+
241+
for (const stat_numbers &n : numbers) {
242+
print(n);
243+
std::cout << std::endl;
244+
}
245+
}
246+
247+
int main(int argc, char **argv) {
248+
std::vector<std::string> input_urls;
249+
if (argc == 1) {
250+
input_urls = init_data();
251+
} else {
252+
input_urls = init_data(argv[1]);
253+
}
254+
if (input_urls.empty()) {
255+
std::cout << "pass the path to a file containing a list of URL (one per "
256+
"line) as a parameter."
257+
<< std::endl;
258+
return EXIT_FAILURE;
259+
}
260+
if (!collector.has_events()) {
261+
std::cout << "We require access to performance counters. (Try sudo.)"
262+
<< std::endl;
263+
return EXIT_FAILURE;
264+
}
265+
std::string empty;
266+
// We always start with a null URL for calibration.
267+
input_urls.insert(input_urls.begin(), empty);
268+
bool use_ada_url = (getenv("USE_URL") != nullptr);
269+
size_t trials = 100;
270+
std::cout << "# trials " << trials << std::endl;
271+
if (use_ada_url) {
272+
std::cout << "# ada::url" << std::endl;
273+
print(collect_values<ada::url>(input_urls, trials));
274+
} else {
275+
std::cout << "# ada::url_aggregator" << std::endl;
276+
print(collect_values<ada::url_aggregator>(input_urls, trials));
277+
}
278+
279+
return EXIT_SUCCESS;
280+
}

include/ada/url-inl.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ namespace ada {
1919
[[nodiscard]] ada_really_inline bool url::has_credentials() const noexcept {
2020
return !username.empty() || !password.empty();
2121
}
22+
[[nodiscard]] ada_really_inline bool url::has_port()
23+
const noexcept {
24+
return port.has_value();
25+
}
2226
[[nodiscard]] inline bool url::cannot_have_credentials_or_port() const {
2327
return !host.has_value() || host.value().empty() ||
2428
type == ada::scheme::type::FILE;
@@ -149,9 +153,9 @@ inline void url::clear_pathname() { path.clear(); }
149153

150154
inline void url::clear_search() { query = std::nullopt; }
151155

152-
inline bool url::has_hash() const { return hash.has_value(); }
156+
[[nodiscard]] inline bool url::has_hash() const noexcept { return hash.has_value(); }
153157

154-
inline bool url::has_search() const { return query.has_value(); }
158+
[[nodiscard]] inline bool url::has_search() const noexcept { return query.has_value(); }
155159

156160
inline void url::set_protocol_as_file() { type = ada::scheme::type::FILE; }
157161

include/ada/url.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,8 @@ struct url : url_base {
9393

9494
/** @return true if it has an host but it is the empty string */
9595
[[nodiscard]] inline bool has_empty_hostname() const noexcept;
96+
/** @return true if the URL has a (non default) port */
97+
[[nodiscard]] inline bool has_port() const noexcept;
9698
/** @return true if it has a host (included an empty host) */
9799
[[nodiscard]] inline bool has_hostname() const noexcept;
98100
[[nodiscard]] bool has_valid_domain() const noexcept override;
@@ -280,7 +282,10 @@ struct url : url_base {
280282
*/
281283
[[nodiscard]] ada_really_inline ada::url_components get_components()
282284
const noexcept;
283-
285+
/** @return true if the URL has a hash component */
286+
[[nodiscard]] inline bool has_hash() const noexcept override;
287+
/** @return true if the URL has a search component */
288+
[[nodiscard]] inline bool has_search() const noexcept override;
284289
private:
285290
friend ada::url ada::parser::parse_url<ada::url>(std::string_view,
286291
const ada::url *);
@@ -369,8 +374,6 @@ struct url : url_base {
369374

370375
inline void clear_pathname() override;
371376
inline void clear_search() override;
372-
inline bool has_hash() const override;
373-
inline bool has_search() const override;
374377
inline void set_protocol_as_file();
375378

376379
/**

include/ada/url_aggregator-inl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -704,12 +704,12 @@ inline void url_aggregator::clear_hostname() {
704704
ADA_ASSERT_TRUE(validate());
705705
}
706706

707-
inline bool url_aggregator::has_hash() const {
707+
[[nodiscard]] inline bool url_aggregator::has_hash() const noexcept {
708708
ada_log("url_aggregator::has_hash");
709709
return components.hash_start != url_components::omitted;
710710
}
711711

712-
inline bool url_aggregator::has_search() const {
712+
[[nodiscard]] inline bool url_aggregator::has_search() const noexcept {
713713
ada_log("url_aggregator::has_search");
714714
return components.search_start != url_components::omitted;
715715
}

include/ada/url_aggregator.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -183,13 +183,18 @@ struct url_aggregator : url_base {
183183
[[nodiscard]] inline bool has_empty_hostname() const noexcept;
184184
/** @return true if it has a host (included an empty host) */
185185
[[nodiscard]] inline bool has_hostname() const noexcept;
186+
/** @return true if the URL has a non-empty username */
186187
[[nodiscard]] inline bool has_non_empty_username() const noexcept;
188+
/** @return true if the URL has a non-empty password */
187189
[[nodiscard]] inline bool has_non_empty_password() const noexcept;
188-
[[nodiscard]] inline bool has_password() const noexcept;
189190
/** @return true if the URL has a (non default) port */
190191
[[nodiscard]] inline bool has_port() const noexcept;
191-
inline bool has_hash() const override;
192-
inline bool has_search() const override;
192+
/** @return true if the URL has a password */
193+
[[nodiscard]] inline bool has_password() const noexcept;
194+
/** @return true if the URL has a hash component */
195+
[[nodiscard]] inline bool has_hash() const noexcept override;
196+
/** @return true if the URL has a search component */
197+
[[nodiscard]] inline bool has_search() const noexcept override;
193198

194199
private:
195200
friend ada::url_aggregator ada::parser::parse_url<ada::url_aggregator>(

include/ada/url_base.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,10 +102,10 @@ struct url_base {
102102
virtual inline void clear_search() = 0;
103103

104104
/** @private */
105-
virtual inline bool has_hash() const = 0;
105+
virtual inline bool has_hash() const noexcept = 0;
106106

107107
/** @private */
108-
virtual inline bool has_search() const = 0;
108+
virtual inline bool has_search() const noexcept = 0;
109109

110110
}; // url_base
111111

0 commit comments

Comments
 (0)