Skip to content

Commit 7dec57c

Browse files
authored
Query stats (#91)
* Add query result statistics rows_read(), bytes_read(), elapsed() * Add example and statistics test
1 parent c330249 commit 7dec57c

File tree

10 files changed

+110
-10
lines changed

10 files changed

+110
-10
lines changed

README-zh.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ res = chdb.query('select version()', 'Pretty'); print(res)
6161
# 查看更多数据类型格式,请参见 tests/format_output.py
6262
res = chdb.query('select * from file("data.parquet", Parquet)', 'JSON'); print(res)
6363
res = chdb.query('select * from file("data.csv", CSV)', 'CSV'); print(res)
64+
print(f"SQL read {res.rows_read()} rows, {res.bytes_read()} bytes, elapsed {res.elapsed()} seconds")
6465
```
6566

6667
### Pandas DataFrame 输出

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ res = chdb.query('select version()', 'Pretty'); print(res)
6666
# See more data type format in tests/format_output.py
6767
res = chdb.query('select * from file("data.parquet", Parquet)', 'JSON'); print(res)
6868
res = chdb.query('select * from file("data.csv", CSV)', 'CSV'); print(res)
69+
print(f"SQL read {res.rows_read()} rows, {res.bytes_read()} bytes, elapsed {res.elapsed()} seconds")
6970
```
7071

7172
### Pandas dataframe output

programs/local/LocalChdb.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,9 @@ PYBIND11_MODULE(_chdb, m)
126126
.def("__len__", &query_result::size)
127127
.def("__repr__", &query_result::str)
128128
.def("size", &query_result::size)
129+
.def("rows_read", &query_result::rows_read)
130+
.def("bytes_read", &query_result::bytes_read)
131+
.def("elapsed", &query_result::elapsed)
129132
.def("get_memview", &query_result::get_memview);
130133

131134

programs/local/LocalChdb.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,31 @@ class local_result_wrapper
5656
}
5757
return py::str(result->buf, result->len);
5858
}
59+
// Query statistics
60+
size_t rows_read()
61+
{
62+
if (result == nullptr)
63+
{
64+
return 0;
65+
}
66+
return result->rows_read;
67+
}
68+
size_t bytes_read()
69+
{
70+
if (result == nullptr)
71+
{
72+
return 0;
73+
}
74+
return result->bytes_read;
75+
}
76+
double elapsed()
77+
{
78+
if (result == nullptr)
79+
{
80+
return 0;
81+
}
82+
return result->elapsed;
83+
}
5984
};
6085

6186
class query_result
@@ -70,6 +95,9 @@ class query_result
7095
py::bytes bytes() { return result_wrapper->bytes(); }
7196
py::str str() { return result_wrapper->str(); }
7297
size_t size() { return result_wrapper->size(); }
98+
size_t rows_read() { return result_wrapper->rows_read(); }
99+
size_t bytes_read() { return result_wrapper->bytes_read(); }
100+
double elapsed() { return result_wrapper->elapsed(); }
73101
memoryview_wrapper * get_memview();
74102
};
75103

programs/local/LocalServer.cpp

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -933,7 +933,16 @@ void LocalServer::readArguments(int argc, char ** argv, Arguments & common_argum
933933
// }
934934
// }
935935

936-
std::vector<char> * pyEntryClickHouseLocal(int argc, char ** argv)
936+
class query_result_
937+
{
938+
public:
939+
uint64_t rows;
940+
uint64_t bytes;
941+
double elapsed;
942+
std::vector<char> * buf;
943+
};
944+
945+
std::unique_ptr<query_result_> pyEntryClickHouseLocal(int argc, char ** argv)
937946
{
938947
try
939948
{
@@ -942,10 +951,14 @@ std::vector<char> * pyEntryClickHouseLocal(int argc, char ** argv)
942951
int ret = app.run();
943952
if (ret == 0)
944953
{
945-
auto buf = app.getQueryOutputVector();
954+
auto result = std::make_unique<query_result_>();
955+
result->buf = app.getQueryOutputVector();
956+
result->rows = app.getProcessedRows();
957+
result->bytes = app.getProcessedBytes();
958+
result->elapsed = app.getElapsedTime();
946959

947960
// std::cerr << std::string(out->begin(), out->end()) << std::endl;
948-
return buf;
961+
return result;
949962
}
950963
else
951964
{
@@ -970,15 +983,18 @@ std::vector<char> * pyEntryClickHouseLocal(int argc, char ** argv)
970983
// todo fix the memory leak and unnecessary copy
971984
local_result * query_stable(int argc, char ** argv)
972985
{
973-
std::vector<char> * result = pyEntryClickHouseLocal(argc, argv);
986+
auto result = pyEntryClickHouseLocal(argc, argv);
974987
if (!result)
975988
{
976989
return nullptr;
977990
}
978991
local_result * res = new local_result;
979-
res->len = result->size();
980-
res->buf = result->data();
981-
res->_vec = result;
992+
res->len = result->buf->size();
993+
res->buf = result->buf->data();
994+
res->_vec = result->buf;
995+
res->rows_read = result->rows;
996+
res->bytes_read = result->bytes;
997+
res->elapsed = result->elapsed;
982998
return res;
983999
}
9841000

@@ -996,10 +1012,10 @@ void free_result(local_result * result)
9961012

9971013
int mainEntryClickHouseLocal(int argc, char ** argv)
9981014
{
999-
auto buf = pyEntryClickHouseLocal(argc, argv);
1000-
if (buf)
1015+
auto result = pyEntryClickHouseLocal(argc, argv);
1016+
if (result)
10011017
{
1002-
std::cout << std::string(buf->begin(), buf->end()) << std::endl;
1018+
std::cout << std::string(result->buf->begin(), result->buf->end()) << std::endl;
10031019
return 0;
10041020
}
10051021
else

programs/local/chdb.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#pragma once
2+
#include <cstdint>
23
#include <stddef.h>
34

45
extern "C" {
@@ -7,6 +8,9 @@ struct local_result
78
char * buf;
89
size_t len;
910
void * _vec; // std::vector<char> *, for freeing
11+
double elapsed;
12+
uint64_t rows_read;
13+
uint64_t bytes_read;
1014
};
1115

1216
local_result * query_stable(int argc, char ** argv);

src/Client/ClientBase.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,7 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
437437
return;
438438

439439
processed_rows += block.rows();
440+
processed_bytes += block.bytes();
440441
/// Even if all blocks are empty, we still need to initialize the output stream to write empty resultset.
441442
initOutputFormat(block, parsed_query);
442443

@@ -1575,6 +1576,7 @@ try
15751576
{
15761577
connection->sendData(block, /* name */"", /* scalar */false);
15771578
processed_rows += block.rows();
1579+
processed_bytes += block.bytes();
15781580
}
15791581
}
15801582

@@ -1722,6 +1724,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
17221724
}
17231725

17241726
processed_rows = 0;
1727+
processed_bytes = 0;
17251728
written_first_block = false;
17261729
progress_indication.resetProgress();
17271730
profile_events.watch.restart();

src/Client/ClientBase.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ class ClientBase : public Poco::Util::Application, public IHints<2, ClientBase>
8080
// std::vector<char> vec(buf.begin(), buf.end());
8181
return query_result_memory;
8282
}
83+
size_t getProcessedRows() const { return processed_rows; }
84+
size_t getProcessedBytes() const { return processed_bytes; }
85+
double getElapsedTime() const { return progress_indication.elapsedSeconds(); }
8386

8487
std::vector<String> getAllRegisteredNames() const override { return cmd_options; }
8588

@@ -281,6 +284,7 @@ class ClientBase : public Poco::Util::Application, public IHints<2, ClientBase>
281284
bool need_render_profile_events = true;
282285
bool written_first_block = false;
283286
size_t processed_rows = 0; /// How many rows have been read or written.
287+
size_t processed_bytes = 0; /// How many bytes have been read or written.
284288
bool print_num_processed_rows = false; /// Whether to print the number of processed rows at
285289

286290
bool print_stack_trace = false;

tests/test_gc.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,5 +37,6 @@ def test_gc(self):
3737
self.assertEqual(mv3.tobytes(), b'123,"adbcdefg"\n')
3838
self.assertEqual(len(mv3), 15)
3939

40+
4041
if __name__ == '__main__':
4142
unittest.main()

tests/test_statistics.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#!python3
2+
3+
import os
4+
import unittest
5+
import chdb
6+
7+
N = 1000
8+
9+
10+
class TestQueryStatistics(unittest.TestCase):
11+
def setUp(self) -> None:
12+
# create tmp csv file
13+
with open(".test.csv", "w") as f:
14+
f.write("a,b,c\n")
15+
for i in range(N):
16+
f.write(f"{i},{i*2},{i*3}\n")
17+
return super().setUp()
18+
19+
def tearDown(self) -> None:
20+
# remove tmp csv file
21+
os.remove(".test.csv")
22+
return super().tearDown()
23+
24+
def test_csv_stats(self):
25+
ret = chdb.query("SELECT * FROM file('.test.csv', CSV)", "CSV")
26+
self.assertEqual(ret.rows_read(), N)
27+
self.assertGreater(ret.elapsed(), 0.000001)
28+
self.assertEqual(ret.bytes_read(), 27000)
29+
print(f"SQL read {ret.rows_read()} rows, {ret.bytes_read()} bytes, elapsed {ret.elapsed()} seconds")
30+
31+
def test_non_exist_stats(self):
32+
ret = chdb.query("SELECT * FROM file('notexist.parquet', Parquet)", "Parquet")
33+
self.assertEqual(ret.rows_read(), 0)
34+
self.assertEqual(ret.bytes_read(), 0)
35+
print(f"SQL read {ret.rows_read()} rows, {ret.bytes_read()} bytes, elapsed {ret.elapsed()} seconds")
36+
37+
38+
if __name__ == "__main__":
39+
unittest.main()

0 commit comments

Comments
 (0)