Skip to content

Commit 1340b70

Browse files
authored
Merge pull request #18 from auxten/tests
Run all tests after build
2 parents b3768b3 + 9f64f31 commit 1340b70

File tree

12 files changed

+1621
-44
lines changed

12 files changed

+1621
-44
lines changed

.github/workflows/build_wheels.yml

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,13 @@ jobs:
120120
sudo rm -f dist/*-linux_x86_64.whl
121121
ls -lh dist
122122
shell: bash
123+
- name: Run tests
124+
run: |
125+
python3 -m pip install dist/*.whl
126+
python3 -m pip install pandas pyarrow
127+
python3 -c "import chdb; res = chdb.query('select 1112222222,555', 'CSV'); print(res.get_memview().tobytes())"
128+
make test
129+
continue-on-error: false
123130
- uses: actions/upload-artifact@v3
124131
with:
125132
path: ./dist/*.whl
@@ -231,10 +238,12 @@ jobs:
231238
- name: Fix wheel platform tag
232239
run: |
233240
python3 -m wheel tags --platform-tag=macosx_10_15_x86_64 --remove dist/*.whl
234-
- name: Run simple test
241+
- name: Run tests
235242
run: |
236243
python3 -m pip install dist/*.whl
244+
python3 -m pip install pandas pyarrow
237245
python3 -c "import chdb; res = chdb.query('select 1112222222,555', 'CSV'); print(res.get_memview().tobytes())"
246+
make test
238247
continue-on-error: false
239248
- name: Show files
240249
run: ls -lh dist
@@ -336,7 +345,8 @@ jobs:
336345
CIBW_BEFORE_BUILD: "pip install -U pip tox pybind11 && bash -x gen_manifest.sh && bash chdb/build.sh"
337346
CIBW_BUILD_VERBOSITY: 3
338347
CIBW_BUILD: "cp37-macosx_x86_64 cp38-macosx_x86_64 cp39-macosx_x86_64 cp310-macosx_x86_64"
339-
CIBW_TEST_COMMAND: python -c "import chdb; res = chdb.query('select 1112222222,555', 'CSV'); print(res.get_memview().tobytes())"
348+
CIBW_TEST_REQUIRES: "pyarrow pandas"
349+
CIBW_TEST_COMMAND: "cd {project} && make test"
340350
# with:
341351
# package-dir: .
342352
# output-dir: wheelhouse

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ wheel:
1010
tox -e build -- --wheel
1111
@echo "Done."
1212

13+
test:
14+
@echo "Testing..."
15+
cd tests && python3 run_all.py
16+
1317
pub:
1418
@echo "Publishing wheel..."
1519
tox -e publish

chdb/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import sys
22
import os
33

4-
chdb_version = (0, 5, 0)
4+
chdb_version = (0, 6, 0)
55
if sys.version_info[:2] >= (3, 7):
66
# get the path of the current file
77
current_path = os.path.dirname(os.path.abspath(__file__))

chdb/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def main():
2121
sql = options.sql[0]
2222
output_format = options.format
2323
res = query(sql, output_format)
24-
if output_format.lower() == 'dataframe':
24+
if output_format.lower() in ('dataframe', 'arrowtable'):
2525
temp = res
2626
else:
2727
temp = res.data()

tests/conftest.py

Lines changed: 0 additions & 10 deletions
This file was deleted.

tests/format_output.py

Lines changed: 1485 additions & 0 deletions
Large diffs are not rendered by default.

tests/gen_format_cases.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#!python3
2+
3+
import os
4+
import re
5+
import pprint
6+
import chdb
7+
from utils import current_dir, data_file, reset_elapsed
8+
9+
# some formats are not supported on chdb, so we need to skip them
10+
# TODO: add support for these formats
11+
# ["Template", "PrettyJSONEachRow", "Prometheus", "Protobuf", "ProtobufSingle", "Avro", "CapnProto", "MsgPack"]
12+
formats = ["TabSeparated", "TabSeparatedRaw", "TabSeparatedWithNames", "TabSeparatedWithNamesAndTypes", "TabSeparatedRawWithNames",
13+
"TabSeparatedRawWithNamesAndTypes", "CSV", "CSVWithNames", "CSVWithNamesAndTypes", "CustomSeparated",
14+
"CustomSeparatedWithNames", "CustomSeparatedWithNamesAndTypes", "SQLInsert", "Values", "Vertical", "JSON", "JSONStrings",
15+
"JSONColumns", "JSONColumnsWithMetadata", "JSONCompact", "JSONCompactStrings", "JSONCompactColumns", "JSONEachRow",
16+
"JSONEachRowWithProgress", "JSONStringsEachRow", "JSONStringsEachRowWithProgress", "JSONCompactEachRow",
17+
"JSONCompactEachRowWithNames", "JSONCompactEachRowWithNamesAndTypes", "JSONCompactStringsEachRow",
18+
"JSONCompactStringsEachRowWithNames", "JSONCompactStringsEachRowWithNamesAndTypes", "JSONObjectEachRow", "BSONEachRow",
19+
"TSKV", "Pretty", "PrettyNoEscapes", "PrettyMonoBlock", "PrettyNoEscapesMonoBlock", "PrettyCompact", "PrettyCompactNoEscapes",
20+
"PrettyCompactMonoBlock", "PrettyCompactNoEscapesMonoBlock", "PrettySpace", "PrettySpaceNoEscapes", "PrettySpaceMonoBlock",
21+
"PrettySpaceNoEscapesMonoBlock", "Parquet", "ArrowTable",
22+
"ORC", "RowBinary", "RowBinaryWithNames", "RowBinaryWithNamesAndTypes", "Native", "Null", "XML", "LineAsString",
23+
"RawBLOB", "Markdown"]
24+
25+
# generate test cases for each format and output
26+
27+
# Expected output per format, keyed by format name. Each entry records the
# output passed through reset_elapsed() (imported from utils) and its length.
format_output = {}

# The query does not depend on the format, so build it once.
query = f"SELECT * FROM file('{data_file}', Parquet) limit 10"

for fmt in formats:
    res = chdb.query(query, fmt)
    if fmt == "ArrowTable":
        # ArrowTable results are captured via their string rendering rather
        # than through get_memview().
        data = reset_elapsed(f"{res}")
    else:
        data = reset_elapsed(res.get_memview().tobytes())
    print(f"format: {fmt} size: {len(data)}")
    format_output[fmt] = {"len": len(data), "data": data}

# dump to py dict for import later
# The file is imported as Python source, which is decoded as UTF-8 by default,
# so write it as UTF-8 explicitly instead of using the locale's encoding.
with open(os.path.join(current_dir, "format_output.py"), "w", encoding="utf-8") as f:
    f.write("format_output = ")
    pprint.pprint(format_output, stream=f)

tests/run_all.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#!python3
2+
3+
import unittest
4+
5+
# Discover the test modules under the current working directory; the
# Makefile's `test` target runs this script from inside tests/.
test_loader = unittest.TestLoader()
test_suite = test_loader.discover('./')

test_runner = unittest.TextTestRunner()
ret = test_runner.run(test_suite)

# Exit with a non-zero code when the run was not successful so CI fails.
# wasSuccessful() also accounts for unexpected successes, which checking only
# `failures` and `errors` misses. Raising SystemExit avoids depending on the
# `exit` helper injected by the site module (absent under `python -S`).
raise SystemExit(0 if ret.wasSuccessful() else 1)

tests/test_basic.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#!python3
2+
3+
import os
4+
import unittest
5+
import chdb
6+
from format_output import format_output
7+
from utils import data_file, reset_elapsed
8+
9+
class TestBasic(unittest.TestCase):
    """Smoke test: a trivial query returns the expected CSV payload."""

    def test_basic(self):
        res = chdb.query("SELECT 1", "CSV")
        # Compare the payload itself rather than only its length, so that a
        # wrong two-byte result cannot pass.
        self.assertEqual(res.get_memview().tobytes(), b"1\n")
13+
class TestOutput(unittest.TestCase):
    """Compare the output of every recorded format with its expected data."""

    def test_output(self):
        # The query is the same for every format, so build it once.
        query = "SELECT * FROM file('" + data_file + "', Parquet) limit 10"
        for fmt, expected in format_output.items():
            # One subTest per format: a mismatch is reported with the format
            # name and the remaining formats are still checked.
            with self.subTest(format=fmt):
                res = chdb.query(query, fmt)
                if fmt == "ArrowTable":
                    # ArrowTable results are compared via their string
                    # rendering rather than through get_memview().
                    data = reset_elapsed(f"{res}")
                else:
                    data = reset_elapsed(res.get_memview().tobytes())
                self.assertEqual(data, expected["data"])
22+
23+
24+
if __name__ == '__main__':
25+
unittest.main()

tests/test_parallel.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
1-
#!/usr/bin/env python3
1+
#!python3
22
import concurrent.futures
33
import time
44
import sys
5-
import _chdb as chdb
5+
import os
6+
import chdb
7+
import unittest
68

79
# run queries in parallel across n threads and benchmark
8-
thread_count = 1
10+
thread_count = 10
911
query_count = 1000
12+
current_dir=os.path.dirname(os.path.abspath(__file__))
13+
data_file=os.path.join(current_dir, "../contrib/arrow/cpp/submodules/parquet-testing/data/alltypes_dictionary.parquet")
1014

1115
if len(sys.argv) == 2:
1216
thread_count = int(sys.argv[1])
@@ -18,7 +22,7 @@
1822

1923
def run_query(query, format):
2024
res = chdb.query(query, format)
21-
print(len(res.get_memview().tobytes()))
25+
assert len(res.get_memview().tobytes()) == 2290
2226

2327
def run_queries(query, format, count = query_count):
2428
for i in range(count):
@@ -37,6 +41,11 @@ def benchmark(query, format, parallel = thread_count, count = query_count):
3741
wait()
3842
time_end = time.time()
3943
print("Time cost:", time_end - time_start, "s")
44+
print("QPS:", count / (time_end - time_start))
45+
46+
class TestParallel(unittest.TestCase):
47+
def test_parallel(self):
48+
benchmark(f"SELECT * FROM file('{data_file}', Parquet) LIMIT 10", "Arrow")
4049

4150
if __name__ == '__main__':
42-
benchmark("SELECT * FROM file('/home/Clickhouse/bench/result.parquet', Parquet) LIMIT 10", "Arrow")
51+
unittest.main()

0 commit comments

Comments
 (0)