|
| 1 | +#!python3 |
| 2 | + |
| 3 | +import os |
| 4 | +import re |
| 5 | +import pprint |
| 6 | +import chdb |
| 7 | +from utils import current_dir, data_file, reset_elapsed |
| 8 | + |
# Output formats exercised by this script, in the order they are queried.
# Some formats are not supported on chdb yet, so they are left out for now.
# TODO: add support for these formats
# ["Template", "PrettyJSONEachRow", "Prometheus", "Protobuf", "ProtobufSingle", "Avro", "CapnProto", "MsgPack"]
formats = [
    # TabSeparated family
    "TabSeparated", "TabSeparatedRaw", "TabSeparatedWithNames",
    "TabSeparatedWithNamesAndTypes", "TabSeparatedRawWithNames",
    "TabSeparatedRawWithNamesAndTypes",
    # CSV family
    "CSV", "CSVWithNames", "CSVWithNamesAndTypes",
    # CustomSeparated family
    "CustomSeparated", "CustomSeparatedWithNames",
    "CustomSeparatedWithNamesAndTypes",
    # SQLInsert / Values / Vertical
    "SQLInsert", "Values", "Vertical",
    # JSON family
    "JSON", "JSONStrings", "JSONColumns", "JSONColumnsWithMetadata",
    "JSONCompact", "JSONCompactStrings", "JSONCompactColumns",
    "JSONEachRow", "JSONEachRowWithProgress",
    "JSONStringsEachRow", "JSONStringsEachRowWithProgress",
    "JSONCompactEachRow", "JSONCompactEachRowWithNames",
    "JSONCompactEachRowWithNamesAndTypes",
    "JSONCompactStringsEachRow", "JSONCompactStringsEachRowWithNames",
    "JSONCompactStringsEachRowWithNamesAndTypes",
    "JSONObjectEachRow", "BSONEachRow",
    # TSKV
    "TSKV",
    # Pretty family
    "Pretty", "PrettyNoEscapes", "PrettyMonoBlock", "PrettyNoEscapesMonoBlock",
    "PrettyCompact", "PrettyCompactNoEscapes",
    "PrettyCompactMonoBlock", "PrettyCompactNoEscapesMonoBlock",
    "PrettySpace", "PrettySpaceNoEscapes",
    "PrettySpaceMonoBlock", "PrettySpaceNoEscapesMonoBlock",
    # Parquet / ArrowTable / ORC
    "Parquet", "ArrowTable", "ORC",
    # RowBinary family and Native
    "RowBinary", "RowBinaryWithNames", "RowBinaryWithNamesAndTypes", "Native",
    # Remaining formats
    "Null", "XML", "LineAsString", "RawBLOB", "Markdown",
]
| 24 | + |
# Run one fixed query per output format and record what chdb returns for it.
# Each entry maps the format name to the captured payload and its length.
format_output = {}

# The query text does not depend on the format, so build it once.
query = f"SELECT * FROM file('{data_file}', Parquet) limit 10"

for fmt in formats:
    res = chdb.query(query, fmt)
    # ArrowTable results are captured through their string form; every other
    # format is read as raw bytes from the result's memory view.
    payload = f"{res}" if fmt == "ArrowTable" else res.get_memview().tobytes()
    data = reset_elapsed(payload)
    size = len(data)
    print(f"format: {fmt} size: {size}")
    format_output[fmt] = {"len": size, "data": data}
| 38 | + |
# Dump the collected results as a Python module (`format_output = {...}`) so
# they can be imported later.
# The file is written explicitly as UTF-8: Python decodes source files as
# UTF-8 by default, while open() would otherwise use the locale encoding, and
# pprint keeps printable non-ASCII characters of str values as-is.
with open(os.path.join(current_dir, "format_output.py"), "w", encoding="utf-8") as f:
    f.write("format_output = ")
    pprint.pprint(format_output, stream=f)
0 commit comments