Skip to content

Commit a8cdc68

Browse files
committed
Fix Date and Date32 handling
1 parent 392e9ff commit a8cdc68

File tree

2 files changed

+64
-17
lines changed

2 files changed

+64
-17
lines changed

src/Processors/Sources/PythonSource.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -392,6 +392,10 @@ Chunk PythonSource::scanDataToChunk()
392392
columns[i] = convert_and_insert_array<UInt32>(col, cursor, count);
393393
else if (which.isDateTime64())
394394
columns[i] = convert_and_insert_array<DateTime64>(col, cursor, count);
395+
else if (which.isDate32())
396+
columns[i] = convert_and_insert_array<Int32>(col, cursor, count);
397+
else if (which.isDate())
398+
columns[i] = convert_and_insert_array<UInt16>(col, cursor, count);
395399
else if (which.isString())
396400
columns[i] = convert_and_insert_array<String>(col, cursor, count);
397401
else

tests/test_query_py.py

Lines changed: 60 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,12 @@
11
#!python3
22

3-
from io import StringIO
3+
import io
4+
import random
45
import unittest
56
import numpy as np
67
import pandas as pd
78
import pyarrow as pa
9+
from pyarrow import csv
810
import chdb
911

1012

@@ -27,6 +29,19 @@
2729
717410,0.6095994785374601,draw
2830
"""
2931

32+
SCORES_CSV = """score,result,dateOfBirth
33+
758270,lose,1983-07-24
34+
355079,win,2000-11-27
35+
451231,lose,1980-03-11
36+
854953,lose,1996-08-10
37+
294257,lose,1966-12-12
38+
756327,lose,1997-08-29
39+
379755,lose,1981-10-24
40+
916108,lose,1950-08-30
41+
467033,win,2007-09-15
42+
639860,win,1989-06-30
43+
"""
44+
3045
class myReader(chdb.PyReader):
3146
def __init__(self, data):
3247
self.data = data
@@ -43,6 +58,17 @@ def read(self, col_names, count):
4358

4459

4560
class TestQueryPy(unittest.TestCase):
61+
# def test_query_np(self):
62+
# t3 = {
63+
# "a": np.array([1, 2, 3, 4, 5, 6]),
64+
# "b": np.array(["tom", "jerry", "auxten", "tom", "jerry", "auxten"]),
65+
# }
66+
67+
# ret = chdb.query(
68+
# "SELECT b, sum(a) FROM Python(t3) GROUP BY b ORDER BY b", "debug"
69+
# )
70+
# self.assertEqual(str(ret), EXPECTED)
71+
4672
def test_query_py(self):
4773
reader = myReader(
4874
{
@@ -74,7 +100,7 @@ def test_query_arrow(self):
74100
)
75101

76102
ret = chdb.query(
77-
"SELECT b, sum(a) FROM Python(table) GROUP BY b ORDER BY b", "debug"
103+
"SELECT b, sum(a) FROM Python(table) GROUP BY b ORDER BY b"
78104
)
79105
self.assertEqual(str(ret), EXPECTED)
80106

@@ -87,20 +113,38 @@ def test_query_arrow2(self):
87113
)
88114

89115
ret = chdb.query(
90-
"SELECT b, sum(a) FROM Python(t2) GROUP BY b ORDER BY b", "debug"
116+
"SELECT b, sum(a) FROM Python(t2) GROUP BY b ORDER BY b"
91117
)
92118
self.assertEqual(str(ret), EXPECTED)
93119

94-
# def test_query_np(self):
95-
# t3 = {
96-
# "a": np.array([1, 2, 3, 4, 5, 6]),
97-
# "b": np.array(["tom", "jerry", "auxten", "tom", "jerry", "auxten"]),
98-
# }
120+
def test_query_arrow3(self):
    """Query an Arrow table parsed from SCORES_CSV and check the aggregate row."""
    csv_stream = io.BytesIO(SCORES_CSV.encode())
    # NOTE(review): `table` is never passed to chdb explicitly; the SQL refers
    # to it as Python(table), so the local presumably has to keep this name.
    table = csv.read_csv(csv_stream)
    sql = """
SELECT sum(score), avg(score), median(score),
avgIf(score, dateOfBirth > '1980-01-01') as avgIf,
countIf(result = 'win') AS wins,
countIf(result = 'draw') AS draws,
countIf(result = 'lose') AS losses,
count()
FROM Python(table)
"""
    expected = "5872873,587287.3,553446.5,470878.25,3,0,7,10\n"
    ret = chdb.query(sql)
    self.assertEqual(str(ret), expected)
99137

100-
# ret = chdb.query(
101-
# "SELECT b, sum(a) FROM Python(t3) GROUP BY b ORDER BY b", "debug"
102-
# )
103-
# self.assertEqual(str(ret), EXPECTED)
138+
def test_random_float(self):
    """Check that avg() over a column of uniform [0, 1] floats is close to 0.5.

    The sample is drawn from a locally seeded generator so that every run
    sees the same data and a failure can be reproduced.
    """
    rng = random.Random(42)
    # NOTE(review): `x` is never passed to chdb explicitly; the SQL refers to
    # it as Python(x), so the local presumably has to keep this name.
    x = {"col1": [rng.uniform(0, 1) for _ in range(100000)]}
    ret = chdb.sql(
        """
select avg(col1)
FROM Python(x)
"""
    )
    self.assertAlmostEqual(float(ret.bytes()), 0.5, delta=0.01)
104148

105149
def test_query_dict(self):
106150
data = {
@@ -109,7 +153,7 @@ def test_query_dict(self):
109153
}
110154

111155
ret = chdb.query(
112-
"SELECT b, sum(a) FROM Python(data) GROUP BY b ORDER BY b", "debug"
156+
"SELECT b, sum(a) FROM Python(data) GROUP BY b ORDER BY b"
113157
)
114158
self.assertEqual(str(ret), EXPECTED)
115159

@@ -120,7 +164,7 @@ def test_query_dict_int(self):
120164
}
121165

122166
ret = chdb.query(
123-
"SELECT b, sum(a) FROM Python(data) GROUP BY b ORDER BY b", "debug"
167+
"SELECT b, sum(a) FROM Python(data) GROUP BY b ORDER BY b"
124168
)
125169
self.assertEqual(
126170
str(ret),
@@ -131,7 +175,7 @@ def test_query_dict_int(self):
131175
)
132176

133177
def test_query_pd_csv(self):
134-
csv_data = pd.read_csv(StringIO(SMALL_CSV))
178+
csv_data = pd.read_csv(io.StringIO(SMALL_CSV))
135179
ret = chdb.query(
136180
"""
137181
SELECT sum(score1), avg(score1), median(score1),
@@ -145,8 +189,7 @@ def test_query_pd_csv(self):
145189
)
146190
self.assertEqual(
147191
str(ret),
148-
"""4099877,409987.7,414399.5,6.128691345453262,0.6128691345453262,0.5693101584911346,1,5,4,10
149-
""",
192+
"4099877,409987.7,414399.5,6.128691345453262,0.6128691345453262,0.5693101584911346,1,5,4,10\n",
150193
)
151194

152195

0 commit comments

Comments
 (0)