6
6
import os
7
7
8
8
9
- TMP_DB = 'tmp.duckdb'
9
+ TMP_DB : str = 'tmp.duckdb'
10
+ COMPLETE_TABLE_SUFFIX : str = '_complete'
10
11
11
12
12
13
@typechecked
@@ -56,7 +57,9 @@ def execute(self, n_runs: int, params: dict[str, Any]) -> ConnectorResult:
56
57
57
58
try :
58
59
# Set up database
59
- create_tbl_stmts : list [str ] = self .generate_create_table_stmts (params ['data' ], self .check_with_scale_factors (params ))
60
+ complete_tables : list [str ]
61
+ actual_tables : list [str ]
62
+ complete_tables , actual_tables = self .generate_create_table_stmts (params ['data' ])
60
63
61
64
if self .check_execute_single_cases (params ):
62
65
# Execute cases singly
@@ -69,8 +72,9 @@ def execute(self, n_runs: int, params: dict[str, Any]) -> ConnectorResult:
69
72
70
73
statements : list [str ] = list ()
71
74
if i == 0 :
72
- # Also use the create_tbl_stmts in this case
73
- statements .extend (create_tbl_stmts )
75
+ # Also use the CREATE TABLE stmts in this case
76
+ statements .extend (complete_tables )
77
+ statements .extend (actual_tables )
74
78
75
79
# Create tables from tmp tables with scale factor
76
80
for table_name , table in params ['data' ].items ():
@@ -82,7 +86,7 @@ def execute(self, n_runs: int, params: dict[str, Any]) -> ConnectorResult:
82
86
sf = 1
83
87
header : int = int (table .get ('header' , 0 ))
84
88
num_rows : int = round ((table ['lines_in_file' ] - header ) * sf )
85
- statements .append (f'INSERT INTO "{ table_name } " SELECT * FROM "{ table_name } _tmp " LIMIT { num_rows } ;' )
89
+ statements .append (f'INSERT INTO "{ table_name } " SELECT * FROM "{ table_name } { COMPLETE_TABLE_SUFFIX } " LIMIT { num_rows } ;' )
86
90
87
91
statements .append (".timer on" )
88
92
statements .append (query_stmt ) # Actual query from this case
@@ -114,13 +118,23 @@ def execute(self, n_runs: int, params: dict[str, Any]) -> ConnectorResult:
114
118
timeout : int = DEFAULT_TIMEOUT + TIMEOUT_PER_CASE * len (cases ) * n_runs
115
119
116
120
statements : list [str ] = list ()
117
- statements .extend (create_tbl_stmts )
121
+ statements .extend (complete_tables )
122
+ statements .extend (actual_tables )
123
+
124
+ # Dropping and recreating the tables between runs prevents caching from influencing the timings
125
+ refill_stmts : list [str ] = list ()
126
+ for name , table in params ['data' ].items ():
127
+ refill_stmts .append (f'DROP TABLE "{ name } ";' )
128
+ refill_stmts .extend (actual_tables )
129
+ for name , table in params ['data' ].items ():
130
+ refill_stmts .append (f'INSERT INTO "{ name } " (SELECT * FROM "{ name } { COMPLETE_TABLE_SUFFIX } ");' )
118
131
119
- statements .append (".timer on" )
120
132
for _ in range (n_runs ):
133
+ statements .extend (refill_stmts )
134
+ statements .append (".timer on" )
121
135
for case_query in cases .values ():
122
136
statements .append (case_query )
123
- statements .append (".timer off" )
137
+ statements .append (".timer off" )
124
138
125
139
combined_query : str = "\n " .join (statements )
126
140
@@ -157,20 +171,17 @@ def clean_up(self) -> None:
157
171
os .remove (TMP_DB )
158
172
159
173
160
- # Creates tables in the database and copies contents of given files into them
161
- # Call with 'with_scale_factors'=False if data should be loaded as a whole
162
- # Call with 'with_scale_factors'=True if data should be placed in tmp tables
163
- # and copied for each case with different scale factor
164
- def generate_create_table_stmts ( self , data : dict [ str , dict [ str , Any ]], with_scale_factors : bool ) -> list [str ]:
165
- statements : list [str ] = list ()
174
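+ # Generates the statements that create the tables and copy the contents of the given files into them.
175
+ # Returns two lists: the CREATE/COPY statements for the 'T_complete' tables, which hold the complete data, and the CREATE statements for the actual tables T.
176
+ # For the individual cases, the actual table T can be filled using 'INSERT INTO T (SELECT * FROM T_complete LIMIT x)'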
177
+ def generate_create_table_stmts ( self , data : dict [ str , dict [ str , Any ]]) -> tuple [ list [ str ], list [ str ]]:
178
+ complete_tables : list [str ] = list ()
179
+ actual_tables : list [str ] = list ()
166
180
for table_name , table in data .items ():
167
181
columns : str = Connector .parse_attributes (self .DUCKDB_TYPE_PARSER , table ['attributes' ])
168
182
169
- if with_scale_factors :
170
- table_name += "_tmp"
171
-
172
- create : str = f'CREATE TABLE "{ table_name } " { columns } ;'
173
- copy : str = f'COPY "{ table_name } " FROM \' { table ["file" ]} \' ( '
183
+ create : str = f'CREATE TABLE "{ table_name } { COMPLETE_TABLE_SUFFIX } " { columns } ;'
184
+ copy : str = f'COPY "{ table_name } { COMPLETE_TABLE_SUFFIX } " FROM \' { table ["file" ]} \' ( '
174
185
if 'delimiter' in table :
175
186
delim = table ['delimiter' ].replace ("'" , "" )
176
187
copy += f" DELIMITER \' { delim } \' ,"
@@ -181,14 +192,13 @@ def generate_create_table_stmts(self, data: dict[str, dict[str, Any]], with_scal
181
192
182
193
copy = copy [:- 1 ] + " );"
183
194
184
- statements .append (create )
185
- statements .append (copy )
195
+ complete_tables .append (create )
196
+ complete_tables .append (copy )
186
197
187
- if with_scale_factors :
188
- # Create actual table that will be used for experiment
189
- statements .append (f'CREATE TABLE "{ table_name [:- 4 ]} " { columns } ;' )
198
+ # Create actual table that will be used for experiment
199
+ actual_tables .append (f'CREATE TABLE "{ table_name } " { columns } ;' )
190
200
191
- return statements
201
+ return complete_tables , actual_tables
192
202
193
203
194
204
# Parse `results` for timings
0 commit comments