Skip to content

Commit 68fce79

Browse files
committed
[Benchmark] Support index field in connectors
Adds index support to the mutable, DuckDB, and PostgreSQL connectors. HyPer is not yet supported because index support is currently disabled in `tableauhyperapi`. The `method` field of an index is honored only by mutable; all other databases ignore it.
1 parent 1d37833 commit 68fce79

File tree

5 files changed

+67
-2
lines changed

5 files changed

+67
-2
lines changed

benchmark/database_connectors/connector.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,28 @@ def check_with_scale_factors(params: dict[str, Any]) -> bool:
111111
return False
112112

113113

114+
# Generates statements for creating indexes.
#
# `indexes` maps each index name to its description.  A description must contain the key 'attributes', holding
# either a single attribute as `str` or several attributes as `list[str]`.  An optional 'method' key may be present
# but is deliberately ignored by this default implementation.
#
# Returns one `CREATE INDEX` statement per entry of `indexes`, in iteration order.  The table name is emitted in
# double quotes; index name and attributes are inserted verbatim.  An empty `indexes` yields an empty list.
# Raises `KeyError` if a description lacks the 'attributes' key.
@staticmethod
def generate_create_index_stmts(table_name: str, indexes: dict[str, dict[str, Any]]) -> list[str]:
    create_indexes: list[str] = []
    for index_name, index in indexes.items():
        attributes: str | list[str] = index['attributes']
        # Several attributes are given as a list and become a comma-separated column list.
        columns: str = ', '.join(attributes) if isinstance(attributes, list) else attributes
        create_indexes.append(f'CREATE INDEX {index_name} ON "{table_name}" ({columns});')
    return create_indexes
125+
126+
127+
# Generates statements for dropping indexes.
#
# `indexes` maps each index name to its description; only the names (the keys) are used.
#
# Returns one `DROP INDEX IF EXISTS` statement per index name, in iteration order.  An empty `indexes` yields an
# empty list.
@staticmethod
def generate_drop_index_stmts(indexes: dict[str, dict[str, Any]]) -> list[str]:
    return [f'DROP INDEX IF EXISTS {index_name};' for index_name in indexes]
134+
135+
114136
#===================================================================================================================
115137
# Start the shell with `command` and pass `query` to its stdin. If the process does not respond after `timeout`
116138
# milliseconds, raise a ExperimentTimeoutExpired(). Return the stdout of the process containing the

benchmark/database_connectors/duckdb.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ def execute(self, n_runs: int, params: dict[str, Any]) -> ConnectorResult:
8282
# Create tables from tmp tables with scale factor
8383
for table_name, table in params['data'].items():
8484
statements.append(f'DELETE FROM "{table_name}";') # empty existing table
85+
drop_indexes: list[str] = self.generate_drop_index_stmts(table.get('indexes', dict()))
86+
statements.extend(drop_indexes) # drop indexes
8587
sf: float | int
8688
if table.get('scale_factors') is not None:
8789
scale_factors = table['scale_factors']
@@ -96,6 +98,8 @@ def execute(self, n_runs: int, params: dict[str, Any]) -> ConnectorResult:
9698
header: int = int(table.get('header', 0))
9799
num_rows: int = round((table['lines_in_file'] - header) * sf)
98100
statements.append(f'INSERT INTO "{table_name}" SELECT * FROM "{table_name}{COMPLETE_TABLE_SUFFIX}" LIMIT {num_rows};')
101+
create_indexes: list[str] = self.generate_create_index_stmts(table_name, table.get('indexes', dict()))
102+
statements.extend(create_indexes) # create indexes
99103

100104
statements.append(".timer on")
101105
statements.append(query_stmt) # Actual query from this case
@@ -130,13 +134,16 @@ def execute(self, n_runs: int, params: dict[str, Any]) -> ConnectorResult:
130134
statements.extend(complete_tables)
131135
statements.extend(actual_tables)
132136

133-
# Dropping and recreating tables in between runs removes any cache influences
137+
# Dropping and recreating tables and indexes in between runs removes any cache influences
134138
refill_stmts: list[str] = list()
135139
for name, table in params['data'].items():
136140
refill_stmts.append(f'DROP TABLE "{name}";')
141+
# Dropping a table also drops its indexes
137142
refill_stmts.extend(actual_tables)
138143
for name, table in params['data'].items():
139144
refill_stmts.append(f'INSERT INTO "{name}" (SELECT * FROM "{name}{COMPLETE_TABLE_SUFFIX}");')
145+
create_indexes: list[str] = self.generate_create_index_stmts(name, table.get('indexes', dict()))
146+
refill_stmts.extend(create_indexes)
140147

141148
for _ in range(n_runs):
142149
statements.extend(refill_stmts)

benchmark/database_connectors/hyper.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,10 @@ def _execute(n_runs: int, params: dict[str, Any]) -> ConnectorResult:
9292
# Set up tables
9393
for table_name, table in params['data'].items():
9494
connection.execute_command(f'DELETE FROM "{table_name}";') # Empty table first
95+
# Index support is currently disabled in tableauhyperapi
96+
# drop_indexes: list[str] = HyPer.generate_drop_index_stmts(table.get('indexes', dict()))
97+
# for stmt in drop_indexes:
98+
# connection.execute_command(stmt) # Drop indexes
9599

96100
sf: float | int
97101
if table.get('scale_factors') is not None:
@@ -107,6 +111,10 @@ def _execute(n_runs: int, params: dict[str, Any]) -> ConnectorResult:
107111
header: int = int(table.get('header', 0))
108112
num_rows: int = round((table['lines_in_file'] - header) * sf)
109113
connection.execute_command(f'INSERT INTO "{table_name}" SELECT * FROM "{table_name}_tmp" LIMIT {num_rows};')
114+
# Index support is currently disabled in tableauhyperapi
115+
# create_indexes: list[str] = HyPer.generate_create_index_stmts(table_name, table.get('indexes', dict()))
116+
# for stmt in create_indexes:
117+
# connection.execute_command(stmt) # Create indexes
110118

111119
# Execute query
112120
with connection.execute_query(query) as result:

benchmark/database_connectors/mutable.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,10 @@ def get_setup_statements(self, suite: str, path_to_data: str, data: dict[str, di
257257
import_str += ' HAS HEADER SKIP HEADER'
258258
statements.append(import_str + ';')
259259

260+
# Create CREATE INDEX statements for current table
261+
create_indexes: list[str] = self.generate_create_index_stmts(table_name, table.get('indexes', dict()))
262+
statements.extend(create_indexes)
263+
260264
return statements
261265

262266

@@ -283,6 +287,23 @@ def parse_results(results: str, pattern: str) -> list[float]:
283287
return durations
284288

285289

290+
# Overrides `generate_create_index_stmts` from Connector ABC
#
# `indexes` maps each index name to its description.  A description must contain the key 'attributes', holding
# either a single attribute as `str` or several attributes as `list[str]`.  If the optional 'method' key holds a
# non-empty value, it is emitted as a `USING <method>` clause.
#
# Returns one `CREATE INDEX` statement per entry of `indexes`, in iteration order.  Table name, index name, and
# attributes are inserted verbatim (no quoting).  An empty `indexes` yields an empty list.
# Raises `KeyError` if a description lacks the 'attributes' key.
@staticmethod
def generate_create_index_stmts(table_name: str, indexes: dict[str, dict[str, Any]]) -> list[str]:
    create_indexes: list[str] = []
    for index_name, index in indexes.items():
        method: str | None = index.get('method')
        attributes: str | list[str] = index['attributes']
        # Several attributes are given as a list and become a comma-separated column list.
        columns: str = ', '.join(attributes) if isinstance(attributes, list) else attributes
        # A missing or empty method produces no `USING` clause at all.
        using_clause: str = f' USING {method}' if method else ''
        create_indexes.append(f'CREATE INDEX {index_name} ON {table_name}{using_clause} ({columns});')
    return create_indexes
305+
306+
286307
# Overrides `print_command` from Connector ABC
287308
def print_command(self, command: str | bytes | Sequence[str | bytes], query: str, indent: str = '') -> None:
288309
assert isinstance(command, Sequence) and isinstance(command[0], str) and not isinstance(command, str), \

benchmark/database_connectors/postgresql.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,11 @@ def execute(self, n_runs: int, params: dict[str, Any]) -> ConnectorResult:
9393
header: int = int(table.get('header', 0))
9494
num_rows: int = round((table['lines_in_file'] - header) * sf)
9595
cursor.execute(f'DELETE FROM "{table_name}";') # empty existing table
96+
drop_indexes: list[str] = self.generate_drop_index_stmts(table.get('indexes', dict()))
97+
cursor.execute(''.join(drop_indexes))
9698
cursor.execute(f'INSERT INTO "{table_name}" SELECT * FROM "{table_name}{COMPLETE_TABLE_SUFFIX}" LIMIT {num_rows};') # copy data with scale factor
99+
create_indexes: list[str] = self.generate_create_index_stmts(table_name, table.get('indexes', dict()))
100+
cursor.execute(''.join(create_indexes))
97101
finally:
98102
connection.close()
99103
del connection
@@ -129,13 +133,16 @@ def execute(self, n_runs: int, params: dict[str, Any]) -> ConnectorResult:
129133
# Prepare db
130134
actual_tables: list[str] = self.prepare_db(params)
131135

132-
# Dropping and recreating tables in between runs removes any cache influences
136+
# Dropping and recreating tables and indexes in between runs removes any cache influences
133137
refill_stmts: list[str] = list()
134138
for name, table in params['data'].items():
135139
refill_stmts.append(f'DROP TABLE "{name}";')
140+
# Dropping a table also drops its indexes
136141
refill_stmts.extend(actual_tables)
137142
for name, table in params['data'].items():
138143
refill_stmts.append(f'INSERT INTO "{name}" (SELECT * FROM "{name}{COMPLETE_TABLE_SUFFIX}");')
144+
create_indexes: list[str] = self.generate_create_index_stmts(name, table.get('indexes', dict()))
145+
refill_stmts.extend(create_indexes)
139146

140147
# Write cases/queries to a file that will be passed to the command to execute
141148
with open(TMP_SQL_FILE, "w") as tmp:

0 commit comments

Comments
 (0)