Skip to content

Commit ba6948f

Browse files
author
Tobias Kopp
committed
[Benchmark] Refactor benchmark system.
Instead of using a different script for each experiment and DBMS to benchmark, implement `connectors` for these DBMSs. The connector has a method to execute an experiment with the given parameters and returns the measured times. In addition, the format of the YAML files of the experiments has been refactored to contain all the information and parameters to execute them on each connector. `Benchmark.py` is refactored as well to read the experiment files and execute them on each available specified connector, with possibly multiple configurations. Some more minor changes: - The benchmark script now has the option to execute one (or multiple) specific experiments. - The `run_id` of each experiment run is tracked and inserted into the database.
1 parent 1d6cd0b commit ba6948f

32 files changed

+3559
-1977
lines changed

benchmark/Benchmark.py

Lines changed: 106 additions & 298 deletions
Large diffs are not rendered by default.

benchmark/_schema.yml

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,19 @@
11
description: str()
2-
version: int(required=False, min=1)
32
suite: str()
43
benchmark: str()
54
name: str(required=False)
65
readonly: bool()
7-
pattern: str()
8-
args: str(required=False)
9-
configurations: map(str(), required=False)
10-
tables: include('table_list')
11-
cases: map(any(str(), include('case')), key=any())
12-
compare_to: map(required=False)
13-
chart: include('chart', required=False)
6+
chart: include('chart_def', required=False)
7+
data: map(include('table'), required=False) # Map from table name to 'table'
8+
systems:
9+
mutable: include('mutable', required=False)
10+
PostgreSQL: include('PostgreSQL', required=False)
11+
DuckDB: include('DuckDB', required=False)
12+
HyPer: include('HyPer', required=False)
1413
---
15-
table:
16-
name: str() # table name
17-
path: str(required=False) # path to table file
18-
sf: num(required=False, min=0, max=1) # scale factor as portion of file to load; defaults to 1
19-
delimiter: str(required=False) # defaults to ','
20-
header: int(required=False) # 1 if file has header, 0 otherwise; defaults to 0
21-
table_list:
22-
list(any(str(), include('table')), required=True)
23-
case:
24-
query: str()
25-
tables: include('table_list')
14+
chart_def:
15+
x: include('axis', required=False)
16+
y: include('axis', required=False)
2617
---
2718
axis:
2819
# Kind of scale, one of
@@ -37,6 +28,28 @@ axis:
3728
type: str(required=False)
3829
# A label for the axis
3930
label: str(required=False)
40-
chart:
41-
x: include('axis', required=False)
42-
y: include('axis', required=False)
31+
---
32+
table:
33+
attributes: map(str(), key=str(), required=False) # table column names and types
34+
file: str(required=False) # path to table file
35+
delimiter: str(required=False) # defaults to ','
36+
header: int() # 1 if file has header, 0 otherwise; defaults to 0
37+
format: str(required=False) # file format
38+
scale_factors: map(num(min=0, max=1), required=False) # map from case name to scale factor (portion of file to load)
39+
lines_in_file: int(required=False) # Number of lines in the file. Is counted and added by the benchmark script
40+
---
41+
mutable:
42+
cases: include('cases')
43+
pattern: str()
44+
args: str(required=False)
45+
configurations: map(str(), required=False)
46+
PostgreSQL:
47+
cases: include('cases')
48+
DuckDB:
49+
cases: include('cases')
50+
HyPer:
51+
single_core: bool(required=False)
52+
all_cores: bool(required=False)
53+
cases: include('cases')
54+
---
55+
cases: map(str(), key=any())
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from abc import ABC, abstractmethod
2+
3+
class Connector(ABC):
    """Abstract base class for DBMS connectors.

    A connector knows how to run one benchmark experiment against a
    particular database system and report the measured execution times.
    """

    @abstractmethod
    def execute(self, n_runs: int, params: dict):
        """Run the experiment described by ``params`` exactly ``n_runs`` times.

        Returns a dict with the measured times for the experiment, of the
        form::

            results
            └── configurations
                └── cases
                    └── times (list)

        i.e. ``configuration name -> {case -> list of floats}``, where each
        list of measured times has length ``n_runs``.

        Example (n_runs=2)::

            {
                'PostgreSQL': {
                    1: [1235.093, 1143.43],
                    2: [1033.711, 1337.37],
                    3: [1043.452, 1010.01],
                    4: [1108.702, 1234.56],
                }
            }
        """
        ...
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
from .connector import *
2+
3+
import duckdb
4+
import os
5+
import json
6+
7+
8+
# Scratch files used while running an experiment; removed by `DuckDB.clean_up()`.
TMP_DB = 'tmp.duckdb'    # temporary DuckDB database file
TMP_SQL_FILE = 'tmp.sql' # temporary SQL script piped into the DuckDB CLI

# TODO way of measuring time is wrong. Use duckdb_cli like in older version.
12+
13+
class DuckDB(Connector):
14+
15+
def __new__(cls, *args, **kwargs):
16+
return super().__new__(cls)
17+
18+
19+
def __init__(self, duckdb_cli, verbose=False):
20+
self.duckdb_cli=duckdb_cli
21+
22+
23+
# Runs an experiment 'n_runs' times, all parameters are in 'params'
24+
def execute(self, n_runs, params: dict):
25+
self.clean_up()
26+
27+
measurement_times = dict() # map that is returned with the measured times
28+
29+
# Check wether tables contain scale factors
30+
with_scale_factors = False
31+
for table in params['data'].values():
32+
if (table.get('scale_factors')):
33+
with_scale_factors = True
34+
break
35+
36+
for _ in range(n_runs):
37+
try:
38+
# Set up database
39+
self.generate_create_table_stmts(params['data'], with_scale_factors)
40+
41+
42+
# If tables contain scale factors, they have to be loaded separately for every case
43+
if (with_scale_factors and bool(params.get('readonly'))):
44+
# Write cases/queries to a file that will be passed to the command to execute
45+
statements = list()
46+
for case, query_stmt in params['cases'].items():
47+
# Create tables from tmp tables with scale factor
48+
for table_name, table in params['data'].items():
49+
statements.append(f"DELETE FROM {table_name};") # empty existing table
50+
if table.get('scale_factors'):
51+
sf = table['scale_factors'][case]
52+
else:
53+
sf = 1
54+
header = int(table.get('header', 0))
55+
num_rows = round((table['lines_in_file'] - header) * sf)
56+
statements.append(f"INSERT INTO {table_name} SELECT * FROM {table_name}_tmp LIMIT {num_rows};")
57+
58+
statements.append(".timer on")
59+
statements.append(query_stmt) # Actual query from this case
60+
statements.append(".timer off")
61+
62+
# Append statements to file
63+
with open(TMP_SQL_FILE, "a+") as tmp:
64+
for stmt in statements:
65+
tmp.write(stmt + "\n")
66+
67+
68+
69+
# Otherwise, tables have to be created just once before the measurements (done above)
70+
else:
71+
# Write cases/queries to a file that will be passed to the command to execute
72+
with open(TMP_SQL_FILE, "a+") as tmp:
73+
tmp.write(".timer on\n")
74+
for case_query in params['cases'].values():
75+
tmp.write(case_query + '\n')
76+
tmp.write(".timer off\n")
77+
78+
79+
# Execute query file and collect measurement data
80+
command = f"./{self.duckdb_cli} {TMP_DB} < {TMP_SQL_FILE}" + " | grep 'Run Time' | cut -d ' ' -f 5 | awk '{print $1 * 1000;}'"
81+
stream = os.popen(f'{command}')
82+
for idx, line in enumerate(stream):
83+
time = float(line.replace("\n", "").replace(",", ".")) # in milliseconds
84+
case = list(params['cases'].keys())[idx]
85+
if case not in measurement_times.keys():
86+
measurement_times[case] = list()
87+
measurement_times[case].append(time)
88+
stream.close()
89+
90+
91+
finally:
92+
self.clean_up()
93+
94+
return {'DuckDB': measurement_times}
95+
96+
97+
# Deletes the used temporary database
98+
def clean_up(self):
99+
if os.path.exists(TMP_DB):
100+
os.remove(TMP_DB)
101+
if os.path.exists(TMP_SQL_FILE):
102+
os.remove(TMP_SQL_FILE)
103+
104+
105+
# Parse attributes of one table, return as string
106+
def parse_attributes(self, attributes: dict):
107+
columns = '('
108+
for column_name, ty in attributes.items():
109+
not_null = 'NOT NULL' if 'NOT NULL' in ty else ''
110+
ty = ty.split(' ')
111+
match (ty[0]):
112+
case 'INT':
113+
typ = 'INT'
114+
case 'CHAR':
115+
typ = f'CHAR({ty[1]})'
116+
case 'DECIMAL':
117+
typ = f'DECIMAL({ty[1]},{ty[2]})'
118+
case 'DATE':
119+
typ = 'DATE'
120+
case 'DOUBLE':
121+
typ = 'DOUBLE'
122+
case 'FLOAT':
123+
typ = 'REAL'
124+
case 'BIGINT':
125+
typ = 'BIGINT'
126+
case _:
127+
raise Exception(f"Unknown type given for '{column_name}'")
128+
columns += f"{column_name} {typ} {not_null}, "
129+
columns = columns[:-2] + ')'
130+
return columns
131+
132+
133+
# Creates tables in the database and copies contents of given files into them
134+
# Call with 'with_scale_factors'=False if data should be loaded as a whole
135+
# Call with 'with_scale_factors'=True if data should be placed in tmp tables
136+
# and copied for each case with different scale factor
137+
def generate_create_table_stmts(self, data: dict, with_scale_factors):
138+
statements = list()
139+
for table_name, table in data.items():
140+
columns = self.parse_attributes(table['attributes'])
141+
142+
delimiter = table.get('delimiter')
143+
header = table.get('header')
144+
format = table['format'].upper()
145+
146+
if with_scale_factors:
147+
table_name += "_tmp"
148+
149+
create = f"CREATE TABLE {table_name} {columns};"
150+
copy = f"COPY {table_name} FROM '{table['file']}' ( "
151+
if delimiter:
152+
delim = delimiter.replace("'", "")
153+
copy += f" DELIMITER \'{delim}\',"
154+
if format:
155+
copy += f" FORMAT {format},"
156+
if header:
157+
copy += f" HEADER," if (header==1) else ""
158+
159+
copy = copy[:-1] + " );"
160+
161+
statements.append(create)
162+
statements.append(copy)
163+
164+
if with_scale_factors:
165+
# Create actual table that will be used for experiment
166+
statements.append(f"CREATE TABLE {table_name[:-4]} {columns};")
167+
168+
with open(TMP_SQL_FILE, "w") as tmp:
169+
for stmt in statements:
170+
tmp.write(stmt + "\n")

0 commit comments

Comments
 (0)