diff --git a/dataframe-jdbc/api/dataframe-jdbc.api b/dataframe-jdbc/api/dataframe-jdbc.api index ff54fb1b24..b4b3ccc532 100644 --- a/dataframe-jdbc/api/dataframe-jdbc.api +++ b/dataframe-jdbc/api/dataframe-jdbc.api @@ -39,10 +39,10 @@ public final class org/jetbrains/kotlinx/dataframe/io/ReadJdbcKt { public static final fun getDataFrameSchema (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; public static synthetic fun getDataFrameSchema$default (Ljava/sql/Connection;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; public static synthetic fun getDataFrameSchema$default (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; - public static final fun getSchemaForAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Ljava/util/Map; - public static final fun getSchemaForAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Ljava/util/Map; - public static synthetic fun getSchemaForAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Ljava/util/Map; - public static synthetic fun getSchemaForAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Ljava/util/Map; + public static final fun getSchemaForAllSqlTables 
(Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;Ljava/util/List;)Ljava/util/Map; + public static final fun getSchemaForAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;Ljava/util/List;)Ljava/util/Map; + public static synthetic fun getSchemaForAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;Ljava/util/List;ILjava/lang/Object;)Ljava/util/Map; + public static synthetic fun getSchemaForAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;Ljava/util/List;ILjava/lang/Object;)Ljava/util/Map; public static final fun getSchemaForResultSet (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/ResultSet;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; public static final fun getSchemaForSqlQuery (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; public static final fun getSchemaForSqlQuery (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; @@ -52,10 +52,10 @@ public final class org/jetbrains/kotlinx/dataframe/io/ReadJdbcKt { public static final fun getSchemaForSqlTable 
(Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; public static synthetic fun getSchemaForSqlTable$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; public static synthetic fun getSchemaForSqlTable$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/schema/DataFrameSchema; - public static final fun readAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Ljava/util/Map; - public static final fun readAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Ljava/util/Map; - public static synthetic fun readAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Ljava/util/Map; - public static synthetic fun readAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;ILjava/lang/Object;)Ljava/util/Map; + public static final fun readAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Ljava/util/List;)Ljava/util/Map; + public static final fun 
readAllSqlTables (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Ljava/util/List;)Ljava/util/Map; + public static synthetic fun readAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Ljava/sql/Connection;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Ljava/util/List;ILjava/lang/Object;)Ljava/util/Map; + public static synthetic fun readAllSqlTables$default (Lorg/jetbrains/kotlinx/dataframe/DataFrame$Companion;Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Ljava/util/List;ILjava/lang/Object;)Ljava/util/Map; public static final fun readDataFrame (Ljava/sql/Connection;Ljava/lang/String;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;Z)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun readDataFrame (Ljava/sql/ResultSet;Ljava/sql/Connection;IZLorg/jetbrains/kotlinx/dataframe/io/db/DbType;)Lorg/jetbrains/kotlinx/dataframe/DataFrame; public static final fun readDataFrame (Ljava/sql/ResultSet;Lorg/jetbrains/kotlinx/dataframe/io/db/DbType;IZ)Lorg/jetbrains/kotlinx/dataframe/DataFrame; diff --git a/dataframe-jdbc/build.gradle.kts b/dataframe-jdbc/build.gradle.kts index f616d61891..ac9a7197ce 100644 --- a/dataframe-jdbc/build.gradle.kts +++ b/dataframe-jdbc/build.gradle.kts @@ -26,6 +26,8 @@ dependencies { testImplementation(libs.junit) testImplementation(libs.sl4jsimple) testImplementation(libs.jts) + testImplementation(libs.duckdb.jdbc) + testImplementation(projects.dataframeJson) testImplementation(libs.kotestAssertions) { exclude("org.jetbrains.kotlin", "kotlin-stdlib-jdk8") } diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/duckDbTest.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/duckDbTest.kt new file mode 100644 index 0000000000..e555243ab4 --- /dev/null +++ 
b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/duckDbTest.kt @@ -0,0 +1,460 @@ +@file:Suppress("SqlDialectInspection") + +package org.jetbrains.kotlinx.dataframe.io + +import io.kotest.matchers.shouldBe +import org.duckdb.DuckDBColumnType +import org.duckdb.DuckDBColumnType.ARRAY +import org.duckdb.DuckDBColumnType.BIGINT +import org.duckdb.DuckDBColumnType.BIT +import org.duckdb.DuckDBColumnType.BLOB +import org.duckdb.DuckDBColumnType.BOOLEAN +import org.duckdb.DuckDBColumnType.DATE +import org.duckdb.DuckDBColumnType.DECIMAL +import org.duckdb.DuckDBColumnType.DOUBLE +import org.duckdb.DuckDBColumnType.ENUM +import org.duckdb.DuckDBColumnType.FLOAT +import org.duckdb.DuckDBColumnType.HUGEINT +import org.duckdb.DuckDBColumnType.INTEGER +import org.duckdb.DuckDBColumnType.INTERVAL +import org.duckdb.DuckDBColumnType.JSON +import org.duckdb.DuckDBColumnType.LIST +import org.duckdb.DuckDBColumnType.MAP +import org.duckdb.DuckDBColumnType.SMALLINT +import org.duckdb.DuckDBColumnType.STRUCT +import org.duckdb.DuckDBColumnType.TIME +import org.duckdb.DuckDBColumnType.TIMESTAMP +import org.duckdb.DuckDBColumnType.TIMESTAMP_MS +import org.duckdb.DuckDBColumnType.TIMESTAMP_NS +import org.duckdb.DuckDBColumnType.TIMESTAMP_S +import org.duckdb.DuckDBColumnType.TIMESTAMP_WITH_TIME_ZONE +import org.duckdb.DuckDBColumnType.TIME_WITH_TIME_ZONE +import org.duckdb.DuckDBColumnType.TINYINT +import org.duckdb.DuckDBColumnType.UBIGINT +import org.duckdb.DuckDBColumnType.UHUGEINT +import org.duckdb.DuckDBColumnType.UINTEGER +import org.duckdb.DuckDBColumnType.UNION +import org.duckdb.DuckDBColumnType.UNKNOWN +import org.duckdb.DuckDBColumnType.USMALLINT +import org.duckdb.DuckDBColumnType.UTINYINT +import org.duckdb.DuckDBColumnType.UUID +import org.duckdb.DuckDBColumnType.VARCHAR +import org.duckdb.DuckDBConnection +import org.duckdb.DuckDBResultSetMetaData +import org.duckdb.JsonNode +import org.jetbrains.kotlinx.dataframe.DataFrame +import 
org.jetbrains.kotlinx.dataframe.api.inferType +import org.jetbrains.kotlinx.dataframe.api.isNotEmpty +import org.jetbrains.kotlinx.dataframe.api.print +import org.jetbrains.kotlinx.dataframe.io.DuckDb.toKType +import org.jetbrains.kotlinx.dataframe.io.db.DbType +import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema +import org.junit.Test +import java.math.BigDecimal +import java.math.BigInteger +import java.sql.Blob +import java.sql.DriverManager +import java.sql.ResultSet +import java.sql.Timestamp +import java.time.LocalDate +import java.time.LocalTime +import java.time.OffsetDateTime +import java.time.OffsetTime +import java.util.UUID +import kotlin.reflect.KType +import kotlin.reflect.KTypeProjection +import kotlin.reflect.full.createType +import kotlin.reflect.full.isSubtypeOf +import kotlin.reflect.full.withNullability +import kotlin.reflect.typeOf + +private const val URL = "jdbc:duckdb:" + +object DuckDb : DbType("duckdb") { + override val driverClassName = "org.duckdb.DuckDBDriver" + + override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema { + val type = convertSqlTypeToKType(tableColumnMetadata) + return ColumnSchema.Value(type) + } + + // TODO? + override fun isSystemTable(tableMetadata: TableMetadata): Boolean = + tableMetadata.schemaName?.lowercase()?.contains("information_schema") == true || + tableMetadata.schemaName?.lowercase()?.contains("system") == true || + tableMetadata.name.lowercase().contains("system_") + + override fun buildTableMetadata(tables: ResultSet): TableMetadata = + TableMetadata( + tables.getString("TABLE_NAME"), + tables.getString("TABLE_SCHEM"), + tables.getString("TABLE_CAT"), + ) + + /** + * Follows exactly [org.duckdb.DuckDBVector.getObject]. 
+ * + * I added a "// dataframe-jdbc" comment for all types that are covered correctly by + * [org.jetbrains.kotlinx.dataframe.io.makeCommonSqlToKTypeMapping] at the moment, however, to cover + * all nested types, we'll use a full type-map for all [DuckDB types][DuckDBColumnType] exactly. + */ + @Suppress("ktlint:standard:blank-line-between-when-conditions") + internal fun String.toKType(isNullable: Boolean): KType { + val sqlTypeName = this + return when (DuckDBResultSetMetaData.TypeNameToType(sqlTypeName)) { + BOOLEAN -> typeOf() // dataframe-jdbc + TINYINT -> typeOf() + SMALLINT -> typeOf() + INTEGER -> typeOf() // dataframe-jdbc + BIGINT -> typeOf() // dataframe-jdbc + HUGEINT -> typeOf() + UHUGEINT -> typeOf() + UTINYINT -> typeOf() + USMALLINT -> typeOf() + UINTEGER -> typeOf() + UBIGINT -> typeOf() + FLOAT -> typeOf() // dataframe-jdbc + DOUBLE -> typeOf() // dataframe-jdbc + DECIMAL -> typeOf() // dataframe-jdbc + TIME -> typeOf() + TIME_WITH_TIME_ZONE -> typeOf() // dataframe-jdbc + DATE -> typeOf() + TIMESTAMP, TIMESTAMP_MS, TIMESTAMP_NS, TIMESTAMP_S -> typeOf() // dataframe-jdbc + TIMESTAMP_WITH_TIME_ZONE -> typeOf() // dataframe-jdbc + JSON -> typeOf() + BLOB -> typeOf() + UUID -> typeOf() + MAP -> { + val (key, value) = parseMapTypes(sqlTypeName) + Map::class.createType( + listOf( + KTypeProjection.invariant(key.toKType(false)), + KTypeProjection.covariant(value.toKType(true)), + ), + ) + } + + LIST, ARRAY -> { + // TODO requires #1266 and #1273 for specific types + // val listType = parseListType(sqlTypeName) + // Array::class.createType( + // listOf(KTypeProjection.covariant(listType.toKType(true))), + // ) + typeOf() + } + + STRUCT -> typeOf() // TODO requires #1266 for specific types + UNION -> typeOf() // Cannot handle this in Kotlin + VARCHAR -> typeOf() + UNKNOWN, BIT, INTERVAL, ENUM -> typeOf() + }.withNullability(isNullable) + } + + // Parses "MAP(X, Y)" into "X" and "Y", taking parentheses into account + fun parseMapTypes(typeString: String): 
Pair { + if (!typeString.startsWith("MAP(") || !typeString.endsWith(")")) { + error("invalid MAP type: $typeString") + } + + val content = typeString.removeSurrounding("MAP(", ")") + + // Find the comma that separates key and value types + var parenCount = 0 + var commaIndex = -1 + for (i in content.indices) { + when (content[i]) { + '(' -> parenCount++ + + ')' -> parenCount-- + + ',' -> if (parenCount == 0) { + commaIndex = i + break + } + } + } + + if (commaIndex == -1) error("invalid MAP type: $typeString") + val keyType = content.take(commaIndex).trim() + val valueType = content.substring(commaIndex + 1).trim() + return Pair(keyType, valueType) + } + + // Parses "X[]" and "X[123]" into "X", and "X[][]" into "X[]" + fun parseListType(typeString: String): String { + if (!typeString.endsWith("]")) { + error("invalid LIST/ARRAY type: $typeString") + } + + return typeString.take(typeString.indexOfLast { it == '[' }) + } + + override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType = + tableColumnMetadata.sqlTypeName.toKType(tableColumnMetadata.isNullable) +} + +class DuckDbTest { + + @Test + fun `read simple dataframe from DuckDB`() { + val df = DriverManager.getConnection(URL).use { connection -> + connection.prepareStatement( + """ + CREATE TABLE IF NOT EXISTS test_table ( + id INTEGER PRIMARY KEY, + name VARCHAR, + age INTEGER, + salary DOUBLE, + hire_date DATE + ) + """.trimIndent(), + ).executeUpdate() + + connection.prepareStatement( + """ + INSERT INTO test_table (id, name, age, salary, hire_date) + VALUES + (1, 'John Doe', 30, 50000.00, '2020-01-15'), + (2, 'Jane Smith', 28, 55000.00, '2021-03-20'), + (3, 'Bob Johnson', 35, 65000.00, '2019-11-10'), + (4, 'Alice Brown', 32, 60000.00, '2020-07-01') + """.trimIndent(), + ).executeUpdate() + + DataFrame.readSqlTable(connection, "test_table", dbType = DuckDb) + } + + df.print(borders = true, columnTypes = true) + df.isNotEmpty() shouldBe true + } + + @Test + fun `read all tables`() { + 
DriverManager.getConnection(URL).use { connection -> + connection.prepareStatement( + """ + CREATE TABLE IF NOT EXISTS test_table ( + id INTEGER PRIMARY KEY, + name VARCHAR, + age INTEGER, + salary DOUBLE, + hire_date DATE + ) + """.trimIndent(), + ).executeUpdate() + + connection.prepareStatement( + """ + INSERT INTO test_table (id, name, age, salary, hire_date) + VALUES + (1, 'John Doe', 30, 50000.00, '2020-01-15'), + (2, 'Jane Smith', 28, 55000.00, '2021-03-20'), + (3, 'Bob Johnson', 35, 65000.00, '2019-11-10'), + (4, 'Alice Brown', 32, 60000.00, '2020-07-01') + """.trimIndent(), + ).executeUpdate() + + DataFrame.readAllSqlTables(connection = connection, dbType = DuckDb).isNotEmpty() shouldBe true + } + } + + /** + * https://duckdb.org/docs/stable/sql/data_types/overview.html + */ + @Test + fun `read each general-purpose DuckDB type`() { + val df = DriverManager.getConnection(URL).use { connection -> + connection as DuckDBConnection + connection.prepareStatement( + """ + CREATE TABLE IF NOT EXISTS table1 ( + bigint_col BIGINT, + int8_col INT8, + int64_col INT64, + long_col LONG, + bit_col BIT, + bitstring_col BITSTRING, + blob_col BLOB, + bytea_col BYTEA, + binary_col BINARY, + varbinary_col VARBINARY, + boolean_col BOOLEAN, + bool_col BOOL, + logical_col LOGICAL, + date_col DATE, + decimal_col DECIMAL(10,2), + numeric_col NUMERIC(10,2), + double_col DOUBLE, + float8_col FLOAT8, + float_col FLOAT, + float4_col FLOAT4, + real_col REAL, + hugeint_col HUGEINT, + int128_col INT128, + integer_col INTEGER, + int4_col INT4, + int32_col INT32, + int_col INT, + signed_col SIGNED, + interval_col INTERVAL, + json_col JSON, + smallint_col SMALLINT, + int2_col INT2, + int16_col INT16, + short_col SHORT, + time_col TIME, + timestampwtz_col TIMESTAMP WITH TIME ZONE, + timestamptz_col TIMESTAMPTZ, + timestamp_col TIMESTAMP, + datetime_col DATETIME, + tinyint_col TINYINT, + int1_col INT1, + ubigint_col UBIGINT, + uint64_col UINT64, + uhugeint_col UHUGEINT, + uint128_col UINT128, 
+ uint_col UINTEGER, + uint32_col UINT32, + usmallint_col USMALLINT, + uint16_col UINT16, + utinyint_col UTINYINT, + uint8_col UINT8, + uuid_col UUID, + varchar_col VARCHAR, + char_col CHAR(10), + bpchar_col BPCHAR(10), + text_col TEXT, + string_col STRING, + enum_col ENUM('male', 'female', 'other') + ) + """.trimIndent(), + ).executeUpdate() + + connection.prepareStatement( + """ + INSERT INTO table1 VALUES ( + 9223372036854775807, -- bigint + 9223372036854775807, -- int8 + 9223372036854775807, -- int64 + 9223372036854775807, -- long + '1010', -- bit + '1010', -- bitstring + 'DEADBEEF'::BLOB, -- blob + 'DEADBEEF'::BLOB, -- bytea + 'DEADBEEF'::BLOB, -- binary + 'DEADBEEF'::BLOB, -- varbinary + true, -- boolean + true, -- bool + true, -- logical + '2025-06-19', -- date + 123.45, -- decimal + 123.45, -- numeric + 3.14159, -- double + 3.14159, -- float8 + 3.14, -- float + 3.14, -- float4 + 3.14, -- real + '170141183460469231731687303715884105727', -- hugeint + '170141183460469231731687303715884105727', -- int128 + 2147483647, -- integer + 2147483647, -- int4 + 2147483647, -- int32 + 2147483647, -- int + 2147483647, -- signed + INTERVAL '1' YEAR, -- interval + '{"key": "value"}'::JSON, -- json + 32767, -- smallint + 32767, -- int2 + 32767, -- int16 + 32767, -- short + '12:34:56', -- time + '2025-06-19 12:34:56+02', -- timestampwtz + '2025-06-19 12:34:56+02', -- timestamptz + '2025-06-19 12:34:56', -- timestamp + '2025-06-19 12:34:56', -- datetime + 127, -- tinyint + 127, -- int1 + 18446744073709551615, -- ubigint + 18446744073709551615, -- uint64 + '340282366920938463463374607431768211455', -- uhugeint + '340282366920938463463374607431768211455', -- uint128 + 4294967295, -- uinteger + 4294967295, -- uint32 + 65535, -- usmallint + 65535, -- uint16 + 255, -- utinyint + 255, -- uint8 + 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11', -- uuid + 'test string', -- varchar + 'test', -- char + 'test', -- bpchar + 'test text', -- text + 'test string', -- string + 'female' -- enum + ) + 
""".trimIndent(), + ).executeUpdate() + + DataFrame.getSchemaForSqlTable(connection, "table1", dbType = DuckDb).print() + DataFrame.readSqlTable(connection, "table1", dbType = DuckDb) + } + + df.print(columnTypes = true, borders = true) + df.inferType().print(columnTypes = true, borders = true) + + (df.columnTypes() zip df.inferType().columnTypes()).forEach { (provided, inferred) -> + inferred.isSubtypeOf(provided) shouldBe true + } + } + + /** + * https://duckdb.org/docs/stable/sql/data_types/overview.html + */ + @Test + fun `read each nested DuckDB type`() { + val df = DriverManager.getConnection(URL).use { connection -> + connection as DuckDBConnection + connection.prepareStatement( + """ + CREATE TABLE IF NOT EXISTS table2 ( + intarray_col INTEGER[3], + stringarray_col VARCHAR[3], + intlist_col INTEGER[], + stringlist_col VARCHAR[], + stringlistlist_col VARCHAR[][], + intstringmap_col MAP(INTEGER, VARCHAR), + intstrinstinggmap_col MAP(INTEGER, MAP(VARCHAR, VARCHAR)), + ijstruct_col STRUCT(i INTEGER, j VARCHAR), + union_col UNION(num INTEGER, text VARCHAR), + ) + """.trimIndent(), + ).executeUpdate() + + connection.prepareStatement( + """ + INSERT INTO table2 VALUES ( + array_value(1, 2, NULL), -- int array + array_value('a', 'ab', 'abc'), -- string array + list_value(1, 2, 3), -- int list + list_value('a', 'ab', 'abc'), -- string list + list_value(list_value('a', 'ab'), list_value('abc'), NULL), -- string list list + MAP { 1: 'value1', 200: 'value2' }, -- int string map + MAP { 1: MAP { 'value1': 'a', 'value2': 'b' }, 200: MAP { 'value1': 'c', 'value2': 'd' } }, -- int string string map + { 'i': 42, 'j': 'answer' }, -- struct + union_value(num := 2), -- union + ) + """.trimIndent(), + ).executeUpdate() + + DataFrame.getSchemaForSqlTable(connection, "table2", dbType = DuckDb).print() + DataFrame.readSqlTable(connection, "table2", dbType = DuckDb) + } + + df.print(columnTypes = true, borders = true) + df.inferType().print(columnTypes = true, borders = true) + + 
(df.columnTypes() zip df.inferType().columnTypes()).forEach { (provided, inferred) -> + inferred.isSubtypeOf(provided) shouldBe true + } + } +} diff --git a/examples/notebooks/dev/duckdb/duckdb.ipynb b/examples/notebooks/dev/duckdb/duckdb.ipynb new file mode 100644 index 0000000000..ff8cd97d55 --- /dev/null +++ b/examples/notebooks/dev/duckdb/duckdb.ipynb @@ -0,0 +1,5032 @@ +{ + "cells": [ + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "## DuckDB and dataframe-jdbc\n", + "\n", + "[DuckDB](https://duckdb.org/), for now, is an officially unsupported JDBC source for DataFrame.\n", + "However, a source being officially unsupported does not mean that it is not possible to use it with DataFrame :).\n", + "\n", + "For DuckDB, we actually have two ways to support it:\n", + "\n", + "The first one is to use `dataframe-arrow`, as DuckDB has a [bridge to Arrow](https://duckdb.org/docs/stable/clients/java.html#arrow-methods). [Click here for an example of how to do it](https://github.com/Kotlin/dataframe/blob/62b48942b1aef35f939f2d0aff407872028fd177/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ArrowKtTest.kt#L637). This might work for you, but it is less flexible, as you're forced to use Arrow types.\n", + "\n", + "The second way, which we will show here, is to use `dataframe-jdbc`, as DuckDB has a [JDBC driver](https://duckdb.org/docs/stable/clients/java.html) too.\n", + "This is a more direct approach and shows how you can add other JDBC sources as well, so without any further ado, let's get started!" 
+ ], + "id": "42ad4cd75ea13ecc" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "We're adding the DuckDB dependency before adding DataFrame to make sure classloading works well", + "id": "e81da20f4b3ff48d" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-06-25T09:55:11.389684079Z", + "start_time": "2025-06-25T09:55:09.403546939Z" + } + }, + "cell_type": "code", + "source": [ + "USE {\n", + " dependencies {\n", + " implementation(group = \"org.duckdb\", artifact = \"duckdb_jdbc\", version = \"1.2.2.0\")\n", + " }\n", + "}" + ], + "id": "5d8f65b1a0a8072f", + "outputs": [], + "execution_count": 1 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "And to make sure we use the latest version of DataFrame, we'll use the `%useLatestDescriptors` magic.", + "id": "28b92864a3ddd581" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-06-25T09:55:17.197454634Z", + "start_time": "2025-06-25T09:55:11.777513110Z" + } + }, + "cell_type": "code", + "source": [ + "%useLatestDescriptors\n", + "%use dataframe" + ], + "id": "bb5c30ea2bcb95a3", + "outputs": [], + "execution_count": 2 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "DuckDB can connect to a specific database by path or, if no path is supplied, run in memory!\n", + "This is what we'll use for this example.\n", + "\n", + "[Read more about connecting to DuckDB](https://duckdb.org/docs/stable/clients/java.html)" + ], + "id": "67fba32f59a13ac2" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-06-25T09:55:17.572572502Z", + "start_time": "2025-06-25T09:55:17.467068347Z" + } + }, + "cell_type": "code", + "source": "val URL = \"jdbc:duckdb:\"", + "id": "7e92a0f40e825123", + "outputs": [], + "execution_count": 3 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "Let's insert some example data into our database.\n", + "Usually, a database already has data in it, or, in DuckDB's case, it's connected via an 
[extension](https://duckdb.org/docs/stable/core_extensions/overview) to another existing database." + ], + "id": "1b0e5d6c2247ac3" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-06-25T09:55:18.925137959Z", + "start_time": "2025-06-25T09:55:17.974574092Z" + } + }, + "cell_type": "code", + "source": [ + "import java.sql.DriverManager\n", + "\n", + "val connection = DriverManager.getConnection(URL)\n", + "\n", + "connection.prepareStatement(\n", + " \"\"\"\n", + " CREATE TABLE IF NOT EXISTS test_table (\n", + " id INTEGER PRIMARY KEY,\n", + " name VARCHAR,\n", + " age INTEGER,\n", + " salary DOUBLE,\n", + " hire_date DATE,\n", + " pets VARCHAR[],\n", + " map MAP(VARCHAR, INT),\n", + " )\n", + " \"\"\".trimIndent(),\n", + ").executeUpdate()\n", + "\n", + "connection.prepareStatement(\n", + " \"\"\"\n", + " INSERT INTO test_table (id, name, age, salary, hire_date, pets, map)\n", + " VALUES\n", + " (1, 'John Doe', 30, 50000.00, '2020-01-15', ['Pookie'], MAP { 'value1': 1, 'value2': 200 }),\n", + " (2, 'Jane Smith', 28, 55000.00, '2021-03-20', [], MAP {}),\n", + " (3, 'Bob Johnson', 35, 65000.00, '2019-11-10', ['Rex', 'Garfield'], MAP { 'value1': 3, 'value2': 4 }),\n", + " (4, 'Alice Brown', 32, 60000.00, '2020-07-01', ['Mia'], MAP { 'value2': 5, 'value1': 6 }),\n", + " (5, 'Charlie Smith', 25, 70000.00, '2022-02-01', ['Max'], MAP { 'value1': 7, 'value2': 8 }),\n", + " (6, 'David Smith', 27, 80000.00, '2022-03-01', ['Max', 'Charlie'], MAP { 'value2': 9, 'value1': 10 }),\n", + " (7, 'Eve Smith', 24, 90000.00, '2022-04-01', ['Max', 'Charlie', 'David'], MAP { 'value1': 11, 'value3': 12 }),\n", + " \"\"\".trimIndent(),\n", + ").executeUpdate()\n" + ], + "id": "6ec14f51ec7f132f", + "outputs": [ + { + "data": { + "text/plain": [ + "7" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 4 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Attempt 1\n", + "\n", + "Following the 
[custom JDBC database documentation](https://kotlin.github.io/dataframe/readsqlfromcustomdatabase.html), it seems we need to create our own `DbType` object.\n", + "\n", + "We will skip the `convertSqlTypeToKType` and `convertSqlTypeToColumnSchemaValue` functions for now. Let's see how the defaults fare." + ], + "id": "d053a9768143c8ed" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-06-25T09:55:20.659249316Z", + "start_time": "2025-06-25T09:55:19.416887866Z" + } + }, + "cell_type": "code", + "source": [ + "import org.jetbrains.kotlinx.dataframe.io.db.DbType\n", + "import org.jetbrains.kotlinx.dataframe.io.getSchemaForAllSqlTables\n", + "import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables\n", + "import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema\n", + "import java.sql.ResultSet\n", + "import kotlin.reflect.KType\n", + "\n", + "object DuckDb1 : DbType(\"duckdb\") {\n", + "\n", + " /** the name of the class of the DuckDB JDBC driver */\n", + " override val driverClassName = \"org.duckdb.DuckDBDriver\"\n", + "\n", + " /**\n", + " * How a column type from JDBC, [tableColumnMetadata], is read in Java/Kotlin.\n", + " * The returned type must exactly follow [ResultSet.getObject] of your specific database's JDBC driver.\n", + " * Returning `null` defers the implementation to the default one (which may not always be correct).\n", + " *\n", + " * TODO We'll do this later\n", + " */\n", + " override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? = null\n", + "\n", + " /**\n", + " * How a column from JDBC should be represented as a DataFrame (value) column\n", + " * See [convertSqlTypeToKType].\n", + " *\n", + " * TODO We'll do this later\n", + " */\n", + " override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? 
= null\n", + "\n", + " /**\n", + " * How to filter out system tables from user-created ones when using\n", + " * [DataFrame.readAllSqlTables] and [DataFrame.getSchemaForAllSqlTables].\n", + " *\n", + " * The names of these can sometimes be found in the specific JDBC integration.\n", + " */\n", + " override fun isSystemTable(tableMetadata: TableMetadata): Boolean =\n", + " tableMetadata.schemaName?.lowercase()?.contains(\"information_schema\") == true ||\n", + " tableMetadata.schemaName?.lowercase()?.contains(\"system\") == true ||\n", + " tableMetadata.name.lowercase().contains(\"system_\")\n", + "\n", + " /**\n", + " * How to retrieve the correct table metadata when using\n", + " * [DataFrame.readAllSqlTables] and [DataFrame.getSchemaForAllSqlTables].\n", + " * The names of these can be found in the [DatabaseMetaData] implementation of the DuckDB JDBC integration.\n", + " */\n", + " override fun buildTableMetadata(tables: ResultSet): TableMetadata =\n", + " TableMetadata(\n", + " name = tables.getString(\"TABLE_NAME\"),\n", + " schemaName = tables.getString(\"TABLE_SCHEM\"),\n", + " catalogue = tables.getString(\"TABLE_CAT\"),\n", + " )\n", + "}" + ], + "id": "7193b09db3aeb312", + "outputs": [], + "execution_count": 5 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "Now, with the same connection, let's see if we can read the data from our database into DataFrame!\n", + "\n", + "Since our `DbType` object is a custom one, we need to explicitly set it as the `dbType` parameter." 
+ ], + "id": "a1557747ed74c422" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-06-25T09:55:23.480417761Z", + "start_time": "2025-06-25T09:55:22.011827068Z" + } + }, + "cell_type": "code", + "source": [ + "val df1 = DataFrame.readSqlTable(connection, tableName = \"test_table\", dbType = DuckDb1)\n", + "df1" + ], + "id": "e88725836ddb7092", + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The problem is found in one of the loaded libraries: check library converters (fields callbacks)\n", + "Error compiling code:\n", + "@DataSchema\n", + "interface _DataFrameType {\n", + " val age: Int\n", + " val hire_date: java.util.Date\n", + " val id: Int\n", + " val map: Any\n", + " val name: String\n", + " val pets: Array\n", + " val salary: Double\n", + "}\n", + "\n", + "val ColumnsScope<_DataFrameType>.age: DataColumn @JvmName(\"_DataFrameType_age\") get() = this[\"age\"] as DataColumn\n", + "val DataRow<_DataFrameType>.age: Int @JvmName(\"_DataFrameType_age\") get() = this[\"age\"] as Int\n", + "val ColumnsScope<_DataFrameType>.hire_date: DataColumn @JvmName(\"_DataFrameType_hire_date\") get() = this[\"hire_date\"] as DataColumn\n", + "val DataRow<_DataFrameType>.hire_date: java.util.Date @JvmName(\"_DataFrameType_hire_date\") get() = this[\"hire_date\"] as java.util.Date\n", + "val ColumnsScope<_DataFrameType>.id: DataColumn @JvmName(\"_DataFrameType_id\") get() = this[\"id\"] as DataColumn\n", + "val DataRow<_DataFrameType>.id: Int @JvmName(\"_DataFrameType_id\") get() = this[\"id\"] as Int\n", + "val ColumnsScope<_DataFrameType>.map: DataColumn @JvmName(\"_DataFrameType_map\") get() = this[\"map\"] as DataColumn\n", + "val DataRow<_DataFrameType>.map: Any @JvmName(\"_DataFrameType_map\") get() = this[\"map\"] as Any\n", + "val ColumnsScope<_DataFrameType>.name: DataColumn @JvmName(\"_DataFrameType_name\") get() = this[\"name\"] as DataColumn\n", + "val DataRow<_DataFrameType>.name: String @JvmName(\"_DataFrameType_name\") 
get() = this[\"name\"] as String\n", + "val ColumnsScope<_DataFrameType>.pets: DataColumn> @JvmName(\"_DataFrameType_pets\") get() = this[\"pets\"] as DataColumn>\n", + "val DataRow<_DataFrameType>.pets: Array @JvmName(\"_DataFrameType_pets\") get() = this[\"pets\"] as Array\n", + "val ColumnsScope<_DataFrameType>.salary: DataColumn @JvmName(\"_DataFrameType_salary\") get() = this[\"salary\"] as DataColumn\n", + "val DataRow<_DataFrameType>.salary: Double @JvmName(\"_DataFrameType_salary\") get() = this[\"salary\"] as Double\n", + "(df1 as org.jetbrains.kotlinx.dataframe.DataFrame<*>).cast<_DataFrameType>()\n", + "\n", + "Errors:\n", + "Line_14.jupyter.kts (8:21 - 22) Unresolved reference: T\n", + "Line_14.jupyter.kts (12:106 - 124) Unchecked cast: AnyCol /* = DataColumn<*> */ to DataColumn\n", + "Line_14.jupyter.kts (14:135 - 164) Unchecked cast: AnyCol /* = DataColumn<*> */ to DataColumn\n", + "Line_14.jupyter.kts (16:103 - 121) Unchecked cast: AnyCol /* = DataColumn<*> */ to DataColumn\n", + "Line_14.jupyter.kts (18:106 - 124) Unchecked cast: AnyCol /* = DataColumn<*> */ to DataColumn\n", + "Line_14.jupyter.kts (20:112 - 133) Unchecked cast: AnyCol /* = DataColumn<*> */ to DataColumn\n", + "Line_14.jupyter.kts (22:57 - 58) Unresolved reference: T\n", + "Line_14.jupyter.kts (22:114 - 137) Unchecked cast: AnyCol /* = DataColumn<*> */ to DataColumn>\n", + "Line_14.jupyter.kts (22:134 - 135) Unresolved reference: T\n", + "Line_14.jupyter.kts (23:41 - 42) Unresolved reference: T\n", + "Line_14.jupyter.kts (23:106 - 107) Unresolved reference: T\n", + "Line_14.jupyter.kts (24:118 - 139) Unchecked cast: AnyCol /* = DataColumn<*> */ to DataColumn\n", + "Line_14.jupyter.kts (26:6 - 53) No cast needed\n", + "\n", + "org.jetbrains.kotlinx.jupyter.exceptions.ReplLibraryException: The problem is found in one of the loaded libraries: check library converters (fields callbacks)\n", + "\tat 
org.jetbrains.kotlinx.jupyter.exceptions.CompositeReplExceptionKt.throwAsLibraryException(CompositeReplException.kt:54)\n", + "\tat org.jetbrains.kotlinx.jupyter.exceptions.CompositeReplExceptionKt.throwLibraryException(CompositeReplException.kt:61)\n", + "\tat org.jetbrains.kotlinx.jupyter.codegen.FieldsProcessorImpl.process(FieldsProcessorImpl.kt:68)\n", + "\tat org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl.execute_L4Nmkdk$lambda$9$lambda$3(CellExecutorImpl.kt:99)\n", + "\tat org.jetbrains.kotlinx.jupyter.config.LoggingKt.catchAll(Logging.kt:33)\n", + "\tat org.jetbrains.kotlinx.jupyter.config.LoggingKt.catchAll$default(Logging.kt:27)\n", + "\tat org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl.execute-L4Nmkdk(CellExecutorImpl.kt:98)\n", + "\tat org.jetbrains.kotlinx.jupyter.repl.execution.CellExecutor.execute-L4Nmkdk$default(CellExecutor.kt:14)\n", + "\tat org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.evaluateUserCode-wNURfNM(ReplForJupyterImpl.kt:616)\n", + "\tat org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.evalExImpl(ReplForJupyterImpl.kt:474)\n", + "\tat org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.evalEx$lambda$20(ReplForJupyterImpl.kt:467)\n", + "\tat org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.withEvalContext(ReplForJupyterImpl.kt:447)\n", + "\tat org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.evalEx(ReplForJupyterImpl.kt:466)\n", + "\tat org.jetbrains.kotlinx.jupyter.messaging.IdeCompatibleMessageRequestProcessor.processExecuteRequest$lambda$7$lambda$6$lambda$5(IdeCompatibleMessageRequestProcessor.kt:160)\n", + "\tat org.jetbrains.kotlinx.jupyter.streams.BlockingSubstitutionEngine.withDataSubstitution(SubstitutionEngine.kt:70)\n", + "\tat org.jetbrains.kotlinx.jupyter.streams.StreamSubstitutionManager.withSubstitutedStreams(StreamSubstitutionManager.kt:118)\n", + "\tat 
org.jetbrains.kotlinx.jupyter.messaging.IdeCompatibleMessageRequestProcessor.withForkedIn(IdeCompatibleMessageRequestProcessor.kt:354)\n", + "\tat org.jetbrains.kotlinx.jupyter.messaging.IdeCompatibleMessageRequestProcessor.evalWithIO$lambda$16$lambda$15(IdeCompatibleMessageRequestProcessor.kt:368)\n", + "\tat org.jetbrains.kotlinx.jupyter.streams.BlockingSubstitutionEngine.withDataSubstitution(SubstitutionEngine.kt:70)\n", + "\tat org.jetbrains.kotlinx.jupyter.streams.StreamSubstitutionManager.withSubstitutedStreams(StreamSubstitutionManager.kt:118)\n", + "\tat org.jetbrains.kotlinx.jupyter.messaging.IdeCompatibleMessageRequestProcessor.withForkedErr(IdeCompatibleMessageRequestProcessor.kt:343)\n", + "\tat org.jetbrains.kotlinx.jupyter.messaging.IdeCompatibleMessageRequestProcessor.evalWithIO$lambda$16(IdeCompatibleMessageRequestProcessor.kt:367)\n", + "\tat org.jetbrains.kotlinx.jupyter.streams.BlockingSubstitutionEngine.withDataSubstitution(SubstitutionEngine.kt:70)\n", + "\tat org.jetbrains.kotlinx.jupyter.streams.StreamSubstitutionManager.withSubstitutedStreams(StreamSubstitutionManager.kt:118)\n", + "\tat org.jetbrains.kotlinx.jupyter.messaging.IdeCompatibleMessageRequestProcessor.withForkedOut(IdeCompatibleMessageRequestProcessor.kt:335)\n", + "\tat org.jetbrains.kotlinx.jupyter.messaging.IdeCompatibleMessageRequestProcessor.evalWithIO(IdeCompatibleMessageRequestProcessor.kt:366)\n", + "\tat org.jetbrains.kotlinx.jupyter.messaging.IdeCompatibleMessageRequestProcessor.processExecuteRequest$lambda$7$lambda$6(IdeCompatibleMessageRequestProcessor.kt:159)\n", + "\tat org.jetbrains.kotlinx.jupyter.execution.JupyterExecutorImpl$Task.execute(JupyterExecutorImpl.kt:41)\n", + "\tat org.jetbrains.kotlinx.jupyter.execution.JupyterExecutorImpl.executorThread$lambda$0(JupyterExecutorImpl.kt:83)\n", + "\tat kotlin.concurrent.ThreadsKt$thread$thread$1.run(Thread.kt:30)\n", + "Caused by: org.jetbrains.kotlinx.jupyter.exceptions.ReplCompilerException: at Cell In[0], line 8, 
column 21: Unresolved reference: T\n", + "at Cell In[0], line 12, column 106: Unchecked cast: AnyCol /* = DataColumn<*> */ to DataColumn\n", + "at Cell In[0], line 14, column 135: Unchecked cast: AnyCol /* = DataColumn<*> */ to DataColumn\n", + "at Cell In[0], line 16, column 103: Unchecked cast: AnyCol /* = DataColumn<*> */ to DataColumn\n", + "at Cell In[0], line 18, column 106: Unchecked cast: AnyCol /* = DataColumn<*> */ to DataColumn\n", + "at Cell In[0], line 20, column 112: Unchecked cast: AnyCol /* = DataColumn<*> */ to DataColumn\n", + "at Cell In[0], line 22, column 57: Unresolved reference: T\n", + "at Cell In[0], line 22, column 114: Unchecked cast: AnyCol /* = DataColumn<*> */ to DataColumn>\n", + "at Cell In[0], line 22, column 134: Unresolved reference: T\n", + "at Cell In[0], line 23, column 41: Unresolved reference: T\n", + "at Cell In[0], line 23, column 106: Unresolved reference: T\n", + "at Cell In[0], line 24, column 118: Unchecked cast: AnyCol /* = DataColumn<*> */ to DataColumn\n", + "at Cell In[0], line 26, column 6: No cast needed\n", + "\tat org.jetbrains.kotlinx.jupyter.repl.impl.JupyterCompilerImpl.compileSync(JupyterCompilerImpl.kt:152)\n", + "\tat org.jetbrains.kotlinx.jupyter.repl.impl.InternalEvaluatorImpl.eval(InternalEvaluatorImpl.kt:127)\n", + "\tat org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl.execute_L4Nmkdk$lambda$9$lambda$1(CellExecutorImpl.kt:80)\n", + "\tat org.jetbrains.kotlinx.jupyter.repl.impl.ReplForJupyterImpl.withHost(ReplForJupyterImpl.kt:794)\n", + "\tat org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl.execute-L4Nmkdk(CellExecutorImpl.kt:78)\n", + "\tat org.jetbrains.kotlinx.jupyter.repl.execution.CellExecutor.execute-L4Nmkdk$default(CellExecutor.kt:14)\n", + "\tat org.jetbrains.kotlinx.jupyter.repl.impl.CellExecutorImpl$ExecutionContext.execute(CellExecutorImpl.kt:240)\n", + "\tat org.jetbrains.kotlinx.dataframe.jupyter.ExecuteKt.execute(execute.kt:12)\n", + "\tat 
org.jetbrains.kotlinx.dataframe.jupyter.ExecuteKt.execute(execute.kt:29)\n", + "\tat org.jetbrains.kotlinx.dataframe.jupyter.Integration.updateAnyFrameVariable(Integration.kt:110)\n", + "\tat org.jetbrains.kotlinx.dataframe.jupyter.Integration.access$updateAnyFrameVariable(Integration.kt:64)\n", + "\tat org.jetbrains.kotlinx.dataframe.jupyter.Integration$onLoaded$4.execution$lambda$0(Integration.kt:290)\n", + "\tat org.jetbrains.kotlinx.jupyter.api.libraries.FieldHandlerFactory.createUpdateExecution$lambda$0(FieldHandlerFactory.kt:49)\n", + "\tat org.jetbrains.kotlinx.jupyter.codegen.FieldsProcessorImplKt.executeEx(FieldsProcessorImpl.kt:95)\n", + "\tat org.jetbrains.kotlinx.jupyter.codegen.FieldsProcessorImplKt.access$executeEx(FieldsProcessorImpl.kt:1)\n", + "\tat org.jetbrains.kotlinx.jupyter.codegen.FieldsProcessorImpl.process(FieldsProcessorImpl.kt:47)\n", + "\t... 27 more\n", + "\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnameagesalaryhire_datepetsmap
1John Doe3050000.0000002020-01-15[Pookie]{value2=200, value1=1}
2Jane Smith2855000.0000002021-03-20[ ]{}
3Bob Johnson3565000.0000002019-11-10[Rex, Garfield]{value2=4, value1=3}
4Alice Brown3260000.0000002020-07-01[Mia]{value2=5, value1=6}
5Charlie Smith2570000.0000002022-02-01[Max]{value2=8, value1=7}
6David Smith2780000.0000002022-03-01[Max, Charlie]{value2=9, value1=10}
7Eve Smith2490000.0000002022-04-01[Max, Charlie, David]{value1=11, value3=12}
\n", + " \n", + " \n", + " " + ], + "application/kotlindataframe+json": "{\"$version\":\"2.1.1\",\"metadata\":{\"columns\":[\"id\",\"name\",\"age\",\"salary\",\"hire_date\",\"pets\",\"map\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.String\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double\"},{\"kind\":\"ValueColumn\",\"type\":\"java.util.Date\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Array\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Any\"}],\"nrow\":7,\"ncol\":7},\"kotlin_dataframe\":[{\"id\":1,\"name\":\"John Doe\",\"age\":30,\"salary\":50000.0,\"hire_date\":\"2020-01-15\",\"pets\":\"[Ljava.lang.Object;@65acd002\",\"map\":\"{value2=200, value1=1}\"},{\"id\":2,\"name\":\"Jane Smith\",\"age\":28,\"salary\":55000.0,\"hire_date\":\"2021-03-20\",\"pets\":\"[Ljava.lang.Object;@3b04358\",\"map\":\"{}\"},{\"id\":3,\"name\":\"Bob Johnson\",\"age\":35,\"salary\":65000.0,\"hire_date\":\"2019-11-10\",\"pets\":\"[Ljava.lang.Object;@187b1d89\",\"map\":\"{value2=4, value1=3}\"},{\"id\":4,\"name\":\"Alice Brown\",\"age\":32,\"salary\":60000.0,\"hire_date\":\"2020-07-01\",\"pets\":\"[Ljava.lang.Object;@24f90932\",\"map\":\"{value2=5, value1=6}\"},{\"id\":5,\"name\":\"Charlie Smith\",\"age\":25,\"salary\":70000.0,\"hire_date\":\"2022-02-01\",\"pets\":\"[Ljava.lang.Object;@27d6f16f\",\"map\":\"{value2=8, value1=7}\"},{\"id\":6,\"name\":\"David Smith\",\"age\":27,\"salary\":80000.0,\"hire_date\":\"2022-03-01\",\"pets\":\"[Ljava.lang.Object;@26fdcc4e\",\"map\":\"{value2=9, value1=10}\"},{\"id\":7,\"name\":\"Eve Smith\",\"age\":24,\"salary\":90000.0,\"hire_date\":\"2022-04-01\",\"pets\":\"[Ljava.lang.Object;@66aa0a4d\",\"map\":\"{value1=11, value3=12}\"}]}" + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 6 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "Aside from some errors, 
that seems to work! Let's check which types are correct. There's usually a difference between the types returned by the JDBC driver and the types provided by DataFrame, causing runtime exceptions due to the mismatch.\n", + "\n", + "We can check this by explicitly inferring the types of a dataframe and seeing if they match the types of the original:" + ], + "id": "cacb433f254870f0" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-06-25T09:55:24.332448463Z", + "start_time": "2025-06-25T09:55:23.744272625Z" + } + }, + "cell_type": "code", + "source": [ + "import kotlin.reflect.full.isSubtypeOf\n", + "\n", + "df1.schema().print()\n", + "println()\n", + "df1.inferType().schema().print()\n", + "println()\n", + "\n", + "(df1.columnTypes() zip df1.inferType().columnTypes()).mapNotNull { (provided, inferred) ->\n", + " if (inferred.isSubtypeOf(provided)) null else \"$inferred is not a subtype of $provided\"\n", + "}" + ], + "id": "d49ceb5c4988bd98", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: Int\n", + "name: String\n", + "age: Int\n", + "salary: Double\n", + "hire_date: java.util.Date\n", + "pets: Array\n", + "map: Any\n", + "\n", + "id: Int\n", + "name: String\n", + "age: Int\n", + "salary: Double\n", + "hire_date: java.time.LocalDate\n", + "pets: Array\n", + "map: java.util.HashMap<*, *>\n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + "[java.time.LocalDate is not a subtype of java.util.Date]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 7 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "Yup, it looks like we have some more differences! 
Let's see if we can fix them in our custom `DbType` object.", + "id": "8c84e6be2e15a3f0" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Attempt 2\n", + "\n", + "We will look at the implementation of `ResultSet.getObject` in DuckDB's JDBC driver, and use that to fix the types that mismatch the DataFrame JDBC implementation." + ], + "id": "1345e4a5600ed332" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-06-25T09:55:26.445321209Z", + "start_time": "2025-06-25T09:55:25.071383537Z" + } + }, + "cell_type": "code", + "source": [ + "import org.duckdb.DuckDBResultSetMetaData\n", + "import org.duckdb.DuckDBColumnType.*\n", + "import org.duckdb.JsonNode\n", + "import org.jetbrains.kotlinx.dataframe.io.db.DbType\n", + "import org.jetbrains.kotlinx.dataframe.io.getSchemaForAllSqlTables\n", + "import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables\n", + "import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema\n", + "import java.math.BigInteger\n", + "import java.sql.ResultSet\n", + "import java.time.LocalTime\n", + "import java.util.UUID\n", + "import kotlin.reflect.*\n", + "import kotlin.reflect.full.withNullability\n", + "\n", + "object DuckDb2 : DbType(\"duckdb\") {\n", + "\n", + " /** the name of the class of the DuckDB JDBC driver */\n", + " override val driverClassName = \"org.duckdb.DuckDBDriver\"\n", + "\n", + " /**\n", + " * How a column type from JDBC, [tableColumnMetadata], is read in Java/Kotlin.\n", + " * The returned type must exactly follow [ResultSet.getObject] of your specific database's JDBC driver.\n", + " * Returning `null` defers the implementation to the default one (which may not always be correct).\n", + " *\n", + " * Following [org.duckdb.DuckDBVector.getObject].\n", + " */\n", + " override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? 
=\n", + " when (DuckDBResultSetMetaData.TypeNameToType(tableColumnMetadata.sqlTypeName)) {\n", + " TINYINT -> typeOf()\n", + " SMALLINT -> typeOf()\n", + " HUGEINT -> typeOf()\n", + " UHUGEINT -> typeOf()\n", + " UTINYINT -> typeOf()\n", + " USMALLINT -> typeOf()\n", + " UINTEGER -> typeOf()\n", + " UBIGINT -> typeOf()\n", + " TIME -> typeOf()\n", + " DATE -> typeOf()\n", + " JSON -> typeOf()\n", + " BLOB -> typeOf()\n", + " UUID -> typeOf()\n", + " MAP -> typeOf>() // TODO we'll add more specific types here later\n", + " LIST, ARRAY -> typeOf()\n", + " STRUCT -> typeOf()\n", + " UNION -> typeOf()\n", + " VARCHAR -> typeOf()\n", + " UNKNOWN, BIT, INTERVAL, ENUM -> typeOf()\n", + " else -> null // use DataFrame JDBC defaults\n", + " }?.withNullability(tableColumnMetadata.isNullable)\n", + "\n", + "\n", + " /**\n", + " * How a column from JDBC should be represented as DataFrame (value) column\n", + " * See [convertSqlTypeToKType].\n", + " */\n", + " override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema? 
=\n", + " convertSqlTypeToKType(tableColumnMetadata)?.let {\n", + " // The current DataFrame JDBC implementation will always create value columns as no type-conversion is done\n", + " ColumnSchema.Value(it)\n", + " }\n", + "\n", + " /**\n", + " * How to filter out system tables from user-created ones when using\n", + " * [DataFrame.readAllSqlTables] and [DataFrame.getSchemaForAllSqlTables].\n", + " *\n", + " * The names of these can sometimes be found in the specific JDBC integration.\n", + " */\n", + " override fun isSystemTable(tableMetadata: TableMetadata): Boolean =\n", + " tableMetadata.schemaName?.lowercase()?.contains(\"information_schema\") == true ||\n", + " tableMetadata.schemaName?.lowercase()?.contains(\"system\") == true ||\n", + " tableMetadata.name.lowercase().contains(\"system_\")\n", + "\n", + " /**\n", + " * How to retrieve the correct table metadata when using\n", + " * [DataFrame.readAllSqlTables] and [DataFrame.getSchemaForAllSqlTables].\n", + " * The names of these can be found in the [DatabaseMetaData] implementation of the DuckDB JDBC integration.\n", + " */\n", + " override fun buildTableMetadata(tables: ResultSet): TableMetadata =\n", + " TableMetadata(\n", + " name = tables.getString(\"TABLE_NAME\"),\n", + " schemaName = tables.getString(\"TABLE_SCHEM\"),\n", + " catalogue = tables.getString(\"TABLE_CAT\"),\n", + " )\n", + "}" + ], + "id": "938054833bf22347", + "outputs": [], + "execution_count": 8 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-06-25T09:55:27.766110784Z", + "start_time": "2025-06-25T09:55:26.634106018Z" + } + }, + "cell_type": "code", + "source": [ + "val df2 = DataFrame.readSqlTable(connection, tableName = \"test_table\", dbType = DuckDb2)\n", + "df2" + ], + "id": "a16a6fd1966a4b10", + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnameagesalaryhire_datepetsmap
1John Doe3050000.0000002020-01-15[Pookie]{value2=200, value1=1}
2Jane Smith2855000.0000002021-03-20[]{}
3Bob Johnson3565000.0000002019-11-10[Rex, Garfield]{value2=4, value1=3}
4Alice Brown3260000.0000002020-07-01[Mia]{value2=5, value1=6}
5Charlie Smith2570000.0000002022-02-01[Max]{value2=8, value1=7}
6David Smith2780000.0000002022-03-01[Max, Charlie]{value2=9, value1=10}
7Eve Smith2490000.0000002022-04-01[Max, Charlie, David]{value1=11, value3=12}
\n", + " \n", + " \n", + " " + ], + "application/kotlindataframe+json": "{\"$version\":\"2.1.1\",\"metadata\":{\"columns\":[\"id\",\"name\",\"age\",\"salary\",\"hire_date\",\"pets\",\"map\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.String\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double\"},{\"kind\":\"ValueColumn\",\"type\":\"java.time.LocalDate\"},{\"kind\":\"ValueColumn\",\"type\":\"java.sql.Array\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.collections.Map\"}],\"nrow\":7,\"ncol\":7},\"kotlin_dataframe\":[{\"id\":1,\"name\":\"John Doe\",\"age\":30,\"salary\":50000.0,\"hire_date\":\"2020-01-15\",\"pets\":\"[Pookie]\",\"map\":\"{value2=200, value1=1}\"},{\"id\":2,\"name\":\"Jane Smith\",\"age\":28,\"salary\":55000.0,\"hire_date\":\"2021-03-20\",\"pets\":\"[]\",\"map\":\"{}\"},{\"id\":3,\"name\":\"Bob Johnson\",\"age\":35,\"salary\":65000.0,\"hire_date\":\"2019-11-10\",\"pets\":\"[Rex, Garfield]\",\"map\":\"{value2=4, value1=3}\"},{\"id\":4,\"name\":\"Alice Brown\",\"age\":32,\"salary\":60000.0,\"hire_date\":\"2020-07-01\",\"pets\":\"[Mia]\",\"map\":\"{value2=5, value1=6}\"},{\"id\":5,\"name\":\"Charlie Smith\",\"age\":25,\"salary\":70000.0,\"hire_date\":\"2022-02-01\",\"pets\":\"[Max]\",\"map\":\"{value2=8, value1=7}\"},{\"id\":6,\"name\":\"David Smith\",\"age\":27,\"salary\":80000.0,\"hire_date\":\"2022-03-01\",\"pets\":\"[Max, Charlie]\",\"map\":\"{value2=9, value1=10}\"},{\"id\":7,\"name\":\"Eve Smith\",\"age\":24,\"salary\":90000.0,\"hire_date\":\"2022-04-01\",\"pets\":\"[Max, Charlie, David]\",\"map\":\"{value1=11, value3=12}\"}]}" + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 9 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-06-25T09:55:28.831920262Z", + "start_time": "2025-06-25T09:55:28.591603737Z" + } + }, + "cell_type": "code", + "source": [ + 
"df2.schema().print()\n", + "println()\n", + "df2.inferType().schema().print()\n", + "println()\n", + "\n", + "(df2.columnTypes() zip df2.inferType().columnTypes()).mapNotNull { (provided, inferred) ->\n", + " if (inferred.isSubtypeOf(provided)) null else \"$inferred is not a subtype of $provided\"\n", + "}" + ], + "id": "94a289da0bd5958a", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: Int\n", + "name: String\n", + "age: Int\n", + "salary: Double\n", + "hire_date: java.time.LocalDate\n", + "pets: java.sql.Array\n", + "map: Map\n", + "\n", + "id: Int\n", + "name: String\n", + "age: Int\n", + "salary: Double\n", + "hire_date: java.time.LocalDate\n", + "pets: org.duckdb.DuckDBArray\n", + "map: java.util.HashMap\n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 10 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "That seems to work better!", + "id": "af75181568d2eb58" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Attempt 3\n", + "\n", + "There's just one last thing we can improve, and that's nested types.\n", + "This is completely optional, as DataFrame has `.inferType()`. That relies on runtime inference, however,\n", + "which costs performance. 
Providing types as specific as possible will let us avoid that.\n", + "\n", + "As we cannot do conversions upon reading from JDBC [yet](https://github.com/Kotlin/dataframe/issues/1266),\n", + "we cannot do anything about the `Struct`, or `JsonNode` types.\n", + "Similarly, we cannot do anything about the `Array` and `List` types [yet](https://github.com/Kotlin/dataframe/issues/1273).\n", + "\n", + "However, we can provide nested `Map` types!\n", + "\n", + "The DuckDB type can be found in `TableColumnMetadata.sqlTypeName`.\n", + "\n", + "DuckDB [MAP types](https://duckdb.org/docs/stable/sql/data_types/map) are represented as a `MAP(X, Y)` type, where `X` and `Y` are the types of the keys and values in the map. They can be nested arbitrarily deep, meaning you can get a type like `MAP(MAP(MAP(INT, VARCHAR[][]), STRUCT(i INT, v VARCHAR)), MAP(INT, VARCHAR))`, and we should still be able to parse it!\n", + "\n", + "This means we're going to need a special parsing function that splits the `MAP` type into its key and value types at the right comma." 
+ ], + "id": "34b3f29d757eadbe" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-06-25T09:55:31.935279562Z", + "start_time": "2025-06-25T09:55:29.748407018Z" + } + }, + "cell_type": "code", + "source": [ + "import org.duckdb.DuckDBResultSetMetaData\n", + "import org.duckdb.DuckDBColumnType.*\n", + "import org.duckdb.JsonNode\n", + "import org.jetbrains.kotlinx.dataframe.io.db.DbType\n", + "import org.jetbrains.kotlinx.dataframe.io.getSchemaForAllSqlTables\n", + "import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables\n", + "import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema\n", + "import java.math.BigDecimal\n", + "import java.math.BigInteger\n", + "import java.sql.ResultSet\n", + "import java.time.LocalTime\n", + "import java.time.OffsetDateTime\n", + "import java.time.OffsetTime\n", + "import java.util.UUID\n", + "import kotlin.reflect.*\n", + "import kotlin.reflect.full.createType\n", + "import kotlin.reflect.full.withNullability\n", + "\n", + "object DuckDb : DbType(\"duckdb\") {\n", + "\n", + " /** the name of the class of the DuckDB JDBC driver */\n", + " override val driverClassName = \"org.duckdb.DuckDBDriver\"\n", + "\n", + " /**\n", + " * How a column type from JDBC, [tableColumnMetadata], is read in Java/Kotlin.\n", + " * The returned type must exactly follow [ResultSet.getObject] of your specific database's JDBC driver.\n", + " * Returning `null` defers the implementation to the default one (which may not always be correct).\n", + " *\n", + " * Following [org.duckdb.DuckDBVector.getObject].\n", + " */\n", + " override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType =\n", + " tableColumnMetadata.sqlTypeName.toKType(tableColumnMetadata.isNullable)\n", + "\n", + "\n", + " /**\n", + " * How a column from JDBC should be represented as DataFrame (value) column\n", + " * See [convertSqlTypeToKType].\n", + " */\n", + " override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): 
ColumnSchema =\n", + " convertSqlTypeToKType(tableColumnMetadata).let {\n", + " // The current DataFrame JDBC implementation will always create value columns as no type-conversion is done\n", + " ColumnSchema.Value(it)\n", + " }\n", + "\n", + " /**\n", + " * Follows exactly [org.duckdb.DuckDBVector.getObject].\n", + " *\n", + " * I added a \"// dataframe-jdbc\" comment for all types that are covered correctly by\n", + " * [org.jetbrains.kotlinx.dataframe.io.makeCommonSqlToKTypeMapping] at the moment, however, to cover\n", + " * all nested types, we'll use a full type-map for all [DuckDB types][DuckDBColumnType] exactly.\n", + " */\n", + " @Suppress(\"ktlint:standard:blank-line-between-when-conditions\")\n", + " private fun String.toKType(isNullable: Boolean): KType {\n", + " val sqlTypeName = this\n", + " return when (DuckDBResultSetMetaData.TypeNameToType(sqlTypeName)!!) {\n", + " BOOLEAN -> typeOf() // dataframe-jdbc\n", + " TINYINT -> typeOf()\n", + " SMALLINT -> typeOf()\n", + " INTEGER -> typeOf() // dataframe-jdbc\n", + " BIGINT -> typeOf() // dataframe-jdbc\n", + " HUGEINT -> typeOf()\n", + " UHUGEINT -> typeOf()\n", + " UTINYINT -> typeOf()\n", + " USMALLINT -> typeOf()\n", + " UINTEGER -> typeOf()\n", + " UBIGINT -> typeOf()\n", + " FLOAT -> typeOf() // dataframe-jdbc\n", + " DOUBLE -> typeOf() // dataframe-jdbc\n", + " DECIMAL -> typeOf() // dataframe-jdbc\n", + " TIME -> typeOf()\n", + " TIME_WITH_TIME_ZONE -> typeOf() // dataframe-jdbc\n", + " DATE -> typeOf()\n", + " TIMESTAMP, TIMESTAMP_MS, TIMESTAMP_NS, TIMESTAMP_S -> typeOf() // dataframe-jdbc\n", + " TIMESTAMP_WITH_TIME_ZONE -> typeOf() // dataframe-jdbc\n", + " JSON -> typeOf()\n", + " BLOB -> typeOf()\n", + " UUID -> typeOf()\n", + " MAP -> {\n", + " val (key, value) = parseMapTypes(sqlTypeName)\n", + " Map::class.createType(\n", + " listOf(\n", + " KTypeProjection.invariant(key.toKType(isNullable = false)), // keys cannot be nullable in DuckDB\n", + " 
KTypeProjection.covariant(value.toKType(isNullable = true)), // values can be nullable in DuckDB\n", + " ),\n", + " )\n", + " }\n", + " LIST, ARRAY -> typeOf() // TODO requires #1266 and #1273 for specific types\n", + " STRUCT -> typeOf() // TODO requires #1266 for specific types\n", + " UNION -> typeOf() // Cannot handle this in Kotlin\n", + " VARCHAR -> typeOf()\n", + " UNKNOWN, BIT, INTERVAL, ENUM -> typeOf()\n", + " // as a bonus, we have an exhaustive when now :)\n", + " }.withNullability(isNullable)\n", + " }\n", + "\n", + " /** Splits \"MAP(X, Y)\" into \"X\" and \"Y\", taking nested parentheses into account. */\n", + " fun parseMapTypes(typeString: String): Pair {\n", + " if (!typeString.startsWith(\"MAP(\") || !typeString.endsWith(\")\")) {\n", + " error(\"invalid MAP type: $typeString\")\n", + " }\n", + " val content = typeString.removeSurrounding(\"MAP(\", \")\")\n", + "\n", + " // Find the comma that separates key and value types\n", + " var parenCount = 0\n", + " var commaIndex = -1\n", + " for (i in content.indices) {\n", + " when (content[i]) {\n", + " '(' -> parenCount++\n", + " ')' -> parenCount--\n", + " ',' -> if (parenCount == 0) {\n", + " commaIndex = i\n", + " break\n", + " }\n", + " }\n", + " }\n", + " if (commaIndex == -1) error(\"invalid MAP type: $typeString\")\n", + " val keyType = content.take(commaIndex).trim()\n", + " val valueType = content.substring(commaIndex + 1).trim()\n", + " return Pair(keyType, valueType)\n", + " }\n", + "\n", + " /**\n", + " * How to filter out system tables from user-created ones when using\n", + " * [DataFrame.readAllSqlTables] and [DataFrame.getSchemaForAllSqlTables].\n", + " *\n", + " * The names of these can sometimes be found in the specific JDBC integration.\n", + " */\n", + " override fun isSystemTable(tableMetadata: TableMetadata): Boolean =\n", + " tableMetadata.schemaName?.lowercase()?.contains(\"information_schema\") == true ||\n", + " tableMetadata.schemaName?.lowercase()?.contains(\"system\") == 
true ||\n", + " tableMetadata.name.lowercase().contains(\"system_\")\n", + "\n", + " /**\n", + " * How to retrieve the correct table metadata when using\n", + " * [DataFrame.readAllSqlTables] and [DataFrame.getSchemaForAllSqlTables].\n", + " * The names of these can be found in the [DatabaseMetaData] implementation of the DuckDB JDBC integration.\n", + " */\n", + " override fun buildTableMetadata(tables: ResultSet): TableMetadata =\n", + " TableMetadata(\n", + " name = tables.getString(\"TABLE_NAME\"),\n", + " schemaName = tables.getString(\"TABLE_SCHEM\"),\n", + " catalogue = tables.getString(\"TABLE_CAT\"),\n", + " )\n", + "}" + ], + "id": "e2ef1bc6d7c380cc", + "outputs": [], + "execution_count": 11 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-06-25T09:55:33.106795034Z", + "start_time": "2025-06-25T09:55:32.075637203Z" + } + }, + "cell_type": "code", + "source": [ + "val df3 = DataFrame.readSqlTable(connection, tableName = \"test_table\", dbType = DuckDb)\n", + "df3" + ], + "id": "e4ab6c75ce58c85a", + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnameagesalaryhire_datepetsmap
1John Doe3050000.0000002020-01-15[Pookie]{value2=200, value1=1}
2Jane Smith2855000.0000002021-03-20[]{}
3Bob Johnson3565000.0000002019-11-10[Rex, Garfield]{value2=4, value1=3}
4Alice Brown3260000.0000002020-07-01[Mia]{value2=5, value1=6}
5Charlie Smith2570000.0000002022-02-01[Max]{value2=8, value1=7}
6David Smith2780000.0000002022-03-01[Max, Charlie]{value2=9, value1=10}
7Eve Smith2490000.0000002022-04-01[Max, Charlie, David]{value1=11, value3=12}
\n", + " \n", + " \n", + " " + ], + "application/kotlindataframe+json": "{\"$version\":\"2.1.1\",\"metadata\":{\"columns\":[\"id\",\"name\",\"age\",\"salary\",\"hire_date\",\"pets\",\"map\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.String\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double\"},{\"kind\":\"ValueColumn\",\"type\":\"java.time.LocalDate\"},{\"kind\":\"ValueColumn\",\"type\":\"java.sql.Array\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.collections.Map\"}],\"nrow\":7,\"ncol\":7},\"kotlin_dataframe\":[{\"id\":1,\"name\":\"John Doe\",\"age\":30,\"salary\":50000.0,\"hire_date\":\"2020-01-15\",\"pets\":\"[Pookie]\",\"map\":\"{value2=200, value1=1}\"},{\"id\":2,\"name\":\"Jane Smith\",\"age\":28,\"salary\":55000.0,\"hire_date\":\"2021-03-20\",\"pets\":\"[]\",\"map\":\"{}\"},{\"id\":3,\"name\":\"Bob Johnson\",\"age\":35,\"salary\":65000.0,\"hire_date\":\"2019-11-10\",\"pets\":\"[Rex, Garfield]\",\"map\":\"{value2=4, value1=3}\"},{\"id\":4,\"name\":\"Alice Brown\",\"age\":32,\"salary\":60000.0,\"hire_date\":\"2020-07-01\",\"pets\":\"[Mia]\",\"map\":\"{value2=5, value1=6}\"},{\"id\":5,\"name\":\"Charlie Smith\",\"age\":25,\"salary\":70000.0,\"hire_date\":\"2022-02-01\",\"pets\":\"[Max]\",\"map\":\"{value2=8, value1=7}\"},{\"id\":6,\"name\":\"David Smith\",\"age\":27,\"salary\":80000.0,\"hire_date\":\"2022-03-01\",\"pets\":\"[Max, Charlie]\",\"map\":\"{value2=9, value1=10}\"},{\"id\":7,\"name\":\"Eve Smith\",\"age\":24,\"salary\":90000.0,\"hire_date\":\"2022-04-01\",\"pets\":\"[Max, Charlie, David]\",\"map\":\"{value1=11, value3=12}\"}]}" + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 12 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "Now we have a correctly typed dataframe with `map: Map`!", + "id": "5a514d6cb01becbb" + }, + { + "metadata": { + "ExecuteTime": { + 
"end_time": "2025-06-25T09:55:33.860565222Z", + "start_time": "2025-06-25T09:55:33.570210759Z" + } + }, + "cell_type": "code", + "source": [ + "df3.schema().print()\n", + "println()\n", + "df3.inferType().schema().print()\n", + "println()\n", + "\n", + "(df3.columnTypes() zip df3.inferType().columnTypes()).mapNotNull { (provided, inferred) ->\n", + " if (inferred.isSubtypeOf(provided)) null else \"$inferred is not a subtype of $provided\"\n", + "}" + ], + "id": "ff66b0b431f46277", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: Int\n", + "name: String\n", + "age: Int\n", + "salary: Double\n", + "hire_date: java.time.LocalDate\n", + "pets: java.sql.Array\n", + "map: Map\n", + "\n", + "id: Int\n", + "name: String\n", + "age: Int\n", + "salary: Double\n", + "hire_date: java.time.LocalDate\n", + "pets: org.duckdb.DuckDBArray\n", + "map: java.util.HashMap\n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 13 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "With that, we just created a complete `DbType` object for DuckDB in DataFrame!\n", + "If we provide this in each call to `DataFrame.readSql*`, we can read our database into a DataFrame." 
+ ], + "id": "6a122de8bb2c703e" + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "### Final conversions\n", + "\n", + "Some optional but recommended conversions are:\n", + "- Turning the `java.sql.Array`s into `List`s.\n", + "- Turning the `java.sql.Struct`s into `Map`s or a `DataRow<*>`.\n", + "- Turning the `Map`s into a `DataRow<*>`.\n", + "- Turning the `JsonNode` into a `String` and calling `parse()` on it, or using `DataRow.readJsonStr()`.\n", + "- Turning `java.time` instances into `kotlinx.datetime` instances.\n", + "- Turning `java.sql.Timestamp` into `kotlinx.datetime.LocalDateTime`.\n", + "\n", + "Note that most of these rely on runtime inference, which can be costly for large dataframes." + ], + "id": "6349f6d4428944db" + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-06-25T09:55:37.142517750Z", + "start_time": "2025-06-25T09:55:34.756891857Z" + } + }, + "cell_type": "code", + "source": [ + "import kotlinx.datetime.toKotlinInstant\n", + "import kotlinx.datetime.toKotlinLocalDateTime\n", + "import org.duckdb.DuckDBStruct\n", + "\n", + "fun AnyFrame.convertDuckDbTypes() = this\n", + " .convert { colsOf() }.with(infer = Infer.Type) { (it.array as Array).toList() }\n", + " .convert { colsOf() }.with { (it as DuckDBStruct).map }\n", + " .convert { colsOf>() }.with {\n", + " // getting a DataRow from Map to create a ColumnGroup with the keys as column names.\n", + " // TODO simplify with #1098 when it's done\n", + " it.mapValues { listOf(it.value) }.toDataFrame().singleOrNull()\n", + " }\n", + " .convert { colsOf() }.with { DataRow.readJsonStr(it.toString()) }\n", + " .convert { colsOf() }.toLocalTime()\n", + " .convert { colsOf() }.toLocalDate()\n", + " .convert { colsOf() }.with { it.toInstant().toKotlinInstant() }\n", + " .convert { colsOf() }.with { it.toLocalDateTime().toKotlinLocalDateTime() }\n", + "\n", + "val df = df3.convertDuckDbTypes()\n", + "\n", + "df.schema().print()\n", + "df" + ], + "id": "f04b0f3742acb554", + "outputs": 
[ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id: Int\n", + "name: String\n", + "age: Int\n", + "salary: Double\n", + "hire_date: kotlinx.datetime.LocalDate\n", + "pets: List\n", + "map:\n", + " value2: Int?\n", + " value1: Int?\n", + " value3: Int?\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnameagesalaryhire_datepetsmap
value2value1value3
1John Doe3050000.0000002020-01-15[Pookie]2001null
2Jane Smith2855000.0000002021-03-20[ ]nullnullnull
3Bob Johnson3565000.0000002019-11-10[Rex, Garfield]43null
4Alice Brown3260000.0000002020-07-01[Mia]56null
5Charlie Smith2570000.0000002022-02-01[Max]87null
6David Smith2780000.0000002022-03-01[Max, Charlie]910null
7Eve Smith2490000.0000002022-04-01[Max, Charlie, David]null1112
\n", + " \n", + " \n", + " " + ], + "application/kotlindataframe+json": "{\"$version\":\"2.1.1\",\"metadata\":{\"columns\":[\"id\",\"name\",\"age\",\"salary\",\"hire_date\",\"pets\",\"map\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.String\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlinx.datetime.LocalDate\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.collections.List\"},{\"kind\":\"ColumnGroup\"}],\"nrow\":7,\"ncol\":7},\"kotlin_dataframe\":[{\"id\":1,\"name\":\"John Doe\",\"age\":30,\"salary\":50000.0,\"hire_date\":\"2020-01-15\",\"pets\":[\"Pookie\"],\"map\":{\"data\":{\"value2\":200,\"value1\":1,\"value3\":null},\"metadata\":{\"kind\":\"ColumnGroup\",\"columns\":[\"value2\",\"value1\",\"value3\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"}]}}},{\"id\":2,\"name\":\"Jane Smith\",\"age\":28,\"salary\":55000.0,\"hire_date\":\"2021-03-20\",\"pets\":[],\"map\":{\"data\":{\"value2\":null,\"value1\":null,\"value3\":null},\"metadata\":{\"kind\":\"ColumnGroup\",\"columns\":[\"value2\",\"value1\",\"value3\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"}]}}},{\"id\":3,\"name\":\"Bob Johnson\",\"age\":35,\"salary\":65000.0,\"hire_date\":\"2019-11-10\",\"pets\":[\"Rex\",\"Garfield\"],\"map\":{\"data\":{\"value2\":4,\"value1\":3,\"value3\":null},\"metadata\":{\"kind\":\"ColumnGroup\",\"columns\":[\"value2\",\"value1\",\"value3\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"}]}}},{\"id\":4,\"name\":\"Alice 
Brown\",\"age\":32,\"salary\":60000.0,\"hire_date\":\"2020-07-01\",\"pets\":[\"Mia\"],\"map\":{\"data\":{\"value2\":5,\"value1\":6,\"value3\":null},\"metadata\":{\"kind\":\"ColumnGroup\",\"columns\":[\"value2\",\"value1\",\"value3\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"}]}}},{\"id\":5,\"name\":\"Charlie Smith\",\"age\":25,\"salary\":70000.0,\"hire_date\":\"2022-02-01\",\"pets\":[\"Max\"],\"map\":{\"data\":{\"value2\":8,\"value1\":7,\"value3\":null},\"metadata\":{\"kind\":\"ColumnGroup\",\"columns\":[\"value2\",\"value1\",\"value3\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"}]}}},{\"id\":6,\"name\":\"David Smith\",\"age\":27,\"salary\":80000.0,\"hire_date\":\"2022-03-01\",\"pets\":[\"Max\",\"Charlie\"],\"map\":{\"data\":{\"value2\":9,\"value1\":10,\"value3\":null},\"metadata\":{\"kind\":\"ColumnGroup\",\"columns\":[\"value2\",\"value1\",\"value3\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"}]}}},{\"id\":7,\"name\":\"Eve Smith\",\"age\":24,\"salary\":90000.0,\"hire_date\":\"2022-04-01\",\"pets\":[\"Max\",\"Charlie\",\"David\"],\"map\":{\"data\":{\"value2\":null,\"value1\":11,\"value3\":12},\"metadata\":{\"kind\":\"ColumnGroup\",\"columns\":[\"value2\",\"value1\",\"value3\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int?\"}]}}}]}" + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 14 + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "### Some statistics", + "id": "cbccf6911c8bfa52" + }, + { + "metadata": { + 
"ExecuteTime": { + "end_time": "2025-06-25T09:58:18.520521341Z", + "start_time": "2025-06-25T09:58:18.317239290Z" + } + }, + "cell_type": "code", + "source": [ + "val mostCommonPetName = df.explode { pets }.valueCounts { pets }.first()\n", + "mostCommonPetName" + ], + "id": "954d612f66871c20", + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
petscount
Max3
\n", + " \n", + " \n", + " " + ], + "application/kotlindataframe+json": "{\"$version\":\"2.1.1\",\"metadata\":{\"columns\":[\"pets\",\"count\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.String\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int\"}],\"nrow\":1,\"ncol\":2},\"kotlin_dataframe\":[{\"pets\":\"Max\",\"count\":3}]}" + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 18 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-06-25T09:58:33.651999631Z", + "start_time": "2025-06-25T09:58:33.505382022Z" + } + }, + "cell_type": "code", + "source": [ + "val averageSalary = df.salary.mean()\n", + "averageSalary" + ], + "id": "d20398d75404fd1a", + "outputs": [ + { + "data": { + "text/plain": [ + "67142.85714285714" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 19 + }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2025-06-25T10:12:54.130120813Z", + "start_time": "2025-06-25T10:12:53.544460918Z" + } + }, + "cell_type": "code", + "source": [ + "val withScaledMap = df.replace { map }.with {\n", + " map.convert { value1 and value2 and value3 }.perRowCol { row, col ->\n", + " if (col[row] == null) null else col[row]!!.toDouble() / col.sum()\n", + " }.asColumnGroup(map.name())\n", + "}\n", + "\n", + "withScaledMap" + ], + "id": "b4976c4726c4e170", + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnameagesalaryhire_datepetsmap
value2value1value3
1John Doe3050000.0000002020-01-15[Pookie]0.8849560.026316null
2Jane Smith2855000.0000002021-03-20[ ]nullnullnull
3Bob Johnson3565000.0000002019-11-10[Rex, Garfield]0.0176990.078947null
4Alice Brown3260000.0000002020-07-01[Mia]0.0221240.157895null
5Charlie Smith2570000.0000002022-02-01[Max]0.0353980.184211null
6David Smith2780000.0000002022-03-01[Max, Charlie]0.0398230.263158null
7Eve Smith2490000.0000002022-04-01[Max, Charlie, David]null0.2894741.000000
\n", + " \n", + " \n", + " " + ], + "application/kotlindataframe+json": "{\"$version\":\"2.1.1\",\"metadata\":{\"columns\":[\"id\",\"name\",\"age\",\"salary\",\"hire_date\",\"pets\",\"map\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.String\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Int\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlinx.datetime.LocalDate\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.collections.List\"},{\"kind\":\"ColumnGroup\"}],\"nrow\":7,\"ncol\":7},\"kotlin_dataframe\":[{\"id\":1,\"name\":\"John Doe\",\"age\":30,\"salary\":50000.0,\"hire_date\":\"2020-01-15\",\"pets\":[\"Pookie\"],\"map\":{\"data\":{\"value2\":0.8849557522123894,\"value1\":0.02631578947368421,\"value3\":null},\"metadata\":{\"kind\":\"ColumnGroup\",\"columns\":[\"value2\",\"value1\",\"value3\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"}]}}},{\"id\":2,\"name\":\"Jane Smith\",\"age\":28,\"salary\":55000.0,\"hire_date\":\"2021-03-20\",\"pets\":[],\"map\":{\"data\":{\"value2\":null,\"value1\":null,\"value3\":null},\"metadata\":{\"kind\":\"ColumnGroup\",\"columns\":[\"value2\",\"value1\",\"value3\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"}]}}},{\"id\":3,\"name\":\"Bob 
Johnson\",\"age\":35,\"salary\":65000.0,\"hire_date\":\"2019-11-10\",\"pets\":[\"Rex\",\"Garfield\"],\"map\":{\"data\":{\"value2\":0.017699115044247787,\"value1\":0.07894736842105263,\"value3\":null},\"metadata\":{\"kind\":\"ColumnGroup\",\"columns\":[\"value2\",\"value1\",\"value3\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"}]}}},{\"id\":4,\"name\":\"Alice Brown\",\"age\":32,\"salary\":60000.0,\"hire_date\":\"2020-07-01\",\"pets\":[\"Mia\"],\"map\":{\"data\":{\"value2\":0.022123893805309734,\"value1\":0.15789473684210525,\"value3\":null},\"metadata\":{\"kind\":\"ColumnGroup\",\"columns\":[\"value2\",\"value1\",\"value3\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"}]}}},{\"id\":5,\"name\":\"Charlie Smith\",\"age\":25,\"salary\":70000.0,\"hire_date\":\"2022-02-01\",\"pets\":[\"Max\"],\"map\":{\"data\":{\"value2\":0.035398230088495575,\"value1\":0.18421052631578946,\"value3\":null},\"metadata\":{\"kind\":\"ColumnGroup\",\"columns\":[\"value2\",\"value1\",\"value3\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"}]}}},{\"id\":6,\"name\":\"David Smith\",\"age\":27,\"salary\":80000.0,\"hire_date\":\"2022-03-01\",\"pets\":[\"Max\",\"Charlie\"],\"map\":{\"data\":{\"value2\":0.03982300884955752,\"value1\":0.2631578947368421,\"value3\":null},\"metadata\":{\"kind\":\"ColumnGroup\",\"columns\":[\"value2\",\"value1\",\"value3\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"}]}}},{\"id\":7,\"name\":\"Eve 
Smith\",\"age\":24,\"salary\":90000.0,\"hire_date\":\"2022-04-01\",\"pets\":[\"Max\",\"Charlie\",\"David\"],\"map\":{\"data\":{\"value2\":null,\"value1\":0.2894736842105263,\"value3\":1.0},\"metadata\":{\"kind\":\"ColumnGroup\",\"columns\":[\"value2\",\"value1\",\"value3\"],\"types\":[{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"},{\"kind\":\"ValueColumn\",\"type\":\"kotlin.Double?\"}]}}}]}" + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 42 + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "", + "id": "8e1a7306ada4161e" + } + ], + "metadata": { + "kernelspec": { + "display_name": "Kotlin", + "language": "kotlin", + "name": "kotlin" + }, + "language_info": { + "codemirror_mode": "text/x-kotlin", + "file_extension": ".kt", + "mimetype": "text/x-kotlin", + "name": "kotlin", + "nbconvert_exporter": "", + "pygments_lexer": "kotlin", + "version": "1.8.20-Beta" + }, + "ktnbPluginMetadata": { + "projectLibraries": [] + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 1e1c9361a7..1377def238 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -54,7 +54,7 @@ shadow = "8.3.5" android-gradle-api = "7.3.1" # need to revise our tests to update ktor = "3.0.1" # needs jupyter compatibility with Kotlin 2.1 to update kotlin-compile-testing = "0.7.1" -duckdb = "1.2.2.0" +duckdb = "1.3.1.0" buildconfig = "5.6.7" benchmark = "0.4.12"