Kotlin
diff --git a/‎dataframe-jdbc/build.gradle.kts
Lines changed: 3 additions & 0 deletions b/‎dataframe-jdbc/build.gradle.kts
Lines changed: 3 additions & 0 deletions
diff --git a/‎dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DuckDb.kt
Lines changed: 219 additions & 0 deletions b/‎dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DuckDb.kt
Lines changed: 219 additions & 0 deletions
diff --git a/‎dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/util.kt
Lines changed: 3 additions & 1 deletion b/‎dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/util.kt
Lines changed: 3 additions & 1 deletion
@@ -16,6 +16,7 @@ repositories {
 
 dependencies {
     api(projects.core)
+    compileOnly(libs.duckdb.jdbc)// todo?
     implementation(libs.kotlinLogging)
     testImplementation(libs.mariadb)
     testImplementation(libs.sqlite)
@@ -26,6 +27,8 @@ dependencies {
     testImplementation(libs.junit)
     testImplementation(libs.sl4jsimple)
     testImplementation(libs.jts)
+    testImplementation(libs.duckdb.jdbc)
+    testImplementation(projects.dataframeJson)
     testImplementation(libs.kotestAssertions) {
         exclude("org.jetbrains.kotlin", "kotlin-stdlib-jdk8")
     }
 
@@ -0,0 +1,219 @@
+package org.jetbrains.kotlinx.dataframe.io.db
+
+import org.duckdb.DuckDBColumnType
+import org.duckdb.DuckDBColumnType.ARRAY
+import org.duckdb.DuckDBColumnType.BIGINT
+import org.duckdb.DuckDBColumnType.BIT
+import org.duckdb.DuckDBColumnType.BLOB
+import org.duckdb.DuckDBColumnType.BOOLEAN
+import org.duckdb.DuckDBColumnType.DATE
+import org.duckdb.DuckDBColumnType.DECIMAL
+import org.duckdb.DuckDBColumnType.DOUBLE
+import org.duckdb.DuckDBColumnType.ENUM
+import org.duckdb.DuckDBColumnType.FLOAT
+import org.duckdb.DuckDBColumnType.HUGEINT
+import org.duckdb.DuckDBColumnType.INTEGER
+import org.duckdb.DuckDBColumnType.INTERVAL
+import org.duckdb.DuckDBColumnType.JSON
+import org.duckdb.DuckDBColumnType.LIST
+import org.duckdb.DuckDBColumnType.MAP
+import org.duckdb.DuckDBColumnType.SMALLINT
+import org.duckdb.DuckDBColumnType.STRUCT
+import org.duckdb.DuckDBColumnType.TIME
+import org.duckdb.DuckDBColumnType.TIMESTAMP
+import org.duckdb.DuckDBColumnType.TIMESTAMP_MS
+import org.duckdb.DuckDBColumnType.TIMESTAMP_NS
+import org.duckdb.DuckDBColumnType.TIMESTAMP_S
+import org.duckdb.DuckDBColumnType.TIMESTAMP_WITH_TIME_ZONE
+import org.duckdb.DuckDBColumnType.TIME_WITH_TIME_ZONE
+import org.duckdb.DuckDBColumnType.TINYINT
+import org.duckdb.DuckDBColumnType.UBIGINT
+import org.duckdb.DuckDBColumnType.UHUGEINT
+import org.duckdb.DuckDBColumnType.UINTEGER
+import org.duckdb.DuckDBColumnType.UNION
+import org.duckdb.DuckDBColumnType.UNKNOWN
+import org.duckdb.DuckDBColumnType.USMALLINT
+import org.duckdb.DuckDBColumnType.UTINYINT
+import org.duckdb.DuckDBColumnType.UUID
+import org.duckdb.DuckDBColumnType.VARCHAR
+import org.duckdb.DuckDBResultSetMetaData
+import org.duckdb.JsonNode
+import org.jetbrains.kotlinx.dataframe.DataFrame
+import org.jetbrains.kotlinx.dataframe.io.TableColumnMetadata
+import org.jetbrains.kotlinx.dataframe.io.TableMetadata
+import org.jetbrains.kotlinx.dataframe.io.db.DuckDb.convertSqlTypeToKType
+import org.jetbrains.kotlinx.dataframe.io.getSchemaForAllSqlTables
+import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
+import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
+import java.math.BigDecimal
+import java.math.BigInteger
+import java.sql.Array
+import java.sql.Blob
+import java.sql.DatabaseMetaData
+import java.sql.ResultSet
+import java.sql.Struct
+import java.sql.Timestamp
+import java.time.LocalDate
+import java.time.LocalTime
+import java.time.OffsetDateTime
+import java.time.OffsetTime
+import java.util.UUID
+import kotlin.reflect.KType
+import kotlin.reflect.KTypeProjection
+import kotlin.reflect.full.createType
+import kotlin.reflect.full.withNullability
+import kotlin.reflect.typeOf
+
+/**
+ * Represents the [DuckDB](http://duckdb.org/) database type.
+ *
+ * This class provides methods to convert data from a [ResultSet] to the appropriate type for DuckDB,
+ * and to generate the corresponding [column schema][ColumnSchema].
+ */
+public object DuckDb : DbType("duckdb") {
+
+    /** the name of the class of the DuckDB JDBC driver */
+    override val driverClassName: String = "org.duckdb.DuckDBDriver"
+
+    /**
+     * How a column type from JDBC, [tableColumnMetadata], is read in Java/Kotlin.
+     * The returned type must exactly follow [ResultSet.getObject] of your specific database's JDBC driver.
+     * Returning `null` defer the implementation to the default one (which may not always be correct).
+     *
+     * Following [org.duckdb.DuckDBVector.getObject].
+     */
+    override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType =
+        tableColumnMetadata.sqlTypeName.toKType(tableColumnMetadata.isNullable)
+
+    /**
+     * How a column from JDBC should be represented as DataFrame (value) column
+     * See [convertSqlTypeToKType].
+     */
+    override fun convertSqlTypeToColumnSchemaValue(tableColumnMetadata: TableColumnMetadata): ColumnSchema {
+        val type = convertSqlTypeToKType(tableColumnMetadata)
+        return ColumnSchema.Value(type)
+    }
+
+    /**
+     * Follows exactly [org.duckdb.DuckDBVector.getObject].
+     *
+     * "// dataframe-jdbc" is added for all types that are covered correctly by
+     * [org.jetbrains.kotlinx.dataframe.io.makeCommonSqlToKTypeMapping] at the moment, however, to cover
+     * all nested types, we'll use a full type-map for all [DuckDB types][DuckDBColumnType] exactly.
+     */
+    @Suppress("ktlint:standard:blank-line-between-when-conditions")
+    internal fun String.toKType(isNullable: Boolean): KType {
+        val sqlTypeName = this
+        return when (DuckDBResultSetMetaData.TypeNameToType(sqlTypeName)) {
+            BOOLEAN -> typeOf<Boolean>() // dataframe-jdbc
+            TINYINT -> typeOf<Byte>()
+            SMALLINT -> typeOf<Short>()
+            INTEGER -> typeOf<Int>() // dataframe-jdbc
+            BIGINT -> typeOf<Long>() // dataframe-jdbc
+            HUGEINT -> typeOf<BigInteger>()
+            UHUGEINT -> typeOf<BigInteger>()
+            UTINYINT -> typeOf<Short>()
+            USMALLINT -> typeOf<Int>()
+            UINTEGER -> typeOf<Long>()
+            UBIGINT -> typeOf<BigInteger>()
+            FLOAT -> typeOf<Float>() // dataframe-jdbc
+            DOUBLE -> typeOf<Double>() // dataframe-jdbc
+            DECIMAL -> typeOf<BigDecimal>() // dataframe-jdbc
+            TIME -> typeOf<LocalTime>()
+            TIME_WITH_TIME_ZONE -> typeOf<OffsetTime>() // dataframe-jdbc
+            DATE -> typeOf<LocalDate>()
+            TIMESTAMP, TIMESTAMP_MS, TIMESTAMP_NS, TIMESTAMP_S -> typeOf<Timestamp>() // dataframe-jdbc
+            TIMESTAMP_WITH_TIME_ZONE -> typeOf<OffsetDateTime>() // dataframe-jdbc
+            JSON -> typeOf<JsonNode>()
+            BLOB -> typeOf<Blob>()
+            UUID -> typeOf<UUID>()
+            MAP -> {
+                val (key, value) = parseMapTypes(sqlTypeName)
+                Map::class.createType(
+                    listOf(
+                        KTypeProjection.invariant(key.toKType(false)),
+                        KTypeProjection.covariant(value.toKType(true)),
+                    ),
+                )
+            }
+
+            LIST, ARRAY -> {
+                // TODO requires #1266 and #1273 for specific types
+                //   val listType = parseListType(sqlTypeName)
+                //   Array::class.createType(
+                //       listOf(KTypeProjection.covariant(listType.toKType(true))),
+                //   )
+                typeOf<Array>()
+            }
+
+            STRUCT -> typeOf<Struct>() // TODO requires #1266 for specific types
+            UNION -> typeOf<Any>() // Cannot handle this in Kotlin
+            VARCHAR -> typeOf<String>()
+            UNKNOWN, BIT, INTERVAL, ENUM -> typeOf<String>()
+        }.withNullability(isNullable)
+    }
+
+    /** Parses "MAP(X, Y)" into "X" and "Y", taking parentheses into account */
+    internal fun parseMapTypes(typeString: String): Pair<String, String> {
+        if (!typeString.startsWith("MAP(") || !typeString.endsWith(")")) {
+            error("invalid MAP type: $typeString")
+        }
+
+        val content = typeString.removeSurrounding("MAP(", ")")
+
+        // Find the comma that separates key and value types
+        var parenCount = 0
+        var commaIndex = -1
+        for (i in content.indices) {
+            when (content[i]) {
+                '(' -> parenCount++
+
+                ')' -> parenCount--
+
+                ',' -> if (parenCount == 0) {
+                    commaIndex = i
+                    break
+                }
+            }
+        }
+
+        if (commaIndex == -1) error("invalid MAP type: $typeString")
+        val keyType = content.take(commaIndex).trim()
+        val valueType = content.substring(commaIndex + 1).trim()
+        return Pair(keyType, valueType)
+    }
+
+    /** Parses "X[]" and "X[123]" into "X", and "X[][]" into "X[]" */
+    internal fun parseListType(typeString: String): String {
+        if (!typeString.endsWith("]")) {
+            error("invalid LIST/ARRAY type: $typeString")
+        }
+
+        return typeString.take(typeString.indexOfLast { it == '[' })
+    }
+
+    /**
+     * How to filter out system tables from user-created ones when using
+     * [DataFrame.readAllSqlTables][DataFrame.Companion.readAllSqlTables] and
+     * [DataFrame.getSchemaForAllSqlTables][DataFrame.Companion.getSchemaForAllSqlTables].
+     *
+     * The names of these can sometimes be found in the specific JDBC integration.
+     */
+    override fun isSystemTable(tableMetadata: TableMetadata): Boolean =
+        tableMetadata.schemaName?.lowercase()?.contains("information_schema") == true ||
+            tableMetadata.schemaName?.lowercase()?.contains("system") == true ||
+            tableMetadata.name.lowercase().contains("system_")
+
+    /**
+     * How to retrieve the correct table metadata when using
+     * [DataFrame.readAllSqlTables][DataFrame.Companion.readAllSqlTables] and
+     * [DataFrame.getSchemaForAllSqlTables][DataFrame.Companion.getSchemaForAllSqlTables].
+     * The names of these can be found in the [DatabaseMetaData] implementation of the DuckDB JDBC integration.
+     */
+    override fun buildTableMetadata(tables: ResultSet): TableMetadata =
+        TableMetadata(
+            tables.getString("TABLE_NAME"),
+            tables.getString("TABLE_SCHEM"),
+            tables.getString("TABLE_CAT"),
+        )
+}
@@ -84,9 +84,11 @@ public fun extractDBTypeFromUrl(url: String?): DbType {
 
             MsSql.dbTypeInJdbcUrl in url -> MsSql
 
+            DuckDb.dbTypeInJdbcUrl in url -> DuckDb
+
             else -> throw IllegalArgumentException(
                 "Unsupported database type in the url: $url. " +
-                    "Only H2, MariaDB, MySQL, MSSQL, SQLite and PostgreSQL are supported!",
+                    "Only H2, MariaDB, MySQL, MSSQL, SQLite, PostgreSQL, and DuckDB are supported!",
             )
         }
     } else {
Original file line number	Diff line number	Diff line change
`@@ -84,9 +84,11 @@ public fun extractDBTypeFromUrl(url: String?): DbType {`
`84`	`84`
`85`	`85`	`MsSql.dbTypeInJdbcUrl in url -> MsSql`
`86`	`86`
	`87`	`+ DuckDb.dbTypeInJdbcUrl in url -> DuckDb`
	`88`	`+`
`87`	`89`	`else -> throw IllegalArgumentException(`
`88`	`90`	`"Unsupported database type in the url: $url. " +`
`89`		`- "Only H2, MariaDB, MySQL, MSSQL, SQLite and PostgreSQL are supported!",`
	`91`	`+ "Only H2, MariaDB, MySQL, MSSQL, SQLite, PostgreSQL, and DuckDB are supported!",`
`90`	`92`	`)`
`91`	`93`	`}`
`92`	`94`	`} else {`