Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
142c0eb
Refactored DataFrame JDBC API for enhanced DataSource handling
zaleslaw Oct 9, 2025
de3a97a
Refactored schema extraction to use `readSqlTable` and `readSqlQuery`…
zaleslaw Oct 9, 2025
477da67
Refactored and modularized schema extraction utilities into a dedicat…
zaleslaw Oct 9, 2025
448eab6
Refactor: Replace `DataFrame` with `DataFrameSchema` for schema-relat…
zaleslaw Oct 10, 2025
e02a6be
Update logging levels in validation utilities to debug and minor sche…
zaleslaw Oct 10, 2025
7f66bf4
Refactor: support custom `PreparedStatement` configuration, unify que…
zaleslaw Oct 10, 2025
b53e9f1
Refactor: enhance `DbType` with batch size and query timeout properti…
zaleslaw Oct 13, 2025
08d927e
Refactor: centralize `makeCommonSqlToKTypeMapping` in `DbType`, strea…
zaleslaw Oct 13, 2025
9f75a84
Refactored query execution logic by introducing `readDataFrameFromDat…
zaleslaw Oct 14, 2025
565e969
Refactored ResultSet-processing utilities to use mutable lists for im…
zaleslaw Oct 14, 2025
cc8c861
Add `configureStatement` missed parameters
zaleslaw Oct 14, 2025
ecbbe53
Refactored JDBC utilities: added comprehensive error handling in `rea…
zaleslaw Oct 14, 2025
3310382
Update the exception type in the ` read from non-existing table` test…
zaleslaw Oct 14, 2025
23f7c1b
Renamed schema extraction functions from `getSchemaFor*` to `from*` f…
zaleslaw Oct 14, 2025
3377e98
Rename `fromSqlTable` and `fromSqlQuery` to `readSqlTable` and `readS…
zaleslaw Oct 14, 2025
e4e84c2
Update `GenerateDataSchemaTask` to use `DataFrameSchema` methods for …
zaleslaw Oct 14, 2025
3aea719
Refactor: improve code consistency, update parameter documentation, s…
zaleslaw Oct 15, 2025
5489889
Replace `DEFAULT_LIMIT` with nullable `limit` parameter, defaulting t…
zaleslaw Oct 16, 2025
328c46f
Add `validateLimit` utility to ensure limit parameter is null or posi…
zaleslaw Oct 16, 2025
e007fa5
Add `validateLimit` calls across all JDBC read methods to enforce lim…
zaleslaw Oct 16, 2025
56d93f5
Clarify "limit" parameter documentation and rename `readDataFrameFrom…
zaleslaw Oct 16, 2025
f0e4d69
Refactor JDBC data handling: relocate and centralize `buildSchemaByTa…
zaleslaw Oct 16, 2025
6f2e2de
Ktlint with Junie
zaleslaw Oct 16, 2025
1c772a2
Linter with Junie, part 2
zaleslaw Oct 16, 2025
7009a61
Refactor and enhance JDBC: update references for improved consistency…
zaleslaw Oct 16, 2025
f928057
Add `DataFrameSchema.Companion` class to core API
zaleslaw Oct 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.jetbrains.kotlinx.dataframe.schema

public interface DataFrameSchema {
public companion object;

public val columns: Map<String, ColumnSchema>

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package org.jetbrains.kotlinx.dataframe.io

/**
* Represents the configuration for an internally managed JDBC database connection.
*
* This class defines connection parameters used by the library to create a `Connection`
* when the user does not provide one explicitly.
* It is designed for safe, read-only access by default.
*
* @property url The JDBC URL of the database, e.g., `"jdbc:postgresql://localhost:5432/mydb"`.
* Must follow the standard format: `jdbc:subprotocol:subname`.
*
* @property user The username used for authentication.
* Optional, default is an empty string.
*
* @property password The password used for authentication.
* Optional, default is an empty string.
*
* @property readOnly If `true` (default), the library will create the connection in read-only mode.
* This enables the following behavior:
* - `Connection.setReadOnly(true)`
* - `Connection.setAutoCommit(false)`
* - automatic `rollback()` at the end of execution
*
* If `false`, the connection will be created with JDBC defaults (usually read-write),
* but the library will still reject any queries that appear to modify data
* (e.g. contain `INSERT`, `UPDATE`, `DELETE`, etc.).
*
* Note: Connections created using this configuration are managed entirely by the library.
* Users do not have access to the underlying `Connection` instance and cannot commit or close it manually.
*
* ### Examples:
*
* ```kotlin
* // Safe read-only connection (default)
* val config = DbConnectionConfig("jdbc:sqlite::memory:")
* val df = DataFrame.readSqlQuery(config, "SELECT * FROM books")
*
* // Use default JDBC connection settings (still protected against mutations)
* val config = DbConnectionConfig(
* url = "jdbc:sqlite::memory:",
* readOnly = false
* )
* ```
*/
public data class DbConnectionConfig(
val url: String,
val user: String = "",
val password: String = "",
val readOnly: Boolean = true,
)
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
package org.jetbrains.kotlinx.dataframe.io.db

import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
import org.jetbrains.kotlinx.dataframe.io.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.TableMetadata
import org.jetbrains.kotlinx.dataframe.io.getSchemaForAllSqlTables
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.Connection
import java.sql.DatabaseMetaData
import java.sql.DriverManager
import java.sql.PreparedStatement
import java.sql.ResultSet
import kotlin.reflect.KType

Expand Down Expand Up @@ -40,6 +39,10 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) {
*/
public open val tableTypes: List<String>? = listOf("TABLE", "BASE TABLE")


public open val defaultFetchSize: Int = 1000
public open val defaultQueryTimeout: Int? = null // null = no timeout

/**
* Returns a [ColumnSchema] produced from [tableColumnMetadata].
*/
Expand Down Expand Up @@ -70,14 +73,70 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) {
*/
public abstract fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType?


/**
* Builds a SELECT query for reading from a table.
*
* @param [tableName] the name of the table to query.
* @param [limit] the maximum number of rows to retrieve. If 0 or negative, no limit is applied.
* @return the SQL query string.
*/
public open fun buildSelectTableQueryWithLimit(tableName: String, limit: Int): String {
val quotedTableName = quoteIdentifier(tableName)
return if (limit > 0) {
buildSqlQueryWithLimit("SELECT * FROM $quotedTableName", limit)
} else {
"SELECT * FROM $quotedTableName"
}
}

/**
* Configures a [PreparedStatement] for optimal read performance.
* This method is called automatically before statement execution.
*
* @param [statement] the prepared statement to configure.
*/
public open fun configureReadStatement(
statement: PreparedStatement
) {
// Set fetch size for better streaming performance
statement.fetchSize = defaultFetchSize


if (defaultQueryTimeout != null) {
statement.queryTimeout = defaultQueryTimeout!!
}


// Set the fetch direction (forward-only for read-only operations)
statement.fetchDirection = ResultSet.FETCH_FORWARD
}

/**
* Quotes an identifier (table or column name) according to database-specific rules.
*
* Examples:
* - PostgreSQL: "tableName" or "schema"."table"
* - MySQL: `tableName` or `schema`.`table`
* - MS SQL: [tableName] or [schema].[table]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

escape [],

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

image still shows this in the IDE image but it renders correctly, so I'm happy :)

* - SQLite/H2: no quotes for simple names
*
* @param [name] the identifier to quote (can contain dots for schema.table).
* @return the quoted identifier.
*/
public open fun quoteIdentifier(name: String): String {
// Default: no quoting (works for SQLite, H2, simple names)
return name
}

/**
* Constructs a SQL query with a limit clause.
*
* @param sqlQuery The original SQL query.
* @param limit The maximum number of rows to retrieve from the query. Default is 1.
* @return A new SQL query with the limit clause added.
*/
public open fun sqlQueryLimit(sqlQuery: String, limit: Int = 1): String = "$sqlQuery LIMIT $limit"
public open fun buildSqlQueryWithLimit(sqlQuery: String, limit: Int = 1): String = "$sqlQuery LIMIT $limit"

/**
* Creates a database connection using the provided configuration.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ import org.duckdb.DuckDBResultSetMetaData
import org.duckdb.JsonNode
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
import org.jetbrains.kotlinx.dataframe.io.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.TableMetadata
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.db.TableMetadata
import org.jetbrains.kotlinx.dataframe.io.db.DuckDb.convertSqlTypeToKType
import org.jetbrains.kotlinx.dataframe.io.getSchemaForAllSqlTables
import org.jetbrains.kotlinx.dataframe.io.readAllSqlTables
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
package org.jetbrains.kotlinx.dataframe.io.db

import org.jetbrains.kotlinx.dataframe.io.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.TableMetadata
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.ResultSet
import java.util.Locale
Expand Down Expand Up @@ -67,5 +65,5 @@ public open class H2(public val dialect: DbType = MySql) : DbType("h2") {
override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? =
dialect.convertSqlTypeToKType(tableColumnMetadata)

public override fun sqlQueryLimit(sqlQuery: String, limit: Int): String = dialect.sqlQueryLimit(sqlQuery, limit)
public override fun buildSqlQueryWithLimit(sqlQuery: String, limit: Int): String = dialect.buildSqlQueryWithLimit(sqlQuery, limit)
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package org.jetbrains.kotlinx.dataframe.io.db

import org.jetbrains.kotlinx.dataframe.io.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.TableMetadata
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.db.TableMetadata
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.ResultSet
import kotlin.reflect.KType
Expand Down Expand Up @@ -40,4 +40,9 @@ public object MariaDb : DbType("mariadb") {
}
return null
}

override fun quoteIdentifier(name: String): String {
// schema.table -> `schema`.`table`
return name.split(".").joinToString(".") { "`$it`" }
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package org.jetbrains.kotlinx.dataframe.io.db

import org.jetbrains.kotlinx.dataframe.io.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.TableMetadata
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.db.TableMetadata
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.ResultSet
import java.util.Locale
Expand Down Expand Up @@ -49,6 +49,11 @@ public object MsSql : DbType("sqlserver") {

override fun convertSqlTypeToKType(tableColumnMetadata: TableColumnMetadata): KType? = null

public override fun sqlQueryLimit(sqlQuery: String, limit: Int): String =
public override fun buildSqlQueryWithLimit(sqlQuery: String, limit: Int): String =
sqlQuery.replace("SELECT", "SELECT TOP $limit", ignoreCase = true)

override fun quoteIdentifier(name: String): String {
// schema.table -> [schema].[table]
return name.split(".").joinToString(".") { "[$it]" }
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package org.jetbrains.kotlinx.dataframe.io.db

import org.jetbrains.kotlinx.dataframe.io.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.TableMetadata
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.db.TableMetadata
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.ResultSet
import java.util.Locale
Expand Down Expand Up @@ -55,4 +55,9 @@ public object MySql : DbType("mysql") {
}
return null
}

override fun quoteIdentifier(name: String): String {
// schema.table -> `schema`.`table`
return name.split(".").joinToString(".") { "`$it`" }
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package org.jetbrains.kotlinx.dataframe.io.db

import org.jetbrains.kotlinx.dataframe.io.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.TableMetadata
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.db.TableMetadata
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import java.sql.ResultSet
import java.util.Locale
Expand Down Expand Up @@ -47,4 +47,9 @@ public object PostgreSql : DbType("postgresql") {

return null
}

override fun quoteIdentifier(name: String): String {
// schema.table -> "schema"."table"
return name.split(".").joinToString(".") { "\"$it\"" }
}
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package org.jetbrains.kotlinx.dataframe.io.db

import org.jetbrains.kotlinx.dataframe.io.DbConnectionConfig
import org.jetbrains.kotlinx.dataframe.io.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.TableMetadata
import org.jetbrains.kotlinx.dataframe.io.db.TableColumnMetadata
import org.jetbrains.kotlinx.dataframe.io.db.TableMetadata
import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema
import org.sqlite.SQLiteConfig
import java.sql.Connection
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package org.jetbrains.kotlinx.dataframe.io.db

/**
* Represents a column in a database table to keep all required meta-information.
*
* @property [name] the name of the column.
* @property [sqlTypeName] the SQL data type of the column.
* @property [jdbcType] the JDBC data type of the column produced from [java.sql.Types].
* @property [size] the size of the column.
* @property [javaClassName] the class name in Java.
* @property [isNullable] true if column could contain nulls.
*/
public data class TableColumnMetadata(
val name: String,
val sqlTypeName: String,
val jdbcType: Int,
val size: Int,
val javaClassName: String,
val isNullable: Boolean = false,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package org.jetbrains.kotlinx.dataframe.io.db

/**
* Represents a table metadata to store information about a database table,
* including its name, schema name, and catalogue name.
*
* NOTE: we need to extract both, [schemaName] and [catalogue]
* because the different databases have different implementations of metadata.
*
* @property [name] the name of the table.
* @property [schemaName] the name of the schema the table belongs to (optional).
* @property [catalogue] the name of the catalogue the table belongs to (optional).
*/
public data class TableMetadata(val name: String, val schemaName: String?, val catalogue: String?)
Loading
Loading