Skip to content

[SPARK-52832][SQL] Fix JDBC dialect identifier quoting #51533

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,12 @@ private case class DatabricksDialect() extends JdbcDialect with NoLegacyJDBCErro
}

override def quoteIdentifier(colName: String): String = {
s"`$colName`"
// Per Databricks documentation:
// https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-identifiers
//
// "Any character from the Unicode character set. Use ` to escape ` itself."
val escapedColName = colName.replace("`", "``")
s"`$escapedColName`"
}

override def supportsLimit: Boolean = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,9 @@ abstract class JdbcDialect extends Serializable with Logging {
* name is a reserved keyword, or in case it contains characters that require quotes (e.g. space).
*/
def quoteIdentifier(colName: String): String = {
s""""$colName""""
// By ANSI standard, quotes are escaped with another quotes.
val escapedColName = colName.replace("\"", "\"\"")
s""""$escapedColName""""
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,14 @@ private case class MySQLDialect() extends JdbcDialect with SQLConfHelper with No
}

override def quoteIdentifier(colName: String): String = {
s"`$colName`"
// Per MySQL documentation: https://dev.mysql.com/doc/refman/8.4/en/identifiers.html
//
// Identifier quote characters can be included within an identifier if you quote the
// identifier. If the character to be included within the identifier is the same as
// that used to quote the identifier itself, then you need to double the character.
// The following statement creates a table named a`b that contains a column named c"d:
val escapedColName = colName.replace("`", "``")
s"`$escapedColName`"
}

override def schemasExists(conn: Connection, options: JDBCOptions, schema: String): Boolean = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,11 @@ private case class OracleDialect() extends JdbcDialect with SQLConfHelper with N
case _ => super.classifyException(e, condition, messageParameters, description, isRuntime)
}
}

override def quoteIdentifier(colName: String): String = {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can remove it now as it's the same as default.

val escapedColName = colName.replace("\"", "\"\"")
s""""$escapedColName""""
}
}

private[jdbc] object OracleDialect {
Expand Down
28 changes: 17 additions & 11 deletions sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -803,17 +803,23 @@ class JDBCSuite extends QueryTest with SharedSparkSession {
}

test("quote column names by jdbc dialect") {
val MySQL = JdbcDialects.get("jdbc:mysql://127.0.0.1/db")
val Postgres = JdbcDialects.get("jdbc:postgresql://127.0.0.1/db")
val Derby = JdbcDialects.get("jdbc:derby:db")

val columns = Seq("abc", "key")
val MySQLColumns = columns.map(MySQL.quoteIdentifier(_))
val PostgresColumns = columns.map(Postgres.quoteIdentifier(_))
val DerbyColumns = columns.map(Derby.quoteIdentifier(_))
assert(MySQLColumns === Seq("`abc`", "`key`"))
assert(PostgresColumns === Seq(""""abc"""", """"key""""))
assert(DerbyColumns === Seq(""""abc"""", """"key""""))
val mySQLDialect = JdbcDialects.get("jdbc:mysql://127.0.0.1/db")
val postgresDialect = JdbcDialects.get("jdbc:postgresql://127.0.0.1/db")
val derbyDialect = JdbcDialects.get("jdbc:derby:db")
val oracleDialect = JdbcDialects.get("jdbc:oracle:thin:@//localhost:1521/orcl")
val databricksDialect = JdbcDialects.get("jdbc:databricks://host/db")

val columns = Seq("abc", "key", "double_quote\"", "back`")
val mySQLColumns = columns.map(mySQLDialect.quoteIdentifier)
val postgresColumns = columns.map(postgresDialect.quoteIdentifier)
val derbyColumns = columns.map(derbyDialect.quoteIdentifier)
val oracleColumns = columns.map(oracleDialect.quoteIdentifier)
val databricksColumns = columns.map(databricksDialect.quoteIdentifier)
assertResult(Seq("`abc`", "`key`", "`double_quote\"`", "`back```"))(mySQLColumns)
assertResult(Seq("\"abc\"", "\"key\"", "\"double_quote\"\"\"", "\"back`\""))(postgresColumns)
assertResult(Seq("\"abc\"", "\"key\"", "\"double_quote\"\"\"", "\"back`\""))(derbyColumns)
assertResult(Seq("\"abc\"", "\"key\"", "\"double_quote\"\"\"", "\"back`\""))(oracleColumns)
assertResult(Seq("`abc`", "`key`", "`double_quote\"`", "`back```"))(databricksColumns)
}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

assertResult is used because it gives better error message, it accepts expected and actual parameters, so it can tell what is expected values in error message.a


test("compile filters") {
Expand Down