Skip to content

Commit ee7a17c

Browse files
vinodkc authored and dongjoon-hyun committed
[SPARK-54552][CONNECT] Fix SparkConnectResultSet.getString to handle BINARY data type with UTF_8
### What changes were proposed in this pull request?

Fixed `SparkConnectResultSet.getString()` to properly convert BINARY data to UTF-8 strings instead of returning byte array object references (e.g., "[B@<hashcode>").

### Why are the changes needed?

The current implementation violates JDBC specification behavior. Users calling getString() on BINARY columns expect UTF-8 decoded strings, not Java object references.

Before
```
SELECT binary('xDeAdBeEf')
spark-sql: `\xDeAdBeEf`
beeline with STS: `\xDeAdBeEf`
beeline with Connect Server: `[B@4d518c66`
```

After
```
SELECT binary('xDeAdBeEf')
spark-sql: `\xDeAdBeEf`
beeline with STS: `\xDeAdBeEf`
beeline with Connect Server: `\xDeAdBeEf`
```

### Does this PR introduce _any_ user-facing change?

Yes. getString() on BINARY columns now returns UTF-8 decoded strings instead of byte array references like "[B@1a2b3c4d".

### How was this patch tested?

Added new test.

### Was this patch authored or co-authored using generative AI tooling?

No

Closes #53262 from vinodkc/br_fix_getString_BINARY.

Authored-by: vinodkc <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit f5b9ea8)
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent b572b7c commit ee7a17c

File tree

2 files changed

+31
-1
lines changed

2 files changed

+31
-1
lines changed

sql/connect/client/jdbc/src/main/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectResultSet.scala

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,13 @@ class SparkConnectResultSet(
114114
}
115115

116116
override def getString(columnIndex: Int): String = {
117-
getColumnValue(columnIndex, null: String) { idx => String.valueOf(currentRow.get(idx)) }
117+
getColumnValue(columnIndex, null: String) { idx =>
118+
currentRow.get(idx) match {
119+
case bytes: Array[Byte] =>
120+
new String(bytes, java.nio.charset.StandardCharsets.UTF_8)
121+
case other => String.valueOf(other)
122+
}
123+
}
118124
}
119125

120126
override def getBoolean(columnIndex: Int): Boolean = {

sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectJdbcDataTypeSuite.scala

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,11 @@ class SparkConnectJdbcDataTypeSuite extends ConnectFunSuite with RemoteSparkSess
385385
assert(bytes.length === testBytes.length)
386386
assert(bytes.sameElements(testBytes))
387387
assert(!rs.wasNull)
388+
389+
val stringValue = rs.getString(1)
390+
val expectedString = new String(testBytes, java.nio.charset.StandardCharsets.UTF_8)
391+
assert(stringValue === expectedString)
392+
388393
assert(!rs.next())
389394

390395
val metaData = rs.getMetaData
@@ -396,6 +401,22 @@ class SparkConnectJdbcDataTypeSuite extends ConnectFunSuite with RemoteSparkSess
396401
}
397402
}
398403

404+
test("get binary type with UTF-8 text") {
405+
val textBytes = "\\xDeAdBeEf".getBytes(java.nio.charset.StandardCharsets.UTF_8)
406+
val hexString = textBytes.map(b => "%02X".format(b)).mkString
407+
withExecuteQuery(s"SELECT CAST(X'$hexString' AS BINARY)") { rs =>
408+
assert(rs.next())
409+
val bytes = rs.getBytes(1)
410+
assert(bytes !== null)
411+
assert(bytes.sameElements(textBytes))
412+
413+
val stringValue = rs.getString(1)
414+
assert(stringValue === "\\xDeAdBeEf")
415+
416+
assert(!rs.next())
417+
}
418+
}
419+
399420
test("get binary type with null") {
400421
withExecuteQuery("SELECT cast(null as binary)") { rs =>
401422
assert(rs.next())
@@ -437,6 +458,9 @@ class SparkConnectJdbcDataTypeSuite extends ConnectFunSuite with RemoteSparkSess
437458
assert(bytes !== null)
438459
assert(bytes.length === 0)
439460
assert(!rs.wasNull)
461+
462+
val stringValue = rs.getString(1)
463+
assert(stringValue === "")
440464
assert(!rs.next())
441465
}
442466
}

0 commit comments

Comments
 (0)