From ce5a441260ae3c3f20c17a9b7833c51479cd6271 Mon Sep 17 00:00:00 2001 From: comphead Date: Wed, 29 Oct 2025 12:49:55 -0700 Subject: [PATCH 1/2] chore: display schema for datasets being compared --- .../org/apache/comet/fuzz/QueryRunner.scala | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala index f4f3452962..1eab66fcb1 100644 --- a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala +++ b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala @@ -154,13 +154,16 @@ object QueryComparison { while (i < sparkRows.length) { val l = sparkRows(i) val r = cometRows(i) + // Check the schema is equal for first row only - if (i == 0 && l.schema != r.schema) { - output.write( - s"[ERROR] Spark produced schema ${l.schema} and " + - s"Comet produced schema ${r.schema} rows.\n") + if (i == 0) { + showSchema(output, l.schema.treeString, r.schema.treeString) + + if (l.schema != r.schema) { + output.write("[ERROR] Spark produced different schema than Comet.\n") - return false + return false + } } assert(l.length == r.length) @@ -254,4 +257,11 @@ object QueryComparison { w.write("### Comet Plan\n") w.write(s"```\n$cometPlan\n```\n") } + + def showSchema(w: BufferedWriter, sparkSchema: String, cometSchema: String): Unit = { + w.write("### Spark Schema\n") + w.write(s"```\n$sparkSchema\n```\n") + w.write("### Comet Schema\n") + w.write(s"```\n$cometSchema\n```\n") + } } From 1661bc563b4525e7c42ea20c9e43b8f4df210899 Mon Sep 17 00:00:00 2001 From: comphead Date: Wed, 29 Oct 2025 12:56:51 -0700 Subject: [PATCH 2/2] chore: display schema for datasets being compared --- .../scala/org/apache/comet/fuzz/ComparisonTool.scala | 6 ++++++ .../main/scala/org/apache/comet/fuzz/QueryRunner.scala | 10 +++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/ComparisonTool.scala b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/ComparisonTool.scala index a4fd011fee..03b6f7334f 100644 --- a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/ComparisonTool.scala +++ b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/ComparisonTool.scala @@ -117,6 +117,12 @@ object ComparisonTool { // Compare the results if (QueryComparison.assertSameRows(sparkRows, cometRows, output)) { output.write(s"Subfolder $subfolderName: ${sparkRows.length} rows matched\n\n") + } else { + // Output schema if dataframes are not equal + QueryComparison.showSchema( + output, + sparkDf.schema.treeString, + cometDf.schema.treeString) } } catch { case e: Exception => diff --git a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala index 1eab66fcb1..23bfbc1c0a 100644 --- a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala +++ b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala @@ -156,14 +156,10 @@ object QueryComparison { val r = cometRows(i) // Check the schema is equal for first row only - if (i == 0) { - showSchema(output, l.schema.treeString, r.schema.treeString) + if (i == 0 && l.schema != r.schema) { + output.write("[ERROR] Spark produced different schema than Comet.\n") - if (l.schema != r.schema) { - output.write("[ERROR] Spark produced different schema than Comet.\n") - - return false - } + return false } assert(l.length == r.length)