diff --git a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/ComparisonTool.scala b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/ComparisonTool.scala index a4fd011fee..03b6f7334f 100644 --- a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/ComparisonTool.scala +++ b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/ComparisonTool.scala @@ -117,6 +117,12 @@ object ComparisonTool { // Compare the results if (QueryComparison.assertSameRows(sparkRows, cometRows, output)) { output.write(s"Subfolder $subfolderName: ${sparkRows.length} rows matched\n\n") + } else { + // Output schema if dataframes are not equal + QueryComparison.showSchema( + output, + sparkDf.schema.treeString, + cometDf.schema.treeString) } } catch { case e: Exception => diff --git a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala index f4f3452962..23bfbc1c0a 100644 --- a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala +++ b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala @@ -154,11 +154,10 @@ object QueryComparison { while (i < sparkRows.length) { val l = sparkRows(i) val r = cometRows(i) + // Check the schema is equal for first row only if (i == 0 && l.schema != r.schema) { - output.write( - s"[ERROR] Spark produced schema ${l.schema} and " + - s"Comet produced schema ${r.schema} rows.\n") + output.write("[ERROR] Spark produced different schema than Comet.\n") return false } @@ -254,4 +253,11 @@ object QueryComparison { w.write("### Comet Plan\n") w.write(s"```\n$cometPlan\n```\n") } + + def showSchema(w: BufferedWriter, sparkSchema: String, cometSchema: String): Unit = { + w.write("### Spark Schema\n") + w.write(s"```\n$sparkSchema\n```\n") + w.write("### Comet Schema\n") + w.write(s"```\n$cometSchema\n```\n") + } }