Describe the bug
- Taken from the GitHub CI run of fix: [iceberg] Switch to OSS Spark and run Iceberg Spark tests in parallel #1987:
> Task :iceberg-spark:iceberg-spark-3.5_2.13:test
TestStoragePartitionedJoins > testJoinsWithBucketingOnBinaryColumn() > catalogName = testhadoop, implementation = org.apache.iceberg.spark.SparkCatalog, config = {type=hadoop, cache-enabled=false}, planningMode = LOCAL FAILED
org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 134.0 failed 1 times, most recent failure: Lost task 1.0 in stage 134.0 (TID 3081) (localhost executor driver): org.apache.comet.CometNativeException: overflow
at comet::errors::init::{{closure}}(__internal__:0)
at std::panicking::rust_panic_with_hook(__internal__:0)
at std::panicking::begin_panic_handler::{{closure}}(__internal__:0)
at std::sys::backtrace::__rust_end_short_backtrace(__internal__:0)
at __rustc::rust_begin_unwind(__internal__:0)
at core::panicking::panic_fmt(__internal__:0)
at core::option::expect_failed(__internal__:0)
at arrow_select::take::take_bytes(__internal__:0)
at arrow_select::take::take_impl(__internal__:0)
at arrow_select::take::take(__internal__:0)
at arrow_select::take::take_arrays(__internal__:0)
at datafusion_physical_plan::sorts::sort::sort_batch(__internal__:0)
at <datafusion_physical_plan::stream::RecordBatchStreamAdapter<S> as futures_core::stream::Stream>::poll_next(__internal__:0)
at <datafusion_physical_plan::stream::RecordBatchStreamAdapter<S> as futures_core::stream::Stream>::poll_next(__internal__:0)
at comet::execution::jni_api::Java_org_apache_comet_Native_executePlan::{{closure}}::{{closure}}(__internal__:0)
at Java_org_apache_comet_Native_executePlan(__internal__:0)
at <unknown>(__internal__:0)
at org.apache.comet.Native.executePlan(Native Method)
at org.apache.comet.CometExecIterator.$anonfun$getNextBatch$2(CometExecIterator.scala:155)
at org.apache.comet.CometExecIterator.$anonfun$getNextBatch$2$adapted(CometExecIterator.scala:154)
at org.apache.comet.vector.NativeUtil.getNextBatch(NativeUtil.scala:157)
at org.apache.comet.CometExecIterator.$anonfun$getNextBatch$1(CometExecIterator.scala:154)
at org.apache.comet.Tracing$.withTrace(Tracing.scala:31)
at org.apache.comet.CometExecIterator.getNextBatch(CometExecIterator.scala:152)
at org.apache.comet.CometExecIterator.hasNext(CometExecIterator.scala:203)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.cometcolumnartorow_nextBatch_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:43)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage3.smj_findNextJoinRows_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage3.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:43)
at scala.collection.Iterator$$anon$9.hasNext(Iterator.scala:583)
at scala.collection.Iterator$$anon$9.hasNext(Iterator.scala:583)
at org.apache.spark.util.random.SamplingUtils$.reservoirSampleAndCount(SamplingUtils.scala:41)
at org.apache.spark.RangePartitioner$.$anonfun$sketch$1(Partitioner.scala:322)
at org.apache.spark.RangePartitioner$.$anonfun$sketch$1$adapted(Partitioner.scala:320)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:910)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:910)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:331)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
at org.apache.spark.scheduler.Task.run(Task.scala:141)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:621)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:624)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at java.base/java.lang.Thread.run(Thread.java:829)
Driver stacktrace:
at app//org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2898)
at app//org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2834)
at app//org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2833)
at app//scala.collection.immutable.List.foreach(List.scala:334)
at app//org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2833)
at app//org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1253)
at app//org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1253)
at app//scala.Option.foreach(Option.scala:437)
at app//org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1253)
at app//org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3102)
at app//org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3036)
at app//org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3025)
at app//org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
at app//org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:995)
at app//org.apache.spark.SparkContext.runJob(SparkContext.scala:2393)
at app//org.apache.spark.SparkContext.runJob(SparkContext.scala:2414)
at app//org.apache.spark.SparkContext.runJob(SparkContext.scala:2433)
at app//org.apache.spark.SparkContext.runJob(SparkContext.scala:2458)
at app//org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1049)
at app//org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at app//org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at app//org.apache.spark.rdd.RDD.withScope(RDD.scala:410)
at app//org.apache.spark.rdd.RDD.collect(RDD.scala:1048)
at app//org.apache.spark.RangePartitioner$.sketch(Partitioner.scala:320)
at app//org.apache.spark.RangePartitioner.<init>(Partitioner.scala:187)
at app//org.apache.spark.sql.comet.execution.shuffle.CometShuffleExchangeExec$.prepareJVMShuffleDependency(CometShuffleExchangeExec.scala:317)
at app//org.apache.spark.sql.comet.execution.shuffle.CometShuffleExchangeExec.shuffleDependency$lzycompute(CometShuffleExchangeExec.scala:153)
at app//org.apache.spark.sql.comet.execution.shuffle.CometShuffleExchangeExec.shuffleDependency(CometShuffleExchangeExec.scala:132)
at app//org.apache.spark.sql.comet.execution.shuffle.CometShuffleExchangeExec.doExecuteColumnar(CometShuffleExchangeExec.scala:186)
at app//org.apache.spark.sql.execution.SparkPlan.$anonfun$executeColumnar$1(SparkPlan.scala:222)
at app//org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:246)
at app//org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at app//org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:243)
at app//org.apache.spark.sql.execution.SparkPlan.executeColumnar(SparkPlan.scala:218)
at app//org.apache.spark.sql.comet.CometNativeExec.doExecuteColumnar(operators.scala:265)
at app//org.apache.spark.sql.execution.SparkPlan.$anonfun$executeColumnar$1(SparkPlan.scala:222)
at app//org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:246)
at app//org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at app//org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:243)
at app//org.apache.spark.sql.execution.SparkPlan.executeColumnar(SparkPlan.scala:218)
at app//org.apache.spark.sql.execution.InputAdapter.doExecuteColumnar(WholeStageCodegenExec.scala:521)
at app//org.apache.spark.sql.execution.SparkPlan.$anonfun$executeColumnar$1(SparkPlan.scala:222)
at app//org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:246)
at app//org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at app//org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:243)
at app//org.apache.spark.sql.execution.SparkPlan.executeColumnar(SparkPlan.scala:218)
at app//org.apache.spark.sql.comet.CometColumnarToRowExec.inputRDDs(CometColumnarToRowExec.scala:306)
at app//org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:751)
at app//org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:195)
at app//org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:246)
at app//org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at app//org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:243)
at app//org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:191)
at app//org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:364)
at app//org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:445)
at app//org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:4333)
at app//org.apache.spark.sql.Dataset.$anonfun$collectAsList$1(Dataset.scala:3587)
at app//org.apache.spark.sql.Dataset.$anonfun$withAction$2(Dataset.scala:4323)
at app//org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:546)
at app//org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:4321)
at app//org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125)
at app//org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)
at app//org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)
at app//org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
at app//org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)
at app//org.apache.spark.sql.Dataset.withAction(Dataset.scala:4321)
at app//org.apache.spark.sql.Dataset.collectAsList(Dataset.scala:3586)
at app//org.apache.iceberg.spark.TestBase.sql(TestBase.java:128)
at app//org.apache.iceberg.spark.TestBase.lambda$executeAndKeepPlan$3(TestBase.java:256)
at app//org.apache.iceberg.spark.TestBase.executeAndKeepPlan(TestBase.java:275)
at app//org.apache.iceberg.spark.TestBase.executeAndKeepPlan(TestBase.java:256)
at app//org.apache.iceberg.spark.sql.TestStoragePartitionedJoins.lambda$assertPartitioningAwarePlan$0(TestStoragePartitionedJoins.java:640)
at app//org.apache.iceberg.spark.TestBase.withSQLConf(TestBase.java:214)
at app//org.apache.iceberg.spark.sql.TestStoragePartitionedJoins.assertPartitioningAwarePlan(TestStoragePartitionedJoins.java:637)
at app//org.apache.iceberg.spark.sql.TestStoragePartitionedJoins.checkJoin(TestStoragePartitionedJoins.java:612)
at app//org.apache.iceberg.spark.sql.TestStoragePartitionedJoins.testJoinsWithBucketingOnBinaryColumn(TestStoragePartitionedJoins.java:173)
Caused by:
org.apache.comet.CometNativeException: overflow
at comet::errors::init::{{closure}}(__internal__:0)
at std::panicking::rust_panic_with_hook(__internal__:0)
at std::panicking::begin_panic_handler::{{closure}}(__internal__:0)
at std::sys::backtrace::__rust_end_short_backtrace(__internal__:0)
at __rustc::rust_begin_unwind(__internal__:0)
at core::panicking::panic_fmt(__internal__:0)
at core::option::expect_failed(__internal__:0)
at arrow_select::take::take_bytes(__internal__:0)
at arrow_select::take::take_impl(__internal__:0)
at arrow_select::take::take(__internal__:0)
at arrow_select::take::take_arrays(__internal__:0)
at datafusion_physical_plan::sorts::sort::sort_batch(__internal__:0)
at <datafusion_physical_plan::stream::RecordBatchStreamAdapter<S> as futures_core::stream::Stream>::poll_next(__internal__:0)
at <datafusion_physical_plan::stream::RecordBatchStreamAdapter<S> as futures_core::stream::Stream>::poll_next(__internal__:0)
at comet::execution::jni_api::Java_org_apache_comet_Native_executePlan::{{closure}}::{{closure}}(__internal__:0)
at Java_org_apache_comet_Native_executePlan(__internal__:0)
at <unknown>(__internal__:0)
at app//org.apache.comet.Native.executePlan(Native Method)
at app//org.apache.comet.CometExecIterator.$anonfun$getNextBatch$2(CometExecIterator.scala:155)
at app//org.apache.comet.CometExecIterator.$anonfun$getNextBatch$2$adapted(CometExecIterator.scala:154)
at app//org.apache.comet.vector.NativeUtil.getNextBatch(NativeUtil.scala:157)
at app//org.apache.comet.CometExecIterator.$anonfun$getNextBatch$1(CometExecIterator.scala:154)
at app//org.apache.comet.Tracing$.withTrace(Tracing.scala:31)
at app//org.apache.comet.CometExecIterator.getNextBatch(CometExecIterator.scala:152)
at app//org.apache.comet.CometExecIterator.hasNext(CometExecIterator.scala:203)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.cometcolumnartorow_nextBatch_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
at app//org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at app//org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:43)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage3.smj_findNextJoinRows_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage3.processNext(Unknown Source)
at app//org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at app//org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:43)
at app//scala.collection.Iterator$$anon$9.hasNext(Iterator.scala:583)
at app//scala.collection.Iterator$$anon$9.hasNext(Iterator.scala:583)
at app//org.apache.spark.util.random.SamplingUtils$.reservoirSampleAndCount(SamplingUtils.scala:41)
at app//org.apache.spark.RangePartitioner$.$anonfun$sketch$1(Partitioner.scala:322)
at app//org.apache.spark.RangePartitioner$.$anonfun$sketch$1$adapted(Partitioner.scala:320)
at app//org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:910)
at app//org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:910)
at app//org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at app//org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367)
at app//org.apache.spark.rdd.RDD.iterator(RDD.scala:331)
at app//org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
at app//org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
at app//org.apache.spark.scheduler.Task.run(Task.scala:141)
at app//org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:621)
at app//org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at app//org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at app//org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at app//org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:624)
at [email protected]/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at [email protected]/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
at [email protected]/java.lang.Thread.run(Thread.java:829)
Steps to reproduce
SparkSession configs used:
.config("spark.plugins", "org.apache.spark.CometPlugin")
.config("spark.shuffle.manager", "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
.config("spark.comet.explainFallback.enabled", "true")
.config("spark.sql.iceberg.parquet.reader-type", "COMET")
.config("spark.memory.offHeap.enabled", "true")
.config("spark.memory.offHeap.size", "10g")
.config("spark.comet.use.lazyMaterialization", "false")
.config("spark.comet.schemaEvolution.enabled", "true")
Expected behavior
No response
Additional context
No response