diff --git a/datafusion-examples/examples/async_udf.rs b/datafusion-examples/examples/async_udf.rs
index f1fc3f88852c..22e759de40f7 100644
--- a/datafusion-examples/examples/async_udf.rs
+++ b/datafusion-examples/examples/async_udf.rs
@@ -29,7 +29,6 @@ use datafusion::common::cast::as_string_view_array;
 use datafusion::common::error::Result;
 use datafusion::common::not_impl_err;
 use datafusion::common::utils::take_function_args;
-use datafusion::config::ConfigOptions;
 use datafusion::execution::SessionStateBuilder;
 use datafusion::logical_expr::async_udf::{AsyncScalarUDF, AsyncScalarUDFImpl};
 use datafusion::logical_expr::{
@@ -195,11 +194,7 @@ impl AsyncScalarUDFImpl for AskLLM {
     /// is processing the query, so you may wish to make actual network requests
     /// on a different `Runtime`, as explained in the `thread_pools.rs` example
     /// in this directory.
-    async fn invoke_async_with_args(
-        &self,
-        args: ScalarFunctionArgs,
-        _option: &ConfigOptions,
-    ) -> Result<ArrayRef> {
+    async fn invoke_async_with_args(&self, args: ScalarFunctionArgs) -> Result<ArrayRef> {
         // in a real UDF you would likely want to special case constant
         // arguments to improve performance, but this example converts the
         // arguments to arrays for simplicity.
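With the separate `&ConfigOptions` parameter gone, an async UDF reads options from the arguments themselves. A minimal sketch of the new trait shape, using a hypothetical `EchoTimeZone` function (not part of this PR) that reads the same option as the regression test further down:

```rust
use std::any::Any;
use std::sync::Arc;

use arrow::array::{ArrayRef, StringArray};
use arrow::datatypes::DataType;
use async_trait::async_trait;
use datafusion::common::error::Result;
use datafusion::common::not_impl_err;
use datafusion::logical_expr::async_udf::AsyncScalarUDFImpl;
use datafusion::logical_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature};

/// Hypothetical async UDF, defined only to illustrate the new signature.
#[derive(Debug)]
struct EchoTimeZone {
    signature: Signature,
}

impl ScalarUDFImpl for EchoTimeZone {
    fn as_any(&self) -> &dyn Any {
        self
    }
    fn name(&self) -> &str {
        "echo_time_zone"
    }
    fn signature(&self) -> &Signature {
        &self.signature
    }
    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
        Ok(DataType::Utf8)
    }
    fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
        not_impl_err!("echo_time_zone can only be invoked asynchronously")
    }
}

#[async_trait]
impl AsyncScalarUDFImpl for EchoTimeZone {
    // No separate `&ConfigOptions` parameter anymore: the same data now
    // arrives as part of `args`.
    async fn invoke_async_with_args(&self, args: ScalarFunctionArgs) -> Result<ArrayRef> {
        let tz = args.config_options.execution.time_zone.clone();
        Ok(Arc::new(StringArray::from(vec![tz; args.number_rows])))
    }
}
```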
diff --git a/datafusion-examples/examples/sql_frontend.rs b/datafusion-examples/examples/sql_frontend.rs
index 3955d5038cfb..1fc9ce24ecbb 100644
--- a/datafusion-examples/examples/sql_frontend.rs
+++ b/datafusion-examples/examples/sql_frontend.rs
@@ -83,7 +83,7 @@ pub fn main() -> Result<()> {
     let config = OptimizerContext::default().with_skip_failing_rules(false);
     let analyzed_plan = Analyzer::new().execute_and_check(
         logical_plan,
-        config.options(),
+        &config.options(),
         observe_analyzer,
     )?;
     // Note that the Analyzer has added a CAST to the plan to align the types
diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index e5ddb8459bc7..be36f37d93b4 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -923,7 +923,7 @@ impl<'a> TryInto> for &'a FormatOptions
 }
 
 /// A key value pair, with a corresponding description
-#[derive(Debug)]
+#[derive(Debug, Hash, PartialEq, Eq)]
 pub struct ConfigEntry {
     /// A unique string to identify this config value
     pub key: String,
diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs
index 32231e583fb8..9eb1bccc403f 100644
--- a/datafusion/core/src/execution/context/mod.rs
+++ b/datafusion/core/src/execution/context/mod.rs
@@ -1646,7 +1646,7 @@ impl SessionContext {
     /// [`ConfigOptions`]: crate::config::ConfigOptions
     pub fn state(&self) -> SessionState {
         let mut state = self.state.read().clone();
-        state.execution_props_mut().start_execution();
+        state.mark_start_execution();
         state
     }
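The practical effect of `mark_start_execution` is that a state snapshot now carries the options the query started with. A minimal sketch, assuming the accessors shown in this diff plus the existing `SessionState::execution_props` getter:

```rust
use datafusion::prelude::SessionContext;

fn snapshot_carries_options() {
    let ctx = SessionContext::new();
    // `state()` clones the session state and calls `mark_start_execution`,
    // which records the start timestamp and stores an Arc of the options.
    let state = ctx.state();
    assert!(state.execution_props().config_options().is_some());
}
```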
diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs
index a28cc38919dd..b1229d50635d 100644
--- a/datafusion/core/src/execution/session_state.rs
+++ b/datafusion/core/src/execution/session_state.rs
@@ -574,7 +574,7 @@ impl SessionState {
         // analyze & capture output of each rule
         let analyzer_result = self.analyzer.execute_and_check(
             e.plan.as_ref().clone(),
-            self.options(),
+            &self.options(),
             |analyzed_plan, analyzer| {
                 let analyzer_name = analyzer.name().to_string();
                 let plan_type = PlanType::AnalyzedLogicalPlan { analyzer_name };
@@ -636,7 +636,7 @@ impl SessionState {
         } else {
             let analyzed_plan = self.analyzer.execute_and_check(
                 plan.clone(),
-                self.options(),
+                &self.options(),
                 |_, _| {},
             )?;
 
             self.optimizer.optimize(analyzed_plan, self, |_, _| {})
@@ -738,10 +738,16 @@ impl SessionState {
     }
 
     /// return the configuration options
-    pub fn config_options(&self) -> &ConfigOptions {
+    pub fn config_options(&self) -> &Arc<ConfigOptions> {
         self.config.options()
     }
 
+    /// Mark the start of the execution
+    pub fn mark_start_execution(&mut self) {
+        let config = Arc::clone(self.config.options());
+        self.execution_props.mark_start_execution(config);
+    }
+
     /// Return the table options
     pub fn table_options(&self) -> &TableOptions {
         &self.table_options
@@ -1891,8 +1897,8 @@ impl OptimizerConfig for SessionState {
         &self.execution_props.alias_generator
     }
 
-    fn options(&self) -> &ConfigOptions {
-        self.config_options()
+    fn options(&self) -> Arc<ConfigOptions> {
+        Arc::clone(self.config.options())
     }
 
     fn function_registry(&self) -> Option<&dyn FunctionRegistry> {
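Note why every `execute_and_check` call site gained a leading `&`: `OptimizerConfig::options` now returns an owned `Arc<ConfigOptions>` (a cheap reference-count bump), while the analyzer still borrows `&ConfigOptions`, and deref coercion bridges the two. A standalone sketch of the same pattern:

```rust
use std::sync::Arc;

use datafusion_common::config::ConfigOptions;

fn takes_options(_: &ConfigOptions) {}

fn call_site(options_from_config: Arc<ConfigOptions>) {
    // `&Arc<ConfigOptions>` auto-derefs to `&ConfigOptions`, so callers can
    // simply borrow the temporary returned by `options()`.
    takes_options(&options_from_config);
}
```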
diff --git a/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs b/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs
index 0d500fd7f441..171839b390ff 100644
--- a/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs
+++ b/datafusion/core/tests/fuzz_cases/equivalence/ordering.rs
@@ -21,6 +21,7 @@ use crate::fuzz_cases::equivalence::utils::{
     is_table_same_after_sort, TestScalarUDF,
 };
 use arrow::compute::SortOptions;
+use datafusion_common::config::ConfigOptions;
 use datafusion_common::Result;
 use datafusion_expr::{Operator, ScalarUDF};
 use datafusion_physical_expr::equivalence::{
@@ -110,6 +111,7 @@ fn test_ordering_satisfy_with_equivalence_complex_random() -> Result<()> {
             Arc::clone(&test_fun),
             vec![col_a],
             &test_schema,
+            Arc::new(ConfigOptions::default()),
         )?);
         let a_plus_b = Arc::new(BinaryExpr::new(
             col("a", &test_schema)?,
diff --git a/datafusion/core/tests/fuzz_cases/equivalence/projection.rs b/datafusion/core/tests/fuzz_cases/equivalence/projection.rs
index d776796a1b75..f783ab1cc1b4 100644
--- a/datafusion/core/tests/fuzz_cases/equivalence/projection.rs
+++ b/datafusion/core/tests/fuzz_cases/equivalence/projection.rs
@@ -20,6 +20,7 @@ use crate::fuzz_cases::equivalence::utils::{
     is_table_same_after_sort, TestScalarUDF,
 };
 use arrow::compute::SortOptions;
+use datafusion_common::config::ConfigOptions;
 use datafusion_common::Result;
 use datafusion_expr::{Operator, ScalarUDF};
 use datafusion_physical_expr::equivalence::ProjectionMapping;
@@ -49,6 +50,7 @@ fn project_orderings_random() -> Result<()> {
             Arc::clone(&test_fun),
             vec![col_a],
             &test_schema,
+            Arc::new(ConfigOptions::default()),
         )?);
         // a + b
         let a_plus_b = Arc::new(BinaryExpr::new(
@@ -122,6 +124,7 @@ fn ordering_satisfy_after_projection_random() -> Result<()> {
             Arc::clone(&test_fun),
             vec![col_a],
             &test_schema,
+            Arc::new(ConfigOptions::default()),
         )?) as PhysicalExprRef;
         // a + b
         let a_plus_b = Arc::new(BinaryExpr::new(
diff --git a/datafusion/core/tests/fuzz_cases/equivalence/properties.rs b/datafusion/core/tests/fuzz_cases/equivalence/properties.rs
index e35ce3a6f8c9..382c4da94321 100644
--- a/datafusion/core/tests/fuzz_cases/equivalence/properties.rs
+++ b/datafusion/core/tests/fuzz_cases/equivalence/properties.rs
@@ -28,6 +28,7 @@ use datafusion_physical_expr::expressions::{col, BinaryExpr};
 use datafusion_physical_expr::{LexOrdering, ScalarFunctionExpr};
 use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr;
 
+use datafusion_common::config::ConfigOptions;
 use itertools::Itertools;
 
 #[test]
@@ -49,6 +50,7 @@ fn test_find_longest_permutation_random() -> Result<()> {
             Arc::clone(&test_fun),
             vec![col_a],
             &test_schema,
+            Arc::new(ConfigOptions::default()),
         )?) as _;
         let a_plus_b = Arc::new(BinaryExpr::new(
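All three fuzz suites thread a default snapshot into `ScalarFunctionExpr::try_new`, which now takes the options as a fourth argument. A sketch of the call shape (signature inferred from the call sites in this diff), assuming a UDF and schema are in scope:

```rust
use std::sync::Arc;

use arrow::datatypes::Schema;
use datafusion_common::config::ConfigOptions;
use datafusion_common::Result;
use datafusion_expr::ScalarUDF;
use datafusion_physical_expr::expressions::col;
use datafusion_physical_expr::ScalarFunctionExpr;

fn scalar_expr(udf: Arc<ScalarUDF>, schema: &Schema) -> Result<Arc<ScalarFunctionExpr>> {
    Ok(Arc::new(ScalarFunctionExpr::try_new(
        udf,
        vec![col("a", schema)?],
        schema,
        // tests that don't exercise options just pass a default snapshot
        Arc::new(ConfigOptions::default()),
    )?))
}
```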
diff --git a/datafusion/core/tests/optimizer/mod.rs b/datafusion/core/tests/optimizer/mod.rs
index 3b39c9adfa32..5934c9ab92da 100644
--- a/datafusion/core/tests/optimizer/mod.rs
+++ b/datafusion/core/tests/optimizer/mod.rs
@@ -160,7 +160,7 @@ fn test_sql(sql: &str) -> Result<LogicalPlan> {
     let analyzer = Analyzer::new();
     let optimizer = Optimizer::new();
     // analyze and optimize the logical plan
-    let plan = analyzer.execute_and_check(plan, config.options(), |_, _| {})?;
+    let plan = analyzer.execute_and_check(plan, &config.options(), |_, _| {})?;
     optimizer.optimize(plan, &config, |_, _| {})
 }
diff --git a/datafusion/core/tests/parquet/file_statistics.rs b/datafusion/core/tests/parquet/file_statistics.rs
index a60beaf665e5..65c7e12eb03c 100644
--- a/datafusion/core/tests/parquet/file_statistics.rs
+++ b/datafusion/core/tests/parquet/file_statistics.rs
@@ -38,6 +38,7 @@ use datafusion_execution::runtime_env::RuntimeEnvBuilder;
 use datafusion_expr::{col, lit, Expr};
 
 use datafusion::datasource::physical_plan::FileScanConfig;
+use datafusion_common::config::ConfigOptions;
 use datafusion_physical_optimizer::filter_pushdown::FilterPushdown;
 use datafusion_physical_optimizer::PhysicalOptimizerRule;
 use datafusion_physical_plan::filter::FilterExec;
@@ -55,7 +56,7 @@ async fn check_stats_precision_with_filter_pushdown() {
     let table = get_listing_table(&table_path, None, &opt).await;
 
     let (_, _, state) = get_cache_runtime_state();
-    let mut options = state.config().options().clone();
+    let mut options: ConfigOptions = state.config().options().as_ref().clone();
     options.execution.parquet.pushdown_filters = true;
 
     // Scan without filter, stats are exact
diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs
index c3512d6d9e7f..07d08b4f94b2 100644
--- a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs
+++ b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs
@@ -85,6 +85,7 @@ fn test_pushdown_volatile_functions_not_allowed() {
     // Test that we do not push down filters with volatile functions
     // Use random() as an example of a volatile function
     let scan = TestScanBuilder::new(schema()).with_support(true).build();
+    let cfg = Arc::new(ConfigOptions::default());
     let predicate = Arc::new(BinaryExpr::new(
         Arc::new(Column::new_with_schema("a", &schema()).unwrap()),
         Operator::Eq,
@@ -93,6 +94,7 @@ fn test_pushdown_volatile_functions_not_allowed() {
         Arc::new(
             ScalarFunctionExpr::try_new(
                 Arc::new(ScalarUDF::from(RandomFunc::new())),
                 vec![],
                 &schema(),
+                cfg,
             )
             .unwrap(),
         ),
diff --git a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs
index 1030a239523c..5d396b6b0247 100644
--- a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs
+++ b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs
@@ -128,6 +128,7 @@ fn test_update_matching_exprs() -> Result<()> {
                 )),
             ],
             Field::new("f", DataType::Int32, true).into(),
+            Arc::new(ConfigOptions::default()),
         )),
         Arc::new(CaseExpr::try_new(
             Some(Arc::new(Column::new("d", 2))),
@@ -193,6 +194,7 @@ fn test_update_matching_exprs() -> Result<()> {
                 )),
             ],
             Field::new("f", DataType::Int32, true).into(),
+            Arc::new(ConfigOptions::default()),
         )),
         Arc::new(CaseExpr::try_new(
             Some(Arc::new(Column::new("d", 3))),
@@ -261,6 +263,7 @@ fn test_update_projected_exprs() -> Result<()> {
                 )),
             ],
             Field::new("f", DataType::Int32, true).into(),
+            Arc::new(ConfigOptions::default()),
         )),
         Arc::new(CaseExpr::try_new(
             Some(Arc::new(Column::new("d", 2))),
@@ -326,6 +329,7 @@ fn test_update_projected_exprs() -> Result<()> {
                 )),
             ],
             Field::new("f", DataType::Int32, true).into(),
+            Arc::new(ConfigOptions::default()),
         )),
         Arc::new(CaseExpr::try_new(
             Some(Arc::new(Column::new("d_new", 3))),
diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
index dd8283613ae8..32c2f1d302b4 100644
--- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
+++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
@@ -28,7 +28,7 @@ use arrow::array::{
 use arrow::compute::kernels::numeric::add;
 use arrow::datatypes::{DataType, Field, Schema};
 use arrow_schema::extension::{Bool8, CanonicalExtensionType, ExtensionType};
-use arrow_schema::{ArrowError, FieldRef};
+use arrow_schema::{ArrowError, FieldRef, SchemaRef};
 use datafusion::common::test_util::batches_to_string;
 use datafusion::execution::context::{FunctionFactory, RegisterFunction, SessionState};
 use datafusion::prelude::*;
@@ -1786,3 +1786,58 @@ async fn test_extension_based_udf() -> Result<()> {
     ctx.deregister_table("t")?;
     Ok(())
 }
+
+#[tokio::test]
+async fn test_config_options_work_for_scalar_func() -> Result<()> {
+    #[derive(Debug)]
+    struct TestScalarUDF {
+        signature: Signature,
+    }
+
+    impl ScalarUDFImpl for TestScalarUDF {
+        fn as_any(&self) -> &dyn Any {
+            self
+        }
+        fn name(&self) -> &str {
+            "TestScalarUDF"
+        }
+
+        fn signature(&self) -> &Signature {
+            &self.signature
+        }
+
+        fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+            Ok(DataType::Utf8)
+        }
+
+        fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
+            let tz = args.config_options.execution.time_zone.clone();
+            Ok(ColumnarValue::Scalar(ScalarValue::from(tz)))
+        }
+    }
+
+    let udf = ScalarUDF::from(TestScalarUDF {
+        signature: Signature::uniform(1, vec![DataType::Utf8], Volatility::Stable),
+    });
+
+    let mut config = SessionConfig::new();
+    config.options_mut().execution.time_zone = "AEST".into();
+
+    let ctx = SessionContext::new_with_config(config);
+
+    ctx.register_udf(udf.clone());
+
+    let df = ctx.read_empty()?;
+    let df = df.select(vec![udf.call(vec![lit("a")]).alias("a")])?;
+    let actual = df.collect().await?;
+
+    let expected_schema = Schema::new(vec![Field::new("a", DataType::Utf8, false)]);
+    let expected = RecordBatch::try_new(
+        SchemaRef::from(expected_schema),
+        vec![create_array!(Utf8, ["AEST"])],
+    )?;
+
+    assert_eq!(expected, actual[0]);
+
+    Ok(())
+}
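The same option the test sets through `SessionConfig` can also be changed at runtime. A hedged sketch, assuming the standard `SET` statement support in `SessionContext::sql`:

```rust
use datafusion::error::Result;
use datafusion::prelude::SessionContext;

async fn set_time_zone_via_sql() -> Result<()> {
    let ctx = SessionContext::new();
    // `SET` writes through to the session's ConfigOptions, so later queries
    // (and therefore `args.config_options` inside UDFs) observe "AEST".
    ctx.sql("SET datafusion.execution.time_zone = 'AEST'").await?;
    Ok(())
}
```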
diff --git a/datafusion/execution/src/config.rs b/datafusion/execution/src/config.rs
index ccda6dc4d37d..491b1aca69ea 100644
--- a/datafusion/execution/src/config.rs
+++ b/datafusion/execution/src/config.rs
@@ -91,8 +91,11 @@ use datafusion_common::{
 /// [`SessionContext::new_with_config`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html#method.new_with_config
 #[derive(Clone, Debug)]
 pub struct SessionConfig {
-    /// Configuration options
-    options: ConfigOptions,
+    /// Configuration options for the current session.
+    ///
+    /// A new copy is created on write, if there are other outstanding
+    /// references to the same options.
+    options: Arc<ConfigOptions>,
     /// Opaque extensions.
     extensions: AnyMap,
 }
@@ -100,7 +103,7 @@ impl Default for SessionConfig {
     fn default() -> Self {
         Self {
-            options: ConfigOptions::new(),
+            options: Arc::new(ConfigOptions::new()),
             // Assume no extensions by default.
             extensions: HashMap::with_capacity_and_hasher(
                 0,
@@ -139,7 +142,7 @@ impl SessionConfig {
     ///     let config = SessionConfig::new();
     ///     assert!(config.options().execution.batch_size > 0);
     /// ```
-    pub fn options(&self) -> &ConfigOptions {
+    pub fn options(&self) -> &Arc<ConfigOptions> {
         &self.options
     }
 
@@ -155,7 +158,7 @@ impl SessionConfig {
     ///     assert_eq!(config.options().execution.batch_size, 1024);
     /// ```
     pub fn options_mut(&mut self) -> &mut ConfigOptions {
-        &mut self.options
+        Arc::make_mut(&mut self.options)
     }
 
     /// Set a configuration option
@@ -180,7 +183,7 @@ impl SessionConfig {
     /// Set a generic `str` configuration option
     pub fn set_str(mut self, key: &str, value: &str) -> Self {
-        self.options.set(key, value).unwrap();
+        self.options_mut().set(key, value).unwrap();
         self
     }
 
@@ -188,7 +191,7 @@ impl SessionConfig {
     pub fn with_batch_size(mut self, n: usize) -> Self {
         // batch size must be greater than zero
         assert!(n > 0);
-        self.options.execution.batch_size = n;
+        self.options_mut().execution.batch_size = n;
         self
     }
 
@@ -196,7 +199,7 @@ impl SessionConfig {
     ///
     /// [`target_partitions`]: datafusion_common::config::ExecutionOptions::target_partitions
     pub fn with_target_partitions(mut self, n: usize) -> Self {
-        self.options.execution.target_partitions = if n == 0 {
+        self.options_mut().execution.target_partitions = if n == 0 {
             datafusion_common::config::ExecutionOptions::default().target_partitions
         } else {
             n
@@ -272,62 +275,64 @@ impl SessionConfig {
         catalog: impl Into<String>,
         schema: impl Into<String>,
     ) -> Self {
-        self.options.catalog.default_catalog = catalog.into();
-        self.options.catalog.default_schema = schema.into();
+        self.options_mut().catalog.default_catalog = catalog.into();
+        self.options_mut().catalog.default_schema = schema.into();
         self
     }
 
     /// Controls whether the default catalog and schema will be automatically created
     pub fn with_create_default_catalog_and_schema(mut self, create: bool) -> Self {
-        self.options.catalog.create_default_catalog_and_schema = create;
+        self.options_mut().catalog.create_default_catalog_and_schema = create;
         self
     }
 
     /// Enables or disables the inclusion of `information_schema` virtual tables
     pub fn with_information_schema(mut self, enabled: bool) -> Self {
-        self.options.catalog.information_schema = enabled;
+        self.options_mut().catalog.information_schema = enabled;
         self
     }
 
     /// Enables or disables the use of repartitioning for joins to improve parallelism
     pub fn with_repartition_joins(mut self, enabled: bool) -> Self {
-        self.options.optimizer.repartition_joins = enabled;
+        self.options_mut().optimizer.repartition_joins = enabled;
         self
     }
 
     /// Enables or disables the use of repartitioning for aggregations to improve parallelism
     pub fn with_repartition_aggregations(mut self, enabled: bool) -> Self {
-        self.options.optimizer.repartition_aggregations = enabled;
+        self.options_mut().optimizer.repartition_aggregations = enabled;
         self
     }
 
     /// Sets minimum file range size for repartitioning scans
     pub fn with_repartition_file_min_size(mut self, size: usize) -> Self {
-        self.options.optimizer.repartition_file_min_size = size;
+        self.options_mut().optimizer.repartition_file_min_size = size;
         self
     }
 
     /// Enables or disables the allowing unordered symmetric hash join
     pub fn with_allow_symmetric_joins_without_pruning(mut self, enabled: bool) -> Self {
-        self.options.optimizer.allow_symmetric_joins_without_pruning = enabled;
+        self.options_mut()
+            .optimizer
+            .allow_symmetric_joins_without_pruning = enabled;
         self
     }
 
     /// Enables or disables the use of repartitioning for file scans
     pub fn with_repartition_file_scans(mut self, enabled: bool) -> Self {
-        self.options.optimizer.repartition_file_scans = enabled;
+        self.options_mut().optimizer.repartition_file_scans = enabled;
         self
     }
 
     /// Enables or disables the use of repartitioning for window functions to improve parallelism
     pub fn with_repartition_windows(mut self, enabled: bool) -> Self {
-        self.options.optimizer.repartition_windows = enabled;
+        self.options_mut().optimizer.repartition_windows = enabled;
         self
     }
 
     /// Enables or disables the use of per-partition sorting to improve parallelism
     pub fn with_repartition_sorts(mut self, enabled: bool) -> Self {
-        self.options.optimizer.repartition_sorts = enabled;
+        self.options_mut().optimizer.repartition_sorts = enabled;
         self
     }
 
@@ -336,7 +341,7 @@ impl SessionConfig {
     ///
     /// [prefer_existing_sort]: datafusion_common::config::OptimizerOptions::prefer_existing_sort
     pub fn with_prefer_existing_sort(mut self, enabled: bool) -> Self {
-        self.options.optimizer.prefer_existing_sort = enabled;
+        self.options_mut().optimizer.prefer_existing_sort = enabled;
         self
     }
 
@@ -344,13 +349,13 @@ impl SessionConfig {
     ///
     /// [prefer_existing_union]: datafusion_common::config::OptimizerOptions::prefer_existing_union
     pub fn with_prefer_existing_union(mut self, enabled: bool) -> Self {
-        self.options.optimizer.prefer_existing_union = enabled;
+        self.options_mut().optimizer.prefer_existing_union = enabled;
         self
     }
 
     /// Enables or disables the use of pruning predicate for parquet readers to skip row groups
     pub fn with_parquet_pruning(mut self, enabled: bool) -> Self {
-        self.options.execution.parquet.pruning = enabled;
+        self.options_mut().execution.parquet.pruning = enabled;
         self
     }
 
@@ -366,7 +371,7 @@ impl SessionConfig {
 
     /// Enables or disables the use of bloom filter for parquet readers to skip row groups
     pub fn with_parquet_bloom_filter_pruning(mut self, enabled: bool) -> Self {
-        self.options.execution.parquet.bloom_filter_on_read = enabled;
+        self.options_mut().execution.parquet.bloom_filter_on_read = enabled;
         self
     }
 
@@ -377,13 +382,13 @@ impl SessionConfig {
 
     /// Enables or disables the use of page index for parquet readers to skip parquet data pages
     pub fn with_parquet_page_index_pruning(mut self, enabled: bool) -> Self {
-        self.options.execution.parquet.enable_page_index = enabled;
+        self.options_mut().execution.parquet.enable_page_index = enabled;
         self
     }
 
     /// Enables or disables the collection of statistics after listing files
     pub fn with_collect_statistics(mut self, enabled: bool) -> Self {
-        self.options.execution.collect_statistics = enabled;
+        self.options_mut().execution.collect_statistics = enabled;
         self
     }
 
@@ -394,7 +399,7 @@ impl SessionConfig {
 
     /// Enables or disables the coalescence of small batches into larger batches
     pub fn with_coalesce_batches(mut self, enabled: bool) -> Self {
-        self.options.execution.coalesce_batches = enabled;
+        self.options_mut().execution.coalesce_batches = enabled;
         self
     }
 
@@ -406,7 +411,7 @@ impl SessionConfig {
 
     /// Enables or disables the round robin repartition for increasing parallelism
     pub fn with_round_robin_repartition(mut self, enabled: bool) -> Self {
-        self.options.optimizer.enable_round_robin_repartition = enabled;
+        self.options_mut().optimizer.enable_round_robin_repartition = enabled;
         self
     }
 
@@ -424,7 +429,7 @@ impl SessionConfig {
         mut self,
         sort_spill_reservation_bytes: usize,
     ) -> Self {
-        self.options.execution.sort_spill_reservation_bytes =
+        self.options_mut().execution.sort_spill_reservation_bytes =
             sort_spill_reservation_bytes;
         self
     }
@@ -433,7 +438,7 @@ impl SessionConfig {
     ///
     /// [`spill_compression`]: datafusion_common::config::ExecutionOptions::spill_compression
     pub fn with_spill_compression(mut self, spill_compression: SpillCompression) -> Self {
-        self.options.execution.spill_compression = spill_compression;
+        self.options_mut().execution.spill_compression = spill_compression;
         self
     }
 
@@ -445,7 +450,7 @@ impl SessionConfig {
         mut self,
         sort_in_place_threshold_bytes: usize,
     ) -> Self {
-        self.options.execution.sort_in_place_threshold_bytes =
+        self.options_mut().execution.sort_in_place_threshold_bytes =
             sort_in_place_threshold_bytes;
         self
     }
@@ -455,7 +460,8 @@ impl SessionConfig {
         mut self,
         enforce_batch_size_in_joins: bool,
     ) -> Self {
-        self.options.execution.enforce_batch_size_in_joins = enforce_batch_size_in_joins;
+        self.options_mut().execution.enforce_batch_size_in_joins =
+            enforce_batch_size_in_joins;
         self
     }
 
@@ -593,6 +599,7 @@ impl SessionConfig {
 
 impl From<ConfigOptions> for SessionConfig {
     fn from(options: ConfigOptions) -> Self {
+        let options = Arc::new(options);
         Self {
             options,
             ..Default::default()
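The `Arc::make_mut` in `options_mut` gives the config clone-on-write semantics: writers copy only when a reader still holds the snapshot. A minimal standalone sketch of that behavior:

```rust
use std::sync::Arc;

use datafusion_common::config::ConfigOptions;

fn clone_on_write() {
    let mut options = Arc::new(ConfigOptions::new());
    let snapshot = Arc::clone(&options); // e.g. held by an in-flight query

    // Two owners exist, so `make_mut` clones before handing out `&mut`
    Arc::make_mut(&mut options).execution.batch_size = 1024;

    assert_eq!(snapshot.execution.batch_size, 8192); // default, untouched
    assert_eq!(options.execution.batch_size, 1024);
}
```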
diff --git a/datafusion/expr/src/async_udf.rs b/datafusion/expr/src/async_udf.rs
index 753ad7b778b9..a62d4d5341f0 100644
--- a/datafusion/expr/src/async_udf.rs
+++ b/datafusion/expr/src/async_udf.rs
@@ -22,7 +22,6 @@ use crate::{
 use arrow::array::ArrayRef;
 use arrow::datatypes::{DataType, FieldRef};
 use async_trait::async_trait;
-use datafusion_common::config::ConfigOptions;
 use datafusion_common::error::Result;
 use datafusion_common::internal_err;
 use datafusion_expr_common::columnar_value::ColumnarValue;
@@ -49,11 +48,7 @@ pub trait AsyncScalarUDFImpl: ScalarUDFImpl {
     }
 
     /// Invoke the function asynchronously with the async arguments
-    async fn invoke_async_with_args(
-        &self,
-        args: ScalarFunctionArgs,
-        option: &ConfigOptions,
-    ) -> Result<ArrayRef>;
+    async fn invoke_async_with_args(&self, args: ScalarFunctionArgs) -> Result<ArrayRef>;
 }
 
 /// A scalar UDF that must be invoked using async methods
@@ -100,9 +95,8 @@ impl AsyncScalarUDF {
     pub async fn invoke_async_with_args(
         &self,
         args: ScalarFunctionArgs,
-        option: &ConfigOptions,
     ) -> Result<ArrayRef> {
-        self.inner.invoke_async_with_args(args, option).await
+        self.inner.invoke_async_with_args(args).await
     }
 }
diff --git a/datafusion/expr/src/execution_props.rs b/datafusion/expr/src/execution_props.rs
index d672bd1acc46..d8a8c6bb49e1 100644
--- a/datafusion/expr/src/execution_props.rs
+++ b/datafusion/expr/src/execution_props.rs
@@ -18,6 +18,7 @@
 use crate::var_provider::{VarProvider, VarType};
 use chrono::{DateTime, TimeZone, Utc};
 use datafusion_common::alias::AliasGenerator;
+use datafusion_common::config::ConfigOptions;
 use datafusion_common::HashMap;
 use std::sync::Arc;
 
@@ -35,6 +36,8 @@ pub struct ExecutionProps {
     pub query_execution_start_time: DateTime<Utc>,
     /// Alias generator used by subquery optimizer rules
    pub alias_generator: Arc<AliasGenerator>,
+    /// Snapshot of config options when the query started
+    pub config_options: Option<Arc<ConfigOptions>>,
     /// Providers for scalar variables
     pub var_providers: Option<HashMap<VarType, Arc<dyn VarProvider + Send + Sync>>>,
 }
@@ -53,6 +56,7 @@ impl ExecutionProps {
             // not being updated / propagated correctly
             query_execution_start_time: Utc.timestamp_nanos(0),
             alias_generator: Arc::new(AliasGenerator::new()),
+            config_options: None,
             var_providers: None,
         }
     }
@@ -66,11 +70,18 @@ impl ExecutionProps {
         self
     }
 
+    #[deprecated(since = "50.0.0", note = "Use mark_start_execution instead")]
+    pub fn start_execution(&mut self) -> &Self {
+        let default_config = Arc::new(ConfigOptions::default());
+        self.mark_start_execution(default_config)
+    }
+
     /// Marks the execution of query started timestamp.
     /// This also instantiates a new alias generator.
-    pub fn start_execution(&mut self) -> &Self {
+    pub fn mark_start_execution(&mut self, config_options: Arc<ConfigOptions>) -> &Self {
         self.query_execution_start_time = Utc::now();
         self.alias_generator = Arc::new(AliasGenerator::new());
+        self.config_options = Some(config_options);
         &*self
     }
 
@@ -99,6 +110,12 @@ impl ExecutionProps {
             .as_ref()
             .and_then(|var_providers| var_providers.get(&var_type).cloned())
     }
+
+    /// Returns the configuration properties for this execution
+    /// if the execution has started
+    pub fn config_options(&self) -> Option<&Arc<ConfigOptions>> {
+        self.config_options.as_ref()
+    }
 }
 
 #[cfg(test)]
@@ -107,6 +124,6 @@ mod test {
     #[test]
     fn debug() {
         let props = ExecutionProps::new();
-        assert_eq!("ExecutionProps { query_execution_start_time: 1970-01-01T00:00:00Z, alias_generator: AliasGenerator { next_id: 1 }, var_providers: None }", format!("{props:?}"));
+        assert_eq!("ExecutionProps { query_execution_start_time: 1970-01-01T00:00:00Z, alias_generator: AliasGenerator { next_id: 1 }, config_options: None, var_providers: None }", format!("{props:?}"));
     }
 }
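For code that called the old API, the migration is mechanical; a sketch:

```rust
use std::sync::Arc;

use datafusion_common::config::ConfigOptions;
use datafusion_expr::execution_props::ExecutionProps;

fn migrate_caller(session_options: Arc<ConfigOptions>) {
    let mut props = ExecutionProps::new();
    // Before (now deprecated): props.start_execution();
    // After: pass the session's options so downstream code can read them.
    props.mark_start_execution(session_options);
    assert!(props.config_options().is_some());
}
```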
diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs
index c3d09d920c33..40e0da2678eb 100644
--- a/datafusion/expr/src/udf.rs
+++ b/datafusion/expr/src/udf.rs
@@ -23,6 +23,7 @@ use crate::simplify::{ExprSimplifyResult, SimplifyInfo};
 use crate::sort_properties::{ExprProperties, SortProperties};
 use crate::{udf_equals_hash, ColumnarValue, Documentation, Expr, Signature};
 use arrow::datatypes::{DataType, Field, FieldRef};
+use datafusion_common::config::ConfigOptions;
 use datafusion_common::{not_impl_err, ExprSchema, Result, ScalarValue};
 use datafusion_expr_common::interval_arithmetic::Interval;
 use std::any::Any;
@@ -311,6 +312,8 @@ pub struct ScalarFunctionArgs {
     /// or `return_field_from_args`) when creating the physical expression
     /// from the logical expression
     pub return_field: FieldRef,
+    /// The config options at execution time
+    pub config_options: Arc<ConfigOptions>,
 }
 
 impl ScalarFunctionArgs {
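Any code that builds `ScalarFunctionArgs` literally (tests, benches, FFI glue) must now supply the new field; when no session is involved, a default snapshot is the natural filler. A sketch:

```rust
use std::sync::Arc;

use arrow::array::{ArrayRef, Int64Array};
use arrow::datatypes::{DataType, Field};
use datafusion_common::config::ConfigOptions;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};

fn args_with_default_options() -> ScalarFunctionArgs {
    let array: ArrayRef = Arc::new(Int64Array::from(vec![1, 2, 3]));
    ScalarFunctionArgs {
        args: vec![ColumnarValue::Array(array)],
        arg_fields: vec![Field::new("a", DataType::Int64, true).into()],
        number_rows: 3,
        return_field: Field::new("out", DataType::Int64, true).into(),
        config_options: Arc::new(ConfigOptions::default()), // the new field
    }
}
```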
diff --git a/datafusion/ffi/src/session_config.rs b/datafusion/ffi/src/session_config.rs
index aea03cf94e0a..a07b66c60196 100644
--- a/datafusion/ffi/src/session_config.rs
+++ b/datafusion/ffi/src/session_config.rs
@@ -15,17 +15,17 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::{
-    collections::HashMap,
-    ffi::{c_char, c_void, CString},
-};
-
 use abi_stable::{
     std_types::{RHashMap, RString},
     StableAbi,
 };
 use datafusion::{config::ConfigOptions, error::Result};
 use datafusion::{error::DataFusionError, prelude::SessionConfig};
+use std::sync::Arc;
+use std::{
+    collections::HashMap,
+    ffi::{c_char, c_void, CString},
+};
 
 /// A stable struct for sharing [`SessionConfig`] across FFI boundaries.
 /// Instead of attempting to expose the entire SessionConfig interface, we
@@ -85,11 +85,9 @@ unsafe extern "C" fn release_fn_wrapper(config: &mut FFI_SessionConfig) {
 unsafe extern "C" fn clone_fn_wrapper(config: &FFI_SessionConfig) -> FFI_SessionConfig {
     let old_private_data = config.private_data as *mut SessionConfigPrivateData;
-    let old_config = &(*old_private_data).config;
+    let old_config = Arc::clone(&(*old_private_data).config);
 
-    let private_data = Box::new(SessionConfigPrivateData {
-        config: old_config.clone(),
-    });
+    let private_data = Box::new(SessionConfigPrivateData { config: old_config });
 
     FFI_SessionConfig {
         config_options: config_options_fn_wrapper,
@@ -100,7 +98,7 @@ unsafe extern "C" fn clone_fn_wrapper(config: &FFI_SessionConfig) -> FFI_Session
 }
 
 struct SessionConfigPrivateData {
-    pub config: ConfigOptions,
+    pub config: Arc<ConfigOptions>,
 }
 
 impl From<&SessionConfig> for FFI_SessionConfig {
@@ -120,7 +118,7 @@ impl From<&SessionConfig> for FFI_SessionConfig {
     }
 
     let private_data = Box::new(SessionConfigPrivateData {
-        config: session.options().clone(),
+        config: Arc::clone(session.options()),
     });
 
     Self {
diff --git a/datafusion/ffi/src/udf/mod.rs b/datafusion/ffi/src/udf/mod.rs
index 1c835bd3ec90..4d634e0be258 100644
--- a/datafusion/ffi/src/udf/mod.rs
+++ b/datafusion/ffi/src/udf/mod.rs
@@ -32,6 +32,7 @@ use arrow::{
     ffi::{from_ffi, to_ffi, FFI_ArrowSchema},
 };
 use arrow_schema::FieldRef;
+use datafusion::config::ConfigOptions;
 use datafusion::logical_expr::{udf_equals_hash, ReturnFieldArgs};
 use datafusion::{
     error::DataFusionError,
@@ -207,6 +208,8 @@ unsafe extern "C" fn invoke_with_args_fn_wrapper(
         arg_fields,
         number_rows,
         return_field,
+        // TODO: pass config options: https://github.com/apache/datafusion/issues/17035
+        config_options: Arc::new(ConfigOptions::default()),
     };
 
     let result = rresult_return!(udf
@@ -378,6 +381,8 @@ impl ScalarUDFImpl for ForeignScalarUDF {
             arg_fields,
             number_rows,
             return_field,
+            // TODO: pass config options: https://github.com/apache/datafusion/issues/17035
+            config_options: _config_options,
         } = invoke_args;
 
         let args = args
diff --git a/datafusion/functions-nested/benches/map.rs b/datafusion/functions-nested/benches/map.rs
index 55dd7ad14460..ca12dde1f5c3 100644
--- a/datafusion/functions-nested/benches/map.rs
+++ b/datafusion/functions-nested/benches/map.rs
@@ -21,16 +21,16 @@ use arrow::array::{Int32Array, ListArray, StringArray};
 use arrow::buffer::{OffsetBuffer, ScalarBuffer};
 use arrow::datatypes::{DataType, Field};
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
-use rand::prelude::ThreadRng;
-use rand::Rng;
-use std::collections::HashSet;
-use std::sync::Arc;
-
+use datafusion_common::config::ConfigOptions;
 use datafusion_common::ScalarValue;
 use datafusion_expr::planner::ExprPlanner;
 use datafusion_expr::{ColumnarValue, Expr, ScalarFunctionArgs};
 use datafusion_functions_nested::map::map_udf;
 use datafusion_functions_nested::planner::NestedFunctionPlanner;
+use rand::prelude::ThreadRng;
+use rand::Rng;
+use std::collections::HashSet;
+use std::sync::Arc;
 
 fn keys(rng: &mut ThreadRng) -> Vec<String> {
     let mut keys = HashSet::with_capacity(1000);
@@ -105,6 +105,7 @@ fn criterion_benchmark(c: &mut Criterion) {
             Field::new("a", values.data_type(), true).into(),
         ];
         let return_field = Field::new("f", return_type, true).into();
+        let config_options = Arc::new(ConfigOptions::default());
 
         b.iter(|| {
             black_box(
@@ -114,6 +115,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: 1,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 })
                 .expect("map should work on valid values"),
            );
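Every benchmark in this PR follows the same recipe, sketched below: allocate the `Arc<ConfigOptions>` once outside the measured closure and pass an `Arc::clone` per invocation, so the timing loop pays only a reference-count bump rather than rebuilding the options:

```rust
use std::sync::Arc;

use datafusion_common::config::ConfigOptions;

fn measured_loop() {
    let config_options = Arc::new(ConfigOptions::default()); // built once

    for _ in 0..1000 {
        // stand-in for `b.iter(|| ... invoke_with_args(...))`
        let per_call = Arc::clone(&config_options); // cheap refcount bump
        std::hint::black_box(&per_call);
    }
}
```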
diff --git a/datafusion/functions/benches/ascii.rs b/datafusion/functions/benches/ascii.rs
index 1c7023f4497e..55471817d277 100644
--- a/datafusion/functions/benches/ascii.rs
+++ b/datafusion/functions/benches/ascii.rs
@@ -20,6 +20,7 @@ mod helper;
 
 use arrow::datatypes::{DataType, Field};
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
 use datafusion_expr::ScalarFunctionArgs;
 use helper::gen_string_array;
 use std::sync::Arc;
@@ -46,6 +47,7 @@ fn criterion_benchmark(c: &mut Criterion) {
     let arg_fields =
         vec![Field::new("a", args_string_ascii[0].data_type(), true).into()];
     let return_field = Field::new("f", DataType::Utf8, true).into();
+    let config_options = Arc::new(ConfigOptions::default());
 
     c.bench_function(
         format!("ascii/string_ascii_only (null_density={null_density})").as_str(),
@@ -56,6 +58,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: N_ROWS,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         },
@@ -76,6 +79,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: N_ROWS,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         },
@@ -102,6 +106,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: N_ROWS,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         },
@@ -122,6 +127,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: N_ROWS,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         },
diff --git a/datafusion/functions/benches/character_length.rs b/datafusion/functions/benches/character_length.rs
index b4a9e917f416..edb61c013e24 100644
--- a/datafusion/functions/benches/character_length.rs
+++ b/datafusion/functions/benches/character_length.rs
@@ -19,6 +19,7 @@ extern crate criterion;
 
 use arrow::datatypes::{DataType, Field};
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
 use datafusion_expr::ScalarFunctionArgs;
 use helper::gen_string_array;
 use std::sync::Arc;
@@ -30,6 +31,7 @@ fn criterion_benchmark(c: &mut Criterion) {
     let character_length = datafusion_functions::unicode::character_length();
 
     let return_field = Arc::new(Field::new("f", DataType::Utf8, true));
+    let config_options = Arc::new(ConfigOptions::default());
     let n_rows = 8192;
 
     for str_len in [8, 32, 128, 4096] {
@@ -51,6 +53,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: n_rows,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         },
@@ -74,6 +77,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: n_rows,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         },
@@ -97,6 +101,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: n_rows,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         },
@@ -120,6 +125,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: n_rows,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         },
diff --git a/datafusion/functions/benches/chr.rs b/datafusion/functions/benches/chr.rs
index 6a956bb78812..ec3f188f9084 100644
--- a/datafusion/functions/benches/chr.rs
+++ b/datafusion/functions/benches/chr.rs
@@ -24,6 +24,7 @@ use datafusion_functions::string::chr;
 use rand::{Rng, SeedableRng};
 
 use arrow::datatypes::{DataType, Field};
+use datafusion_common::config::ConfigOptions;
 use rand::rngs::StdRng;
 use std::sync::Arc;
 
@@ -55,6 +56,7 @@ fn criterion_benchmark(c: &mut Criterion) {
         .enumerate()
         .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into())
         .collect::<Vec<_>>();
+    let config_options = Arc::new(ConfigOptions::default());
 
     c.bench_function("chr", |b| {
         b.iter(|| {
@@ -65,6 +67,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: size,
                     return_field: Field::new("f", DataType::Utf8, true).into(),
+                    config_options: Arc::clone(&config_options),
                 })
                 .unwrap(),
             )
diff --git a/datafusion/functions/benches/concat.rs b/datafusion/functions/benches/concat.rs
index d350c03c497b..15f9ffbd7802 100644
--- a/datafusion/functions/benches/concat.rs
+++ b/datafusion/functions/benches/concat.rs
@@ -19,6 +19,7 @@ use arrow::array::ArrayRef;
 use arrow::datatypes::{DataType, Field};
 use arrow::util::bench_util::create_string_array_with_len;
 use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
+use datafusion_common::config::ConfigOptions;
 use datafusion_common::ScalarValue;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::string::concat;
@@ -44,6 +45,7 @@ fn criterion_benchmark(c: &mut Criterion) {
             Field::new(format!("arg_{idx}"), arg.data_type(), true).into()
         })
         .collect::<Vec<_>>();
+    let config_options = Arc::new(ConfigOptions::default());
 
     let mut group = c.benchmark_group("concat function");
     group.bench_function(BenchmarkId::new("concat", size), |b| {
@@ -56,6 +58,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         arg_fields: arg_fields.clone(),
                         number_rows: size,
                         return_field: Field::new("f", DataType::Utf8, true).into(),
+                        config_options: Arc::clone(&config_options),
                     })
                     .unwrap(),
             )
diff --git a/datafusion/functions/benches/cot.rs b/datafusion/functions/benches/cot.rs
index a32e0d834672..937d092cc028 100644
--- a/datafusion/functions/benches/cot.rs
+++ b/datafusion/functions/benches/cot.rs
@@ -26,6 +26,7 @@ use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::math::cot;
 
 use arrow::datatypes::{DataType, Field};
+use datafusion_common::config::ConfigOptions;
 use std::sync::Arc;
 
 fn criterion_benchmark(c: &mut Criterion) {
@@ -40,6 +41,7 @@ fn criterion_benchmark(c: &mut Criterion) {
             Field::new(format!("arg_{idx}"), arg.data_type(), true).into()
         })
         .collect::<Vec<_>>();
+    let config_options = Arc::new(ConfigOptions::default());
 
     c.bench_function(&format!("cot f32 array: {size}"), |b| {
         b.iter(|| {
@@ -50,6 +52,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: size,
                     return_field: Field::new("f", DataType::Float32, true).into(),
+                    config_options: Arc::clone(&config_options),
                 })
                 .unwrap(),
             )
@@ -75,6 +78,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: size,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 })
                 .unwrap(),
             )
diff --git a/datafusion/functions/benches/date_bin.rs b/datafusion/functions/benches/date_bin.rs
index ac766a002576..ea8705984f38 100644
--- a/datafusion/functions/benches/date_bin.rs
+++ b/datafusion/functions/benches/date_bin.rs
@@ -22,12 +22,12 @@ use std::sync::Arc;
 use arrow::array::{Array, ArrayRef, TimestampSecondArray};
 use arrow::datatypes::Field;
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
 use datafusion_common::ScalarValue;
-use rand::rngs::ThreadRng;
-use rand::Rng;
-
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::datetime::date_bin;
+use rand::rngs::ThreadRng;
+use rand::Rng;
 
 fn timestamps(rng: &mut ThreadRng) -> TimestampSecondArray {
     let mut seconds = vec![];
@@ -55,6 +55,8 @@ fn criterion_benchmark(c: &mut Criterion) {
             Field::new("a", interval.data_type(), true).into(),
             Field::new("b", timestamps.data_type(), true).into(),
         ];
+        let config_options = Arc::new(ConfigOptions::default());
+
         b.iter(|| {
             black_box(
                 udf.invoke_with_args(ScalarFunctionArgs {
@@ -62,6 +64,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: batch_len,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 })
                 .expect("date_bin should work on valid values"),
             )
diff --git a/datafusion/functions/benches/date_trunc.rs b/datafusion/functions/benches/date_trunc.rs
index ad4d0d0fbb79..70d372429b2d 100644
--- a/datafusion/functions/benches/date_trunc.rs
+++ b/datafusion/functions/benches/date_trunc.rs
@@ -22,12 +22,12 @@ use std::sync::Arc;
 use arrow::array::{Array, ArrayRef, TimestampSecondArray};
 use arrow::datatypes::Field;
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
 use datafusion_common::ScalarValue;
-use rand::rngs::ThreadRng;
-use rand::Rng;
-
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::datetime::date_trunc;
+use rand::rngs::ThreadRng;
+use rand::Rng;
 
 fn timestamps(rng: &mut ThreadRng) -> TimestampSecondArray {
     let mut seconds = vec![];
@@ -60,6 +60,8 @@ fn criterion_benchmark(c: &mut Criterion) {
         .return_type(&args.iter().map(|arg| arg.data_type()).collect::<Vec<_>>())
         .unwrap();
     let return_field = Arc::new(Field::new("f", return_type, true));
+    let config_options = Arc::new(ConfigOptions::default());
+
     b.iter(|| {
         black_box(
             udf.invoke_with_args(ScalarFunctionArgs {
@@ -67,6 +69,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                 arg_fields: arg_fields.clone(),
                 number_rows: batch_len,
                 return_field: Arc::clone(&return_field),
+                config_options: Arc::clone(&config_options),
             })
             .expect("date_trunc should work on valid values"),
         )
diff --git a/datafusion/functions/benches/encoding.rs b/datafusion/functions/benches/encoding.rs
index 830e0324766f..dc2529cd9fd7 100644
--- a/datafusion/functions/benches/encoding.rs
+++ b/datafusion/functions/benches/encoding.rs
@@ -21,12 +21,15 @@ use arrow::array::Array;
 use arrow::datatypes::{DataType, Field};
 use arrow::util::bench_util::create_string_array_with_len;
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::encoding;
 use std::sync::Arc;
 
 fn criterion_benchmark(c: &mut Criterion) {
     let decode = encoding::decode();
+    let config_options = Arc::new(ConfigOptions::default());
+
     for size in [1024, 4096, 8192] {
         let str_array = Arc::new(create_string_array_with_len::<i32>(size, 0.2, 32));
         c.bench_function(&format!("base64_decode/{size}"), |b| {
@@ -40,6 +43,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                 ],
                 number_rows: size,
                 return_field: Field::new("f", DataType::Utf8, true).into(),
+                config_options: Arc::clone(&config_options),
             })
             .unwrap();
 
@@ -57,6 +61,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         arg_fields: arg_fields.clone(),
                         number_rows: size,
                         return_field: Field::new("f", DataType::Utf8, true).into(),
+                        config_options: Arc::clone(&config_options),
                     })
                     .unwrap(),
                 )
@@ -75,6 +80,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                 arg_fields,
                 number_rows: size,
                 return_field: Field::new("f", DataType::Utf8, true).into(),
+                config_options: Arc::clone(&config_options),
             })
             .unwrap();
 
@@ -93,6 +99,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         arg_fields: arg_fields.clone(),
                         number_rows: size,
                         return_field: Arc::clone(&return_field),
+                        config_options: Arc::clone(&config_options),
                     })
                     .unwrap(),
                 )
diff --git a/datafusion/functions/benches/find_in_set.rs b/datafusion/functions/benches/find_in_set.rs
index bad540f049e2..df7d7cc09dd2 100644
--- a/datafusion/functions/benches/find_in_set.rs
+++ b/datafusion/functions/benches/find_in_set.rs
@@ -23,6 +23,7 @@ use arrow::util::bench_util::{
     create_string_array_with_len, create_string_view_array_with_len,
 };
 use criterion::{black_box, criterion_group, criterion_main, Criterion, SamplingMode};
+use datafusion_common::config::ConfigOptions;
 use datafusion_common::ScalarValue;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use rand::distr::Alphanumeric;
@@ -165,6 +166,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: n_rows,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::new(ConfigOptions::default()),
                 }))
             })
         });
@@ -182,6 +184,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: n_rows,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::new(ConfigOptions::default()),
                 }))
             })
         });
@@ -203,6 +206,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: n_rows,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::new(ConfigOptions::default()),
                 }))
             })
         });
@@ -213,6 +217,8 @@ fn criterion_benchmark(c: &mut Criterion) {
         .map(|arg| Field::new("a", arg.data_type().clone(), true).into())
         .collect::<Vec<_>>();
     let return_field = Arc::new(Field::new("f", DataType::Int32, true));
+    let config_options = Arc::new(ConfigOptions::default());
+
     group.bench_function(format!("string_view_len_{str_len}"), |b| {
         b.iter(|| {
             black_box(find_in_set.invoke_with_args(ScalarFunctionArgs {
@@ -220,6 +226,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                 arg_fields: arg_fields.clone(),
                 number_rows: n_rows,
                 return_field: Arc::clone(&return_field),
+                config_options: Arc::clone(&config_options),
             }))
         })
     });
diff --git a/datafusion/functions/benches/gcd.rs b/datafusion/functions/benches/gcd.rs
index f700d31123a9..913ed523543e 100644
--- a/datafusion/functions/benches/gcd.rs
+++ b/datafusion/functions/benches/gcd.rs
@@ -23,6 +23,7 @@ use arrow::{
     datatypes::DataType,
 };
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
 use datafusion_common::ScalarValue;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::math::gcd;
@@ -42,6 +43,7 @@ fn criterion_benchmark(c: &mut Criterion) {
     let array_a = ColumnarValue::Array(generate_i64_array(n_rows));
     let array_b = ColumnarValue::Array(generate_i64_array(n_rows));
     let udf = gcd();
+    let config_options = Arc::new(ConfigOptions::default());
 
     c.bench_function("gcd both array", |b| {
         b.iter(|| {
@@ -54,6 +56,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     ],
                     number_rows: 0,
                     return_field: Field::new("f", DataType::Int64, true).into(),
+                    config_options: Arc::clone(&config_options),
                 })
                 .expect("date_bin should work on valid values"),
             )
@@ -74,6 +77,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     ],
                     number_rows: 0,
                     return_field: Field::new("f", DataType::Int64, true).into(),
+                    config_options: Arc::clone(&config_options),
                 })
                 .expect("date_bin should work on valid values"),
             )
@@ -94,6 +98,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     ],
                     number_rows: 0,
                     return_field: Field::new("f", DataType::Int64, true).into(),
+                    config_options: Arc::clone(&config_options),
                 })
                 .expect("date_bin should work on valid values"),
             )
diff --git a/datafusion/functions/benches/initcap.rs b/datafusion/functions/benches/initcap.rs
index f89b11dff8fb..7562e990ca16 100644
--- a/datafusion/functions/benches/initcap.rs
+++ b/datafusion/functions/benches/initcap.rs
@@ -23,6 +23,7 @@ use arrow::util::bench_util::{
     create_string_array_with_len, create_string_view_array_with_len,
 };
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::unicode;
 use std::sync::Arc;
@@ -56,6 +57,7 @@ fn criterion_benchmark(c: &mut Criterion) {
             Field::new(format!("arg_{idx}"), arg.data_type(), true).into()
         })
         .collect::<Vec<_>>();
+    let config_options = Arc::new(ConfigOptions::default());
 
     c.bench_function(
         format!("initcap string view shorter than 12 [size={size}]").as_str(),
@@ -66,6 +68,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: size,
                     return_field: Field::new("f", DataType::Utf8View, true).into(),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         },
@@ -81,6 +84,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: size,
                     return_field: Field::new("f", DataType::Utf8View, true).into(),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         },
@@ -94,6 +98,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: size,
                     return_field: Field::new("f", DataType::Utf8, true).into(),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         });
diff --git a/datafusion/functions/benches/isnan.rs b/datafusion/functions/benches/isnan.rs
index 49d0a9e326dd..f59c7af939ab 100644
--- a/datafusion/functions/benches/isnan.rs
+++ b/datafusion/functions/benches/isnan.rs
@@ -23,6 +23,7 @@ use arrow::{
     util::bench_util::create_primitive_array,
 };
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::math::isnan;
 use std::sync::Arc;
@@ -39,6 +40,7 @@ fn criterion_benchmark(c: &mut Criterion) {
             Field::new(format!("arg_{idx}"), arg.data_type(), true).into()
         })
         .collect::<Vec<_>>();
+    let config_options = Arc::new(ConfigOptions::default());
 
     c.bench_function(&format!("isnan f32 array: {size}"), |b| {
         b.iter(|| {
@@ -49,6 +51,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: size,
                     return_field: Field::new("f", DataType::Boolean, true).into(),
+                    config_options: Arc::clone(&config_options),
                 })
                 .unwrap(),
             )
@@ -72,6 +75,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: size,
                     return_field: Field::new("f", DataType::Boolean, true).into(),
+                    config_options: Arc::clone(&config_options),
                 })
                 .unwrap(),
             )
diff --git a/datafusion/functions/benches/iszero.rs b/datafusion/functions/benches/iszero.rs
index 6d1d34c7a832..9752a9364b9f 100644
--- a/datafusion/functions/benches/iszero.rs
+++ b/datafusion/functions/benches/iszero.rs
@@ -23,6 +23,7 @@ use arrow::{
     util::bench_util::create_primitive_array,
 };
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::math::iszero;
 use std::sync::Arc;
@@ -41,6 +42,7 @@ fn criterion_benchmark(c: &mut Criterion) {
         })
         .collect::<Vec<_>>();
     let return_field = Arc::new(Field::new("f", DataType::Boolean, true));
+    let config_options = Arc::new(ConfigOptions::default());
 
     c.bench_function(&format!("iszero f32 array: {size}"), |b| {
         b.iter(|| {
@@ -51,6 +53,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: batch_len,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 })
                 .unwrap(),
             )
@@ -77,6 +80,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: batch_len,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 })
                 .unwrap(),
             )
diff --git a/datafusion/functions/benches/lower.rs b/datafusion/functions/benches/lower.rs
index cdf1529c108c..83d437c6caa6 100644
--- a/datafusion/functions/benches/lower.rs
+++ b/datafusion/functions/benches/lower.rs
@@ -23,6 +23,7 @@ use arrow::util::bench_util::{
     create_string_array_with_len, create_string_view_array_with_len,
 };
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::string;
 use std::sync::Arc;
@@ -122,6 +123,8 @@ fn create_args5(
 fn criterion_benchmark(c: &mut Criterion) {
     let lower = string::lower();
+    let config_options = Arc::new(ConfigOptions::default());
+
     for size in [1024, 4096, 8192] {
         let args = create_args1(size, 32);
         let arg_fields = args
@@ -140,6 +143,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                    arg_fields: arg_fields.clone(),
                    number_rows: size,
                    return_field: Field::new("f", DataType::Utf8, true).into(),
+                   config_options: Arc::clone(&config_options),
                }))
            })
        });
@@ -161,6 +165,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                    arg_fields: arg_fields.clone(),
                    number_rows: size,
                    return_field: Field::new("f", DataType::Utf8, true).into(),
+                   config_options: Arc::clone(&config_options),
                }))
            })
        });
@@ -184,6 +189,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                    arg_fields: arg_fields.clone(),
                    number_rows: size,
                    return_field: Field::new("f", DataType::Utf8, true).into(),
+                   config_options: Arc::clone(&config_options),
                }))
            })
        },
@@ -217,6 +223,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                    arg_fields: arg_fields.clone(),
                    number_rows: size,
                    return_field: Field::new("f", DataType::Utf8, true).into(),
+                   config_options: Arc::clone(&config_options),
                }))
            }),
        );
@@ -231,6 +238,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                    arg_fields: arg_fields.clone(),
                    number_rows: size,
                    return_field: Field::new("f", DataType::Utf8, true).into(),
+                   config_options: Arc::clone(&config_options),
                }))
            }),
        );
@@ -246,6 +254,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                    arg_fields: arg_fields.clone(),
                    number_rows: size,
                    return_field: Field::new("f", DataType::Utf8, true).into(),
+                   config_options: Arc::clone(&config_options),
                }))
            }),
        );
diff --git a/datafusion/functions/benches/ltrim.rs b/datafusion/functions/benches/ltrim.rs
index 7a44f40a689a..2712223506b9 100644
--- a/datafusion/functions/benches/ltrim.rs
+++ b/datafusion/functions/benches/ltrim.rs
@@ -23,6 +23,7 @@ use criterion::{
     black_box, criterion_group, criterion_main, measurement::Measurement, BenchmarkGroup,
     Criterion, SamplingMode,
 };
+use datafusion_common::config::ConfigOptions;
 use datafusion_common::ScalarValue;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDF};
 use datafusion_functions::string;
@@ -137,6 +138,8 @@ fn run_with_string_type<M: Measurement>(
         .enumerate()
         .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into())
         .collect::<Vec<_>>();
+    let config_options = Arc::new(ConfigOptions::default());
+
     group.bench_function(
         format!(
             "{string_type} [size={size}, len_before={len}, len_after={remaining_len}]",
@@ -149,6 +152,7 @@ fn run_with_string_type<M: Measurement>(
                     arg_fields: arg_fields.clone(),
                     number_rows: size,
                     return_field: Field::new("f", DataType::Utf8, true).into(),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         },
diff --git a/datafusion/functions/benches/make_date.rs b/datafusion/functions/benches/make_date.rs
index e1f609fbb35c..f0494a9d3b4e 100644
--- a/datafusion/functions/benches/make_date.rs
+++ b/datafusion/functions/benches/make_date.rs
@@ -22,12 +22,12 @@ use std::sync::Arc;
 use arrow::array::{Array, ArrayRef, Int32Array};
 use arrow::datatypes::{DataType, Field};
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
-use rand::rngs::ThreadRng;
-use rand::Rng;
-
+use datafusion_common::config::ConfigOptions;
 use datafusion_common::ScalarValue;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::datetime::make_date;
+use rand::rngs::ThreadRng;
+use rand::Rng;
 
 fn years(rng: &mut ThreadRng) -> Int32Array {
     let mut years = vec![];
@@ -69,6 +69,7 @@ fn criterion_benchmark(c: &mut Criterion) {
             Field::new("a", days.data_type(), true).into(),
         ];
         let return_field = Field::new("f", DataType::Date32, true).into();
+        let config_options = Arc::new(ConfigOptions::default());
 
         b.iter(|| {
             black_box(
@@ -78,6 +79,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: batch_len,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 })
                 .expect("make_date should work on valid values"),
             )
@@ -97,6 +99,8 @@ fn criterion_benchmark(c: &mut Criterion) {
             Field::new("a", days.data_type(), true).into(),
         ];
         let return_field = Field::new("f", DataType::Date32, true).into();
+        let config_options = Arc::new(ConfigOptions::default());
+
         b.iter(|| {
             black_box(
                 make_date()
@@ -105,6 +109,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: batch_len,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 })
                 .expect("make_date should work on valid values"),
             )
@@ -124,6 +129,8 @@ fn criterion_benchmark(c: &mut Criterion) {
             Field::new("a", days.data_type(), true).into(),
         ];
         let return_field = Field::new("f", DataType::Date32, true).into();
+        let config_options = Arc::new(ConfigOptions::default());
+
         b.iter(|| {
             black_box(
                 make_date()
@@ -132,6 +139,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: batch_len,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 })
                 .expect("make_date should work on valid values"),
             )
@@ -148,6 +156,7 @@ fn criterion_benchmark(c: &mut Criterion) {
             Field::new("a", day.data_type(), true).into(),
         ];
         let return_field = Field::new("f", DataType::Date32, true).into();
+        let config_options = Arc::new(ConfigOptions::default());
 
         b.iter(|| {
             black_box(
@@ -157,6 +166,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: 1,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 })
                 .expect("make_date should work on valid values"),
             )
diff --git a/datafusion/functions/benches/nullif.rs b/datafusion/functions/benches/nullif.rs
index 4ac977af9d42..93ec687c4d0e 100644
--- a/datafusion/functions/benches/nullif.rs
+++ b/datafusion/functions/benches/nullif.rs
@@ -20,6 +20,7 @@ extern crate criterion;
 use arrow::datatypes::{DataType, Field};
 use arrow::util::bench_util::create_string_array_with_len;
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
 use datafusion_common::ScalarValue;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::core::nullif;
@@ -40,6 +41,7 @@ fn criterion_benchmark(c: &mut Criterion) {
             Field::new(format!("arg_{idx}"), arg.data_type(), true).into()
         })
         .collect::<Vec<_>>();
+    let config_options = Arc::new(ConfigOptions::default());
 
     c.bench_function(&format!("nullif scalar array: {size}"), |b| {
         b.iter(|| {
@@ -50,6 +52,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: size,
                     return_field: Field::new("f", DataType::Utf8, true).into(),
+                    config_options: Arc::clone(&config_options),
                 })
                 .unwrap(),
             )
diff --git a/datafusion/functions/benches/random.rs b/datafusion/functions/benches/random.rs
index dc1e280b93b1..ac92aed586ba 100644
--- a/datafusion/functions/benches/random.rs
+++ b/datafusion/functions/benches/random.rs
@@ -19,14 +19,16 @@ extern crate criterion;
 
 use arrow::datatypes::{DataType, Field};
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
 use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl};
 use datafusion_functions::math::random::RandomFunc;
 use std::sync::Arc;
 
 fn criterion_benchmark(c: &mut Criterion) {
     let random_func = RandomFunc::new();
-
     let return_field = Field::new("f", DataType::Float64, true).into();
+    let config_options = Arc::new(ConfigOptions::default());
+
+    // Benchmark to evaluate 1M rows in batch size 8192
     let iterations = 1_000_000 / 8192; // Calculate how many iterations are needed to reach approximately 1M rows
     c.bench_function("random_1M_rows_batch_8192", |b| {
@@ -39,6 +41,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         arg_fields: vec![],
                         number_rows: 8192,
                         return_field: Arc::clone(&return_field),
+                        config_options: Arc::clone(&config_options),
                     })
                     .unwrap(),
             );
@@ -59,6 +62,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         arg_fields: vec![],
                         number_rows: 128,
                         return_field: Arc::clone(&return_field),
+                        config_options: Arc::clone(&config_options),
                     })
                     .unwrap(),
             );
diff --git a/datafusion/functions/benches/repeat.rs b/datafusion/functions/benches/repeat.rs
index 175933f5f745..991a5a467c0e 100644
--- a/datafusion/functions/benches/repeat.rs
+++ b/datafusion/functions/benches/repeat.rs
@@ -23,6 +23,7 @@ use arrow::util::bench_util::{
     create_string_array_with_len, create_string_view_array_with_len,
 };
 use criterion::{black_box, criterion_group, criterion_main, Criterion, SamplingMode};
+use datafusion_common::config::ConfigOptions;
 use datafusion_common::DataFusionError;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::string;
@@ -66,12 +67,14 @@ fn invoke_repeat_with_args(
         .enumerate()
         .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into())
         .collect::<Vec<_>>();
+    let config_options = Arc::new(ConfigOptions::default());
 
     string::repeat().invoke_with_args(ScalarFunctionArgs {
         args,
         arg_fields,
         number_rows: repeat_times as usize,
         return_field: Field::new("f", DataType::Utf8, true).into(),
+        config_options: Arc::clone(&config_options),
     })
 }
diff --git a/datafusion/functions/benches/reverse.rs b/datafusion/functions/benches/reverse.rs
index 640366011305..acac674a6de0 100644
--- a/datafusion/functions/benches/reverse.rs
+++ b/datafusion/functions/benches/reverse.rs
@@ -20,12 +20,15 @@ mod helper;
 
 use arrow::datatypes::{DataType, Field};
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
 use datafusion_expr::ScalarFunctionArgs;
 use helper::gen_string_array;
+use std::sync::Arc;
 
 fn criterion_benchmark(c: &mut Criterion) {
     // All benches are single batch run with 8192 rows
     let reverse = datafusion_functions::unicode::reverse();
+    let config_options = Arc::new(ConfigOptions::default());
 
     const N_ROWS: usize = 8192;
     const NULL_DENSITY: f32 = 0.1;
@@ -53,6 +56,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     ).into()],
                     number_rows: N_ROWS,
                     return_field: Field::new("f", DataType::Utf8, true).into(),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         },
@@ -74,6 +78,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     ],
                     number_rows: N_ROWS,
                     return_field: Field::new("f", DataType::Utf8, true).into(),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         },
@@ -100,6 +105,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     ).into()],
                     number_rows: N_ROWS,
                     return_field: Field::new("f", DataType::Utf8, true).into(),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         },
@@ -123,6 +129,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     ).into()],
                     number_rows: N_ROWS,
                     return_field: Field::new("f", DataType::Utf8, true).into(),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         },
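The reason the field must be threaded through everywhere is that `invoke_with_args` implementations can now read session configuration. A hedged sketch of a hypothetical UDF body consulting one option (`batch_size` is a real `ExecutionOptions` field; the function itself is illustrative, not part of this diff):

use datafusion_common::Result;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};

// Hypothetical body of some ScalarUDFImpl::invoke_with_args; only the
// config access is the point here.
fn invoke_with_args(args: ScalarFunctionArgs) -> Result<ColumnarValue> {
    // Session configuration now travels with the arguments.
    let batch_size = args.config_options.execution.batch_size;
    let _ = batch_size; // e.g. pick an algorithm based on the setting
    unimplemented!("illustrative sketch only")
}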
diff --git a/datafusion/functions/benches/signum.rs b/datafusion/functions/benches/signum.rs
index 10079bcc81c7..d56f3930d267 100644
--- a/datafusion/functions/benches/signum.rs
+++ b/datafusion/functions/benches/signum.rs
@@ -23,6 +23,7 @@ use arrow::{
     util::bench_util::create_primitive_array,
 };
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::math::signum;
 use std::sync::Arc;
@@ -41,6 +42,7 @@ fn criterion_benchmark(c: &mut Criterion) {
         })
         .collect::<Vec<_>>();
     let return_field = Field::new("f", DataType::Float32, true).into();
+    let config_options = Arc::new(ConfigOptions::default());
 
     c.bench_function(&format!("signum f32 array: {size}"), |b| {
         b.iter(|| {
@@ -51,6 +53,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         arg_fields: arg_fields.clone(),
                         number_rows: batch_len,
                         return_field: Arc::clone(&return_field),
+                        config_options: Arc::clone(&config_options),
                     })
                     .unwrap(),
             )
@@ -78,6 +81,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         arg_fields: arg_fields.clone(),
                         number_rows: batch_len,
                         return_field: Arc::clone(&return_field),
+                        config_options: Arc::clone(&config_options),
                     })
                     .unwrap(),
             )
diff --git a/datafusion/functions/benches/strpos.rs b/datafusion/functions/benches/strpos.rs
index df32db1182f1..fc31abb23d84 100644
--- a/datafusion/functions/benches/strpos.rs
+++ b/datafusion/functions/benches/strpos.rs
@@ -20,6 +20,7 @@ extern crate criterion;
 use arrow::array::{StringArray, StringViewArray};
 use arrow::datatypes::{DataType, Field};
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use rand::distr::Alphanumeric;
 use rand::prelude::StdRng;
@@ -114,6 +115,8 @@ fn criterion_benchmark(c: &mut Criterion) {
     let arg_fields = vec![Field::new("a", args_string_ascii[0].data_type(), true).into()];
     let return_field = Field::new("f", DataType::Int32, true).into();
+    let config_options = Arc::new(ConfigOptions::default());
+
     c.bench_function(
         &format!("strpos_StringArray_ascii_str_len_{str_len}"),
         |b| {
@@ -123,6 +126,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: n_rows,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         },
@@ -140,6 +144,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: n_rows,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         });
@@ -158,6 +163,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: n_rows,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         },
@@ -177,6 +183,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: arg_fields.clone(),
                     number_rows: n_rows,
                     return_field: Arc::clone(&return_field),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         },
diff --git a/datafusion/functions/benches/substr.rs b/datafusion/functions/benches/substr.rs
index 342e18b0d9a2..f14f10894649 100644
--- a/datafusion/functions/benches/substr.rs
+++ b/datafusion/functions/benches/substr.rs
@@ -23,6 +23,7 @@ use arrow::util::bench_util::{
     create_string_array_with_len, create_string_view_array_with_len,
 };
 use criterion::{black_box, criterion_group, criterion_main, Criterion, SamplingMode};
+use datafusion_common::config::ConfigOptions;
 use datafusion_common::DataFusionError;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::unicode;
@@ -106,12 +107,14 @@ fn invoke_substr_with_args(
         .enumerate()
         .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into())
         .collect::<Vec<_>>();
+    let config_options = Arc::new(ConfigOptions::default());
 
     unicode::substr().invoke_with_args(ScalarFunctionArgs {
         args: args.clone(),
         arg_fields,
         number_rows,
         return_field: Field::new("f", DataType::Utf8View, true).into(),
+        config_options: Arc::clone(&config_options),
     })
 }
diff --git a/datafusion/functions/benches/substr_index.rs b/datafusion/functions/benches/substr_index.rs
index e772fb38fc40..2cc381e4545e 100644
--- a/datafusion/functions/benches/substr_index.rs
+++ b/datafusion/functions/benches/substr_index.rs
@@ -22,13 +22,13 @@ use std::sync::Arc;
 use arrow::array::{ArrayRef, Int64Array, StringArray};
 use arrow::datatypes::{DataType, Field};
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
+use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
+use datafusion_functions::unicode::substr_index;
 use rand::distr::{Alphanumeric, Uniform};
 use rand::prelude::Distribution;
 use rand::Rng;
 
-use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
-use datafusion_functions::unicode::substr_index;
-
 struct Filter<Dist, Test> {
     dist: Dist,
     test: Test,
 }
@@ -98,6 +98,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                 Field::new(format!("arg_{idx}"), arg.data_type(), true).into()
             })
             .collect::<Vec<_>>();
+        let config_options = Arc::new(ConfigOptions::default());
 
         b.iter(|| {
             black_box(
@@ -107,6 +108,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         arg_fields: arg_fields.clone(),
                         number_rows: batch_len,
                         return_field: Field::new("f", DataType::Utf8, true).into(),
+                        config_options: Arc::clone(&config_options),
                     })
                     .expect("substr_index should work on valid values"),
             )
diff --git a/datafusion/functions/benches/to_char.rs b/datafusion/functions/benches/to_char.rs
index d19714ce6166..0f4cc264cbe0 100644
--- a/datafusion/functions/benches/to_char.rs
+++ b/datafusion/functions/benches/to_char.rs
@@ -24,14 +24,14 @@ use arrow::datatypes::{DataType, Field};
 use chrono::prelude::*;
 use chrono::TimeDelta;
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
-use rand::prelude::IndexedRandom;
-use rand::rngs::ThreadRng;
-use rand::Rng;
-
+use datafusion_common::config::ConfigOptions;
 use datafusion_common::ScalarValue;
 use datafusion_common::ScalarValue::TimestampNanosecond;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::datetime::to_char;
+use rand::prelude::IndexedRandom;
+use rand::rngs::ThreadRng;
+use rand::Rng;
 
 fn random_date_in_range(
     rng: &mut ThreadRng,
@@ -81,6 +81,8 @@ fn patterns(rng: &mut ThreadRng) -> StringArray {
 }
 
 fn criterion_benchmark(c: &mut Criterion) {
+    let config_options = Arc::new(ConfigOptions::default());
+
     c.bench_function("to_char_array_array_1000", |b| {
         let mut rng = rand::rng();
         let data_arr = data(&mut rng);
@@ -99,6 +101,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         ],
                         number_rows: batch_len,
                         return_field: Field::new("f", DataType::Utf8, true).into(),
+                        config_options: Arc::clone(&config_options),
                     })
                     .expect("to_char should work on valid values"),
             )
@@ -124,6 +127,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         ],
                         number_rows: batch_len,
                         return_field: Field::new("f", DataType::Utf8, true).into(),
+                        config_options: Arc::clone(&config_options),
                     })
                     .expect("to_char should work on valid values"),
             )
@@ -155,6 +159,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         ],
                         number_rows: 1,
                         return_field: Field::new("f", DataType::Utf8, true).into(),
+                        config_options: Arc::clone(&config_options),
                     })
                     .expect("to_char should work on valid values"),
             )
diff --git a/datafusion/functions/benches/to_hex.rs b/datafusion/functions/benches/to_hex.rs
index 4a02b74ca42d..cad9addab10e 100644
--- a/datafusion/functions/benches/to_hex.rs
+++ b/datafusion/functions/benches/to_hex.rs
@@ -20,6 +20,7 @@ extern crate criterion;
 use arrow::datatypes::{DataType, Field, Int32Type, Int64Type};
 use arrow::util::bench_util::create_primitive_array;
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::string;
 use std::sync::Arc;
@@ -30,6 +31,8 @@ fn criterion_benchmark(c: &mut Criterion) {
     let i32_array = Arc::new(create_primitive_array::<Int32Type>(size, 0.2));
     let batch_len = i32_array.len();
     let i32_args = vec![ColumnarValue::Array(i32_array)];
+    let config_options = Arc::new(ConfigOptions::default());
+
     c.bench_function(&format!("to_hex i32 array: {size}"), |b| {
         b.iter(|| {
             let args_cloned = i32_args.clone();
@@ -39,6 +42,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         arg_fields: vec![Field::new("a", DataType::Int32, false).into()],
                         number_rows: batch_len,
                         return_field: Field::new("f", DataType::Utf8, true).into(),
+                        config_options: Arc::clone(&config_options),
                     })
                     .unwrap(),
             )
@@ -56,6 +60,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         arg_fields: vec![Field::new("a", DataType::Int64, false).into()],
                         number_rows: batch_len,
                         return_field: Field::new("f", DataType::Utf8, true).into(),
+                        config_options: Arc::clone(&config_options),
                     })
                     .unwrap(),
             )
diff --git a/datafusion/functions/benches/to_timestamp.rs b/datafusion/functions/benches/to_timestamp.rs
index d89811348489..7e15d896f83e 100644
--- a/datafusion/functions/benches/to_timestamp.rs
+++ b/datafusion/functions/benches/to_timestamp.rs
@@ -24,7 +24,7 @@ use arrow::array::{Array, ArrayRef, StringArray};
 use arrow::compute::cast;
 use arrow::datatypes::{DataType, Field, TimeUnit};
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
-
+use datafusion_common::config::ConfigOptions;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::datetime::to_timestamp;
 
@@ -113,6 +113,8 @@ fn criterion_benchmark(c: &mut Criterion) {
         Field::new("f", DataType::Timestamp(TimeUnit::Nanosecond, None), true).into();
     let arg_field = Field::new("a", DataType::Utf8, false).into();
     let arg_fields = vec![arg_field];
+    let config_options = Arc::new(ConfigOptions::default());
+
     c.bench_function("to_timestamp_no_formats_utf8", |b| {
         let arr_data = data();
         let batch_len = arr_data.len();
@@ -126,6 +128,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         arg_fields: arg_fields.clone(),
                         number_rows: batch_len,
                         return_field: Arc::clone(&return_field),
+                        config_options: Arc::clone(&config_options),
                     })
                     .expect("to_timestamp should work on valid values"),
             )
@@ -145,6 +148,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         arg_fields: arg_fields.clone(),
                         number_rows: batch_len,
                         return_field: Arc::clone(&return_field),
+                        config_options: Arc::clone(&config_options),
                     })
                     .expect("to_timestamp should work on valid values"),
             )
@@ -164,6 +168,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         arg_fields: arg_fields.clone(),
                         number_rows: batch_len,
                         return_field: Arc::clone(&return_field),
+                        config_options: Arc::clone(&config_options),
                     })
                     .expect("to_timestamp should work on valid values"),
             )
@@ -196,6 +201,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         arg_fields: arg_fields.clone(),
                         number_rows: batch_len,
                         return_field: Arc::clone(&return_field),
+                        config_options: Arc::clone(&config_options),
                     })
                     .expect("to_timestamp should work on valid values"),
             )
@@ -236,6 +242,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         arg_fields: arg_fields.clone(),
                         number_rows: batch_len,
                         return_field: Arc::clone(&return_field),
+                        config_options: Arc::clone(&config_options),
                     })
                     .expect("to_timestamp should work on valid values"),
             )
@@ -277,6 +284,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         arg_fields: arg_fields.clone(),
                         number_rows: batch_len,
                         return_field: Arc::clone(&return_field),
+                        config_options: Arc::clone(&config_options),
                     })
                     .expect("to_timestamp should work on valid values"),
             )
diff --git a/datafusion/functions/benches/trunc.rs b/datafusion/functions/benches/trunc.rs
index 897e21c1e1d9..160eac913d2b 100644
--- a/datafusion/functions/benches/trunc.rs
+++ b/datafusion/functions/benches/trunc.rs
@@ -26,6 +26,7 @@ use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::math::trunc;
 
 use arrow::datatypes::DataType;
+use datafusion_common::config::ConfigOptions;
 use std::sync::Arc;
 
 fn criterion_benchmark(c: &mut Criterion) {
@@ -35,6 +36,8 @@ fn criterion_benchmark(c: &mut Criterion) {
     let f32_args = vec![ColumnarValue::Array(f32_array)];
     let arg_fields = vec![Field::new("a", DataType::Float32, false).into()];
     let return_field = Field::new("f", DataType::Float32, true).into();
+    let config_options = Arc::new(ConfigOptions::default());
+
     c.bench_function(&format!("trunc f32 array: {size}"), |b| {
         b.iter(|| {
             black_box(
@@ -44,6 +47,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         arg_fields: arg_fields.clone(),
                         number_rows: size,
                         return_field: Arc::clone(&return_field),
+                        config_options: Arc::clone(&config_options),
                     })
                     .unwrap(),
             )
@@ -62,6 +66,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                         arg_fields: arg_fields.clone(),
                         number_rows: size,
                         return_field: Arc::clone(&return_field),
+                        config_options: Arc::clone(&config_options),
                     })
                     .unwrap(),
             )
diff --git a/datafusion/functions/benches/upper.rs b/datafusion/functions/benches/upper.rs
index bf2c4161001e..700f70b4b4f3 100644
--- a/datafusion/functions/benches/upper.rs
+++ b/datafusion/functions/benches/upper.rs
@@ -20,6 +20,7 @@ extern crate criterion;
 use arrow::datatypes::{DataType, Field};
 use arrow::util::bench_util::create_string_array_with_len;
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
 use datafusion_functions::string;
 use std::sync::Arc;
@@ -35,6 +36,8 @@ fn create_args(size: usize, str_len: usize) -> Vec<ColumnarValue> {
 
 fn criterion_benchmark(c: &mut Criterion) {
     let upper = string::upper();
+    let config_options = Arc::new(ConfigOptions::default());
+
     for size in [1024, 4096, 8192] {
         let args = create_args(size, 32);
         c.bench_function("upper_all_values_are_ascii", |b| {
@@ -45,6 +48,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                     arg_fields: vec![Field::new("a", DataType::Utf8, true).into()],
                     number_rows: size,
                     return_field: Field::new("f", DataType::Utf8, true).into(),
+                    config_options: Arc::clone(&config_options),
                 }))
             })
         });
diff --git a/datafusion/functions/benches/uuid.rs b/datafusion/functions/benches/uuid.rs
index 942af122562a..f9345a97eb53 100644
--- a/datafusion/functions/benches/uuid.rs
+++ b/datafusion/functions/benches/uuid.rs
@@ -19,11 +19,15 @@ extern crate criterion;
 
 use arrow::datatypes::{DataType, Field};
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
 use datafusion_expr::ScalarFunctionArgs;
 use datafusion_functions::string;
+use std::sync::Arc;
 
 fn criterion_benchmark(c: &mut Criterion) {
     let uuid = string::uuid();
+    let config_options = Arc::new(ConfigOptions::default());
+
     c.bench_function("uuid", |b| {
         b.iter(|| {
             black_box(uuid.invoke_with_args(ScalarFunctionArgs {
@@ -31,6 +35,7 @@ fn criterion_benchmark(c: &mut Criterion) {
                 arg_fields: vec![],
                 number_rows: 1024,
                 return_field: Field::new("f", DataType::Utf8, true).into(),
+                config_options: Arc::clone(&config_options),
             }))
         })
     });
diff --git a/datafusion/functions/src/core/union_extract.rs b/datafusion/functions/src/core/union_extract.rs
index be49f8226712..a3d1ec82ffbb 100644
--- a/datafusion/functions/src/core/union_extract.rs
+++ b/datafusion/functions/src/core/union_extract.rs
@@ -169,10 +169,11 @@ fn find_field<'a>(fields: &'a UnionFields, name: &str) -> Result<(i8, &'a FieldR
 #[cfg(test)]
 mod tests {
-
     use arrow::datatypes::{DataType, Field, UnionFields, UnionMode};
+    use datafusion_common::config::ConfigOptions;
     use datafusion_common::{Result, ScalarValue};
     use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl};
+    use std::sync::Arc;
 
     use super::UnionExtractFun;
@@ -207,6 +208,7 @@ mod tests {
             arg_fields,
             number_rows: 1,
             return_field: Field::new("f", DataType::Utf8, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         })?;
 
         assert_scalar(result, ScalarValue::Utf8(None));
@@ -229,6 +231,7 @@ mod tests {
             arg_fields,
             number_rows: 1,
             return_field: Field::new("f", DataType::Utf8, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         })?;
 
         assert_scalar(result, ScalarValue::Utf8(None));
@@ -250,6 +253,7 @@ mod tests {
             arg_fields,
             number_rows: 1,
             return_field: Field::new("f", DataType::Utf8, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         })?;
 
         assert_scalar(result, ScalarValue::new_utf8("42"));
diff --git a/datafusion/functions/src/core/union_tag.rs b/datafusion/functions/src/core/union_tag.rs
index 3a4d96de2bc0..5d589d2167cb 100644
--- a/datafusion/functions/src/core/union_tag.rs
+++ b/datafusion/functions/src/core/union_tag.rs
@@ -156,6 +156,7 @@ impl ScalarUDFImpl for UnionTagFunc {
 mod tests {
     use super::UnionTagFunc;
     use arrow::datatypes::{DataType, Field, UnionFields, UnionMode};
+    use datafusion_common::config::ConfigOptions;
     use datafusion_common::ScalarValue;
     use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl};
     use std::sync::Arc;
@@ -182,6 +183,7 @@ mod tests {
                 number_rows: 1,
                 return_field: Field::new("res", return_type, true).into(),
                 arg_fields: vec![],
+                config_options: Arc::new(ConfigOptions::default()),
             })
             .unwrap();
@@ -204,6 +206,7 @@ mod tests {
                 number_rows: 1,
                 return_field: Field::new("res", return_type, true).into(),
                 arg_fields: vec![],
+                config_options: Arc::new(ConfigOptions::default()),
             })
             .unwrap();
diff --git a/datafusion/functions/src/core/version.rs b/datafusion/functions/src/core/version.rs
index b3abe246b4b3..d68dbfc546ea 100644
--- a/datafusion/functions/src/core/version.rs
+++ b/datafusion/functions/src/core/version.rs
@@ -98,7 +98,9 @@ impl ScalarUDFImpl for VersionFunc {
 mod test {
     use super::*;
     use arrow::datatypes::Field;
+    use datafusion_common::config::ConfigOptions;
     use datafusion_expr::ScalarUDF;
+    use std::sync::Arc;
 
     #[tokio::test]
     async fn test_version_udf() {
@@ -109,6 +111,7 @@ mod test {
                 arg_fields: vec![],
                 number_rows: 0,
                 return_field: Field::new("f", DataType::Utf8, true).into(),
+                config_options: Arc::new(ConfigOptions::default()),
             })
             .unwrap();
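In the unit tests nothing is shared: each `ScalarFunctionArgs` simply gets a fresh `Arc::new(ConfigOptions::default())`. The boilerplate the tests above repeat could be bundled into a helper like the following sketch (the helper name and defaults are invented for illustration):

use std::sync::Arc;
use arrow::datatypes::{DataType, Field, FieldRef};
use datafusion_common::config::ConfigOptions;
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};

// Hypothetical test helper: bundles the construction that each test in
// this diff now repeats, defaulting `config_options` per invocation.
fn default_args(args: Vec<ColumnarValue>, arg_fields: Vec<FieldRef>) -> ScalarFunctionArgs {
    ScalarFunctionArgs {
        args,
        arg_fields,
        number_rows: 1,
        return_field: Field::new("f", DataType::Utf8, true).into(),
        config_options: Arc::new(ConfigOptions::default()),
    }
}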
diff --git a/datafusion/functions/src/datetime/date_bin.rs b/datafusion/functions/src/datetime/date_bin.rs
index 1c801dfead72..d71bf31f95b9 100644
--- a/datafusion/functions/src/datetime/date_bin.rs
+++ b/datafusion/functions/src/datetime/date_bin.rs
@@ -512,6 +512,7 @@ mod tests {
     use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
 
     use chrono::TimeDelta;
+    use datafusion_common::config::ConfigOptions;
 
     fn invoke_date_bin_with_args(
         args: Vec<ColumnarValue>,
         number_rows: usize,
@@ -528,6 +529,7 @@ mod tests {
             arg_fields,
             number_rows,
             return_field: Arc::clone(return_field),
+            config_options: Arc::new(ConfigOptions::default()),
         };
         DateBinFunc::new().invoke_with_args(args)
     }
diff --git a/datafusion/functions/src/datetime/date_trunc.rs b/datafusion/functions/src/datetime/date_trunc.rs
index d3d52e237e15..05e8ab74936f 100644
--- a/datafusion/functions/src/datetime/date_trunc.rs
+++ b/datafusion/functions/src/datetime/date_trunc.rs
@@ -559,6 +559,7 @@ mod tests {
     use arrow::array::{Array, TimestampNanosecondArray};
     use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
     use arrow::datatypes::{DataType, Field, TimeUnit};
+    use datafusion_common::config::ConfigOptions;
     use datafusion_common::ScalarValue;
     use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
@@ -814,6 +815,7 @@ mod tests {
                     true,
                 )
                 .into(),
+                config_options: Arc::new(ConfigOptions::default()),
             };
             let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
             if let ColumnarValue::Array(result) = result {
@@ -1001,6 +1003,7 @@ mod tests {
                     true,
                 )
                 .into(),
+                config_options: Arc::new(ConfigOptions::default()),
             };
             let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
             if let ColumnarValue::Array(result) = result {
diff --git a/datafusion/functions/src/datetime/from_unixtime.rs b/datafusion/functions/src/datetime/from_unixtime.rs
index c1497040261c..16eea0be8be6 100644
--- a/datafusion/functions/src/datetime/from_unixtime.rs
+++ b/datafusion/functions/src/datetime/from_unixtime.rs
@@ -164,6 +164,7 @@ mod test {
     use crate::datetime::from_unixtime::FromUnixtimeFunc;
     use arrow::datatypes::TimeUnit::Second;
     use arrow::datatypes::{DataType, Field};
+    use datafusion_common::config::ConfigOptions;
     use datafusion_common::ScalarValue;
     use datafusion_common::ScalarValue::Int64;
     use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
@@ -177,6 +178,7 @@ mod test {
             arg_fields: vec![arg_field],
             number_rows: 1,
             return_field: Field::new("f", DataType::Timestamp(Second, None), true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
 
         let result = FromUnixtimeFunc::new().invoke_with_args(args).unwrap();
@@ -209,6 +211,7 @@ mod test {
                 true,
             )
             .into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
 
         let result = FromUnixtimeFunc::new().invoke_with_args(args).unwrap();
diff --git a/datafusion/functions/src/datetime/make_date.rs b/datafusion/functions/src/datetime/make_date.rs
index d2bc08549f90..677b54cd15f0 100644
--- a/datafusion/functions/src/datetime/make_date.rs
+++ b/datafusion/functions/src/datetime/make_date.rs
@@ -231,6 +231,7 @@ mod tests {
     use crate::datetime::make_date::MakeDateFunc;
     use arrow::array::{Array, Date32Array, Int32Array, Int64Array, UInt32Array};
     use arrow::datatypes::{DataType, Field};
+    use datafusion_common::config::ConfigOptions;
     use datafusion_common::{DataFusionError, ScalarValue};
     use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
     use std::sync::Arc;
@@ -248,6 +249,7 @@ mod tests {
             arg_fields,
             number_rows,
             return_field: Field::new("f", DataType::Date32, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
         MakeDateFunc::new().invoke_with_args(args)
     }
diff --git a/datafusion/functions/src/datetime/to_char.rs b/datafusion/functions/src/datetime/to_char.rs
index 219a9b576423..2f7e5fa56eb1 100644
--- a/datafusion/functions/src/datetime/to_char.rs
+++ b/datafusion/functions/src/datetime/to_char.rs
@@ -306,6 +306,7 @@ mod tests {
     };
     use arrow::datatypes::{DataType, Field, TimeUnit};
     use chrono::{NaiveDateTime, Timelike};
+    use datafusion_common::config::ConfigOptions;
     use datafusion_common::ScalarValue;
     use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
     use std::sync::Arc;
@@ -395,6 +396,7 @@ mod tests {
             arg_fields,
             number_rows: 1,
             return_field: Field::new("f", DataType::Utf8, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
         let result = ToCharFunc::new()
             .invoke_with_args(args)
@@ -483,6 +485,7 @@ mod tests {
             arg_fields,
             number_rows: batch_len,
             return_field: Field::new("f", DataType::Utf8, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
         let result = ToCharFunc::new()
             .invoke_with_args(args)
@@ -619,6 +622,7 @@ mod tests {
             arg_fields,
             number_rows: batch_len,
             return_field: Field::new("f", DataType::Utf8, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
         let result = ToCharFunc::new()
             .invoke_with_args(args)
@@ -646,6 +650,7 @@ mod tests {
             arg_fields,
             number_rows: batch_len,
             return_field: Field::new("f", DataType::Utf8, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
         let result = ToCharFunc::new()
             .invoke_with_args(args)
@@ -670,6 +675,7 @@ mod tests {
             arg_fields: vec![arg_field],
             number_rows: 1,
             return_field: Field::new("f", DataType::Utf8, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
         let result = ToCharFunc::new().invoke_with_args(args);
         assert_eq!(
@@ -690,6 +696,7 @@ mod tests {
             arg_fields,
             number_rows: 1,
             return_field: Field::new("f", DataType::Utf8, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
         let result = ToCharFunc::new().invoke_with_args(args);
         assert_eq!(
diff --git a/datafusion/functions/src/datetime/to_date.rs b/datafusion/functions/src/datetime/to_date.rs
index c9fd17dbef11..d1b2720867d2 100644
--- a/datafusion/functions/src/datetime/to_date.rs
+++ b/datafusion/functions/src/datetime/to_date.rs
@@ -162,15 +162,15 @@ impl ScalarUDFImpl for ToDateFunc {
 
 #[cfg(test)]
 mod tests {
+    use super::ToDateFunc;
     use arrow::array::{Array, Date32Array, GenericStringArray, StringViewArray};
     use arrow::datatypes::{DataType, Field};
     use arrow::{compute::kernels::cast_utils::Parser, datatypes::Date32Type};
+    use datafusion_common::config::ConfigOptions;
     use datafusion_common::{DataFusionError, ScalarValue};
     use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
     use std::sync::Arc;
 
-    use super::ToDateFunc;
-
     fn invoke_to_date_with_args(
         args: Vec<ColumnarValue>,
         number_rows: usize,
@@ -185,6 +185,7 @@ mod tests {
             arg_fields,
             number_rows,
             return_field: Field::new("f", DataType::Date32, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
         ToDateFunc::new().invoke_with_args(args)
     }
diff --git a/datafusion/functions/src/datetime/to_local_time.rs b/datafusion/functions/src/datetime/to_local_time.rs
index b9ebe537d459..b6d4404d6d46 100644
--- a/datafusion/functions/src/datetime/to_local_time.rs
+++ b/datafusion/functions/src/datetime/to_local_time.rs
@@ -411,6 +411,7 @@ mod tests {
     use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
     use arrow::datatypes::{DataType, Field, TimeUnit};
     use chrono::NaiveDateTime;
+    use datafusion_common::config::ConfigOptions;
     use datafusion_common::ScalarValue;
     use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl};
@@ -545,6 +546,7 @@ mod tests {
             arg_fields: vec![arg_field],
             number_rows: 1,
             return_field: Field::new("f", expected.data_type(), true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         })
         .unwrap();
         match res {
@@ -615,6 +617,7 @@ mod tests {
                 true,
             )
             .into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
         let result = ToLocalTimeFunc::new().invoke_with_args(args).unwrap();
         if let ColumnarValue::Array(result) = result {
diff --git a/datafusion/functions/src/datetime/to_timestamp.rs b/datafusion/functions/src/datetime/to_timestamp.rs
index 9e2c46187e70..9bd94c8ca8d9 100644
--- a/datafusion/functions/src/datetime/to_timestamp.rs
+++ b/datafusion/functions/src/datetime/to_timestamp.rs
@@ -656,6 +656,7 @@ mod tests {
     use arrow::array::{ArrayRef, Int64Array, StringBuilder};
     use arrow::datatypes::{Field, TimeUnit};
     use chrono::Utc;
+    use datafusion_common::config::ConfigOptions;
     use datafusion_common::{assert_contains, DataFusionError, ScalarValue};
     use datafusion_expr::ScalarFunctionImplementation;
@@ -1034,6 +1035,7 @@ mod tests {
                 arg_fields: vec![arg_field],
                 number_rows: 4,
                 return_field: Field::new("f", rt, true).into(),
+                config_options: Arc::new(ConfigOptions::default()),
             };
             let res = udf
                 .invoke_with_args(args)
@@ -1083,6 +1085,7 @@ mod tests {
                 arg_fields: vec![arg_field],
                 number_rows: 5,
                 return_field: Field::new("f", rt, true).into(),
+                config_options: Arc::new(ConfigOptions::default()),
             };
             let res = udf
                 .invoke_with_args(args)
diff --git a/datafusion/functions/src/math/log.rs b/datafusion/functions/src/math/log.rs
index 23e267a323b9..186d0d3c4717 100644
--- a/datafusion/functions/src/math/log.rs
+++ b/datafusion/functions/src/math/log.rs
@@ -260,6 +260,7 @@ mod tests {
     use arrow::compute::SortOptions;
     use arrow::datatypes::Field;
     use datafusion_common::cast::{as_float32_array, as_float64_array};
+    use datafusion_common::config::ConfigOptions;
     use datafusion_common::DFSchema;
     use datafusion_expr::execution_props::ExecutionProps;
     use datafusion_expr::simplify::SimplifyContext;
@@ -281,6 +282,7 @@ mod tests {
             arg_fields,
             number_rows: 4,
             return_field: Field::new("f", DataType::Float64, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
         let _ = LogFunc::new().invoke_with_args(args);
     }
@@ -295,6 +297,7 @@ mod tests {
             arg_fields: vec![arg_field],
             number_rows: 1,
             return_field: Field::new("f", DataType::Float64, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
 
         let result = LogFunc::new().invoke_with_args(args);
@@ -311,6 +314,7 @@ mod tests {
             arg_fields: vec![arg_field],
             number_rows: 1,
             return_field: Field::new("f", DataType::Float32, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
 
         let result = LogFunc::new()
             .invoke_with_args(args)
@@ -340,6 +344,7 @@ mod tests {
             arg_fields: vec![arg_field],
             number_rows: 1,
             return_field: Field::new("f", DataType::Float64, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
 
         let result = LogFunc::new()
             .invoke_with_args(args)
@@ -373,6 +378,7 @@ mod tests {
             arg_fields,
             number_rows: 1,
             return_field: Field::new("f", DataType::Float32, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
 
         let result = LogFunc::new()
             .invoke_with_args(args)
@@ -406,6 +412,7 @@ mod tests {
             arg_fields,
             number_rows: 1,
             return_field: Field::new("f", DataType::Float64, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
 
         let result = LogFunc::new()
             .invoke_with_args(args)
@@ -437,6 +444,7 @@ mod tests {
             arg_fields: vec![arg_field],
             number_rows: 4,
             return_field: Field::new("f", DataType::Float64, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
 
         let result = LogFunc::new()
             .invoke_with_args(args)
@@ -471,6 +479,7 @@ mod tests {
             arg_fields: vec![arg_field],
             number_rows: 4,
             return_field: Field::new("f", DataType::Float32, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
 
         let result = LogFunc::new()
             .invoke_with_args(args)
@@ -511,6 +520,7 @@ mod tests {
             arg_fields,
             number_rows: 4,
             return_field: Field::new("f", DataType::Float64, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
 
         let result = LogFunc::new()
             .invoke_with_args(args)
@@ -551,6 +561,7 @@ mod tests {
             arg_fields,
             number_rows: 4,
             return_field: Field::new("f", DataType::Float32, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
 
         let result = LogFunc::new()
             .invoke_with_args(args)
diff --git a/datafusion/functions/src/math/power.rs b/datafusion/functions/src/math/power.rs
index 465844704f59..87f27f5a793c 100644
--- a/datafusion/functions/src/math/power.rs
+++ b/datafusion/functions/src/math/power.rs
@@ -189,11 +189,11 @@ fn is_log(func: &ScalarUDF) -> bool {
 
 #[cfg(test)]
 mod tests {
+    use super::*;
     use arrow::array::Float64Array;
     use arrow::datatypes::Field;
     use datafusion_common::cast::{as_float64_array, as_int64_array};
-
-    use super::*;
+    use datafusion_common::config::ConfigOptions;
 
     #[test]
     fn test_power_f64() {
@@ -213,6 +213,7 @@ mod tests {
             arg_fields,
             number_rows: 4,
             return_field: Field::new("f", DataType::Float64, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
         let result = PowerFunc::new()
             .invoke_with_args(args)
@@ -248,6 +249,7 @@ mod tests {
             arg_fields,
             number_rows: 4,
             return_field: Field::new("f", DataType::Int64, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
         let result = PowerFunc::new()
             .invoke_with_args(args)
diff --git a/datafusion/functions/src/math/signum.rs b/datafusion/functions/src/math/signum.rs
index ec6ef5a78c6a..71d32413afc8 100644
--- a/datafusion/functions/src/math/signum.rs
+++ b/datafusion/functions/src/math/signum.rs
@@ -140,6 +140,7 @@ mod test {
     use arrow::array::{ArrayRef, Float32Array, Float64Array};
     use arrow::datatypes::{DataType, Field};
     use datafusion_common::cast::{as_float32_array, as_float64_array};
+    use datafusion_common::config::ConfigOptions;
     use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl};
 
     use crate::math::signum::SignumFunc;
@@ -163,6 +164,7 @@ mod test {
             arg_fields,
             number_rows: array.len(),
             return_field: Field::new("f", DataType::Float32, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
         let result = SignumFunc::new()
             .invoke_with_args(args)
@@ -209,6 +211,7 @@ mod test {
             arg_fields,
             number_rows: array.len(),
             return_field: Field::new("f", DataType::Float64, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
         let result = SignumFunc::new()
             .invoke_with_args(args)
diff --git a/datafusion/functions/src/regex/regexpcount.rs b/datafusion/functions/src/regex/regexpcount.rs
index 4f2efdfeee33..a069455281bd 100644
--- a/datafusion/functions/src/regex/regexpcount.rs
+++ b/datafusion/functions/src/regex/regexpcount.rs
@@ -581,6 +581,7 @@ mod tests {
     use super::*;
     use arrow::array::{GenericStringArray, StringViewArray};
     use arrow::datatypes::Field;
+    use datafusion_common::config::ConfigOptions;
    use datafusion_expr::ScalarFunctionArgs;
 
     #[test]
@@ -626,6 +627,7 @@ mod tests {
             arg_fields,
             number_rows: args.len(),
             return_field: Field::new("f", Int64, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         })
     }
diff --git a/datafusion/functions/src/regex/regexpinstr.rs b/datafusion/functions/src/regex/regexpinstr.rs
index 76117d07747f..577a8f5bc33d 100644
--- a/datafusion/functions/src/regex/regexpinstr.rs
+++ b/datafusion/functions/src/regex/regexpinstr.rs
@@ -451,6 +451,7 @@ mod tests {
     use arrow::array::Int64Array;
     use arrow::array::{GenericStringArray, StringViewArray};
     use arrow::datatypes::Field;
+    use datafusion_common::config::ConfigOptions;
     use datafusion_expr::ScalarFunctionArgs;
     #[test]
     fn test_regexp_instr() {
@@ -492,6 +493,7 @@ mod tests {
             arg_fields,
             number_rows: args.len(),
             return_field: Arc::new(Field::new("f", Int64, true)),
+            config_options: Arc::new(ConfigOptions::default()),
         })
     }
diff --git a/datafusion/functions/src/string/concat.rs b/datafusion/functions/src/string/concat.rs
index 64a527eac198..06ec82f1b3ed 100644
--- a/datafusion/functions/src/string/concat.rs
+++ b/datafusion/functions/src/string/concat.rs
@@ -377,6 +377,7 @@ mod tests {
     use arrow::array::{Array, LargeStringArray, StringViewArray};
     use arrow::array::{ArrayRef, StringArray};
     use arrow::datatypes::Field;
+    use datafusion_common::config::ConfigOptions;
     use DataType::*;
 
     #[test]
@@ -485,6 +486,7 @@ mod tests {
             arg_fields,
             number_rows: 3,
             return_field: Field::new("f", Utf8, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
 
         let result = ConcatFunc::new().invoke_with_args(args)?;
diff --git a/datafusion/functions/src/string/concat_ws.rs b/datafusion/functions/src/string/concat_ws.rs
index 1f45f8501e1f..c1ecac7ae99d 100644
--- a/datafusion/functions/src/string/concat_ws.rs
+++ b/datafusion/functions/src/string/concat_ws.rs
@@ -409,6 +409,7 @@ mod tests {
     use arrow::array::{Array, ArrayRef, StringArray};
     use arrow::datatypes::DataType::Utf8;
     use arrow::datatypes::Field;
+    use datafusion_common::config::ConfigOptions;
     use datafusion_common::Result;
     use datafusion_common::ScalarValue;
     use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl};
@@ -493,6 +494,7 @@ mod tests {
             arg_fields,
             number_rows: 3,
             return_field: Field::new("f", Utf8, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
 
         let result = ConcatWsFunc::new().invoke_with_args(args)?;
@@ -529,6 +531,7 @@ mod tests {
             arg_fields,
             number_rows: 3,
             return_field: Field::new("f", Utf8, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
 
         let result = ConcatWsFunc::new().invoke_with_args(args)?;
diff --git a/datafusion/functions/src/string/contains.rs b/datafusion/functions/src/string/contains.rs
index 215f8f7a25b9..b2aefb8ee374 100644
--- a/datafusion/functions/src/string/contains.rs
+++ b/datafusion/functions/src/string/contains.rs
@@ -153,6 +153,7 @@ mod test {
     use crate::expr_fn::contains;
     use arrow::array::{BooleanArray, StringArray};
     use arrow::datatypes::{DataType, Field};
+    use datafusion_common::config::ConfigOptions;
     use datafusion_common::ScalarValue;
     use datafusion_expr::{ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDFImpl};
     use std::sync::Arc;
@@ -175,6 +176,7 @@ mod test {
             arg_fields,
             number_rows: 2,
             return_field: Field::new("f", DataType::Boolean, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
 
         let actual = udf.invoke_with_args(args).unwrap();
diff --git a/datafusion/functions/src/string/lower.rs b/datafusion/functions/src/string/lower.rs
index 536c29a7cb25..139275892933 100644
--- a/datafusion/functions/src/string/lower.rs
+++ b/datafusion/functions/src/string/lower.rs
@@ -100,6 +100,7 @@ mod tests {
     use arrow::array::{Array, ArrayRef, StringArray};
     use arrow::datatypes::DataType::Utf8;
     use arrow::datatypes::Field;
+    use datafusion_common::config::ConfigOptions;
     use std::sync::Arc;
 
     fn to_lower(input: ArrayRef, expected: ArrayRef) -> Result<()> {
@@ -111,6 +112,7 @@ mod tests {
             args: vec![ColumnarValue::Array(input)],
             arg_fields,
             return_field: Field::new("f", Utf8, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
 
         let result = match func.invoke_with_args(args)? {
diff --git a/datafusion/functions/src/string/upper.rs b/datafusion/functions/src/string/upper.rs
index 882fb45eda4a..99d505c5aafd 100644
--- a/datafusion/functions/src/string/upper.rs
+++ b/datafusion/functions/src/string/upper.rs
@@ -99,6 +99,7 @@ mod tests {
     use arrow::array::{Array, ArrayRef, StringArray};
     use arrow::datatypes::DataType::Utf8;
     use arrow::datatypes::Field;
+    use datafusion_common::config::ConfigOptions;
     use std::sync::Arc;
 
     fn to_upper(input: ArrayRef, expected: ArrayRef) -> Result<()> {
@@ -110,6 +111,7 @@ mod tests {
             args: vec![ColumnarValue::Array(input)],
             arg_fields: vec![arg_field],
             return_field: Field::new("f", Utf8, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         };
 
         let result = match func.invoke_with_args(args)? {
diff --git a/datafusion/functions/src/unicode/find_in_set.rs b/datafusion/functions/src/unicode/find_in_set.rs
index 8b00c7be1ccf..3429a8293c15 100644
--- a/datafusion/functions/src/unicode/find_in_set.rs
+++ b/datafusion/functions/src/unicode/find_in_set.rs
@@ -349,6 +349,7 @@ mod tests {
     use crate::utils::test::test_function;
     use arrow::array::{Array, Int32Array, StringArray};
     use arrow::datatypes::{DataType::Int32, Field};
+    use datafusion_common::config::ConfigOptions;
     use datafusion_common::{Result, ScalarValue};
     use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl};
     use std::sync::Arc;
@@ -483,6 +484,7 @@ mod tests {
             arg_fields,
             number_rows: cardinality,
             return_field: Field::new("f", return_type, true).into(),
+            config_options: Arc::new(ConfigOptions::default()),
         });
 
         assert!(result.is_ok());
diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs
index 583ff48bff39..0e9ef8dacd51 100644
--- a/datafusion/functions/src/utils.rs
+++ b/datafusion/functions/src/utils.rs
@@ -128,8 +128,9 @@ pub mod test {
     /// $EXPECTED_TYPE is the expected value type
     /// $EXPECTED_DATA_TYPE is the expected result type
     /// $ARRAY_TYPE is the column type after function applied
+    /// $CONFIG_OPTIONS config options to pass to function
     macro_rules! test_function {
-        ($FUNC:expr, $ARGS:expr, $EXPECTED:expr, $EXPECTED_TYPE:ty, $EXPECTED_DATA_TYPE:expr, $ARRAY_TYPE:ident) => {
+        ($FUNC:expr, $ARGS:expr, $EXPECTED:expr, $EXPECTED_TYPE:ty, $EXPECTED_DATA_TYPE:expr, $ARRAY_TYPE:ident, $CONFIG_OPTIONS:expr) => {
            let expected: Result<Option<$EXPECTED_TYPE>> = $EXPECTED;
            let func = $FUNC;
@@ -174,7 +175,13 @@ pub mod test {
             let return_type = return_field.data_type();
             assert_eq!(return_type, &$EXPECTED_DATA_TYPE);
 
-            let result = func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, arg_fields, number_rows: cardinality, return_field});
+            let result = func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{
+                args: $ARGS,
+                arg_fields,
+                number_rows: cardinality,
+                return_field,
+                config_options: $CONFIG_OPTIONS
+            });
             assert_eq!(result.is_ok(), true, "function returned an error: {}", result.unwrap_err());
 
             let result = result.unwrap().to_array(cardinality).expect("Failed to convert to array");
@@ -198,7 +205,13 @@ pub mod test {
                     let return_field = return_field.unwrap();
 
                     // invoke is expected error - cannot use .expect_err() due to Debug not being implemented
-                    match func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{args: $ARGS, arg_fields, number_rows: cardinality, return_field}) {
+                    match func.invoke_with_args(datafusion_expr::ScalarFunctionArgs{
+                        args: $ARGS,
+                        arg_fields,
+                        number_rows: cardinality,
+                        return_field,
+                        config_options: $CONFIG_OPTIONS})
+                    {
                         Ok(_) => assert!(false, "expected error"),
                         Err(error) => {
                             assert!(expected_error.strip_backtrace().starts_with(&error.strip_backtrace()));
@@ -208,6 +221,18 @@ pub mod test {
                 }
             };
         };
+
+        ($FUNC:expr, $ARGS:expr, $EXPECTED:expr, $EXPECTED_TYPE:ty, $EXPECTED_DATA_TYPE:expr, $ARRAY_TYPE:ident) => {
+            test_function!(
+                $FUNC,
+                $ARGS,
+                $EXPECTED,
+                $EXPECTED_TYPE,
+                $EXPECTED_DATA_TYPE,
+                $ARRAY_TYPE,
+                std::sync::Arc::new(datafusion_common::config::ConfigOptions::default())
+            )
+        };
     }
 
     use arrow::datatypes::DataType;
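With the second macro arm above, existing six-argument `test_function!` call sites keep compiling unchanged, while a test that cares about configuration can pass its own options as a seventh argument. A sketch of both forms, assuming the usual `func`/`args`/`expected` values a test already has in scope:

// Six-argument form: expands to the seven-argument form with
// Arc::new(ConfigOptions::default()) filled in automatically.
test_function!(func, args, expected, i64, DataType::Int64, Int64Array);

// Seven-argument form: the test supplies (possibly customized) options.
let mut options = datafusion_common::config::ConfigOptions::default();
options.execution.batch_size = 1024; // illustrative tweak
test_function!(func, args, expected, i64, DataType::Int64, Int64Array,
    std::sync::Arc::new(options));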
diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs
index 4d2c2c7c79cd..49806d6db344 100644
--- a/datafusion/optimizer/src/optimizer.rs
+++ b/datafusion/optimizer/src/optimizer.rs
@@ -107,7 +107,7 @@ pub trait OptimizerConfig {
     /// Return alias generator used to generate unique aliases for subqueries
     fn alias_generator(&self) -> &Arc<AliasGenerator>;
 
-    fn options(&self) -> &ConfigOptions;
+    fn options(&self) -> Arc<ConfigOptions>;
 
     fn function_registry(&self) -> Option<&dyn FunctionRegistry> {
         None
@@ -125,7 +125,7 @@ pub struct OptimizerContext {
     /// Alias generator used to generate unique aliases for subqueries
     alias_generator: Arc<AliasGenerator>,
 
-    options: ConfigOptions,
+    options: Arc<ConfigOptions>,
 }
 
 impl OptimizerContext {
@@ -137,13 +137,15 @@ impl OptimizerContext {
         Self {
             query_execution_start_time: Utc::now(),
             alias_generator: Arc::new(AliasGenerator::new()),
-            options,
+            options: Arc::new(options),
         }
     }
 
     /// Specify whether to enable the filter_null_keys rule
     pub fn filter_null_keys(mut self, filter_null_keys: bool) -> Self {
-        self.options.optimizer.filter_null_join_keys = filter_null_keys;
+        Arc::make_mut(&mut self.options)
+            .optimizer
+            .filter_null_join_keys = filter_null_keys;
         self
     }
 
@@ -160,13 +162,13 @@ impl OptimizerContext {
     /// Specify whether the optimizer should skip rules that produce
     /// errors, or fail the query
     pub fn with_skip_failing_rules(mut self, b: bool) -> Self {
-        self.options.optimizer.skip_failed_rules = b;
+        Arc::make_mut(&mut self.options).optimizer.skip_failed_rules = b;
        self
    }

    /// Specify how many times to attempt to optimize the plan
    pub fn with_max_passes(mut self, v: u8) -> Self {
-        self.options.optimizer.max_passes = v as usize;
+        Arc::make_mut(&mut self.options).optimizer.max_passes = v as usize;
         self
     }
 }
@@ -187,8 +189,8 @@ impl OptimizerConfig for OptimizerContext {
         &self.alias_generator
     }
 
-    fn options(&self) -> &ConfigOptions {
-        &self.options
+    fn options(&self) -> Arc<ConfigOptions> {
+        Arc::clone(&self.options)
     }
 }
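Since `OptimizerContext` now stores `Arc<ConfigOptions>`, its builder-style setters go through `Arc::make_mut`: a sole owner mutates in place, while a shared value is cloned once and the write lands on the private copy. A small self-contained illustration of that copy-on-write behavior (plain `std`, not DataFusion API):

use std::sync::Arc;

fn main() {
    let mut options = Arc::new(vec![1, 2, 3]);

    // Sole owner: make_mut hands back the existing allocation.
    Arc::make_mut(&mut options).push(4);

    let shared = Arc::clone(&options);
    // Now shared: make_mut clones first, so `shared` is unaffected.
    Arc::make_mut(&mut options).push(5);

    assert_eq!(*shared, vec![1, 2, 3, 4]);
    assert_eq!(*options, vec![1, 2, 3, 4, 5]);
}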
diff --git a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
index ccf90893e17e..b828888305d2 100644
--- a/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
+++ b/datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs
@@ -69,6 +69,7 @@ impl OptimizerRule for SimplifyExpressions {
     ) -> Result<Transformed<LogicalPlan>, DataFusionError> {
         let mut execution_props = ExecutionProps::new();
         execution_props.query_execution_start_time = config.query_execution_start_time();
+        execution_props.config_options = Some(config.options());
         Self::optimize_internal(plan, &execution_props)
     }
 }
diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs
index 95a9db6c8abd..d6d7b58f0bc3 100644
--- a/datafusion/optimizer/tests/optimizer_integration.rs
+++ b/datafusion/optimizer/tests/optimizer_integration.rs
@@ -526,7 +526,7 @@ fn test_sql(sql: &str) -> Result<LogicalPlan> {
     let analyzer = Analyzer::new();
     let optimizer = Optimizer::new();
     // analyze and optimize the logical plan
-    let plan = analyzer.execute_and_check(plan, config.options(), |_, _| {})?;
+    let plan = analyzer.execute_and_check(plan, &config.options(), |_, _| {})?;
     optimizer.optimize(plan, &config, observe)
 }
diff --git a/datafusion/physical-expr/src/async_scalar_function.rs b/datafusion/physical-expr/src/async_scalar_function.rs
index 547b9c13da62..00134565ea44 100644
--- a/datafusion/physical-expr/src/async_scalar_function.rs
+++ b/datafusion/physical-expr/src/async_scalar_function.rs
@@ -114,7 +114,7 @@ impl AsyncFuncExpr {
     pub async fn invoke_with_args(
         &self,
         batch: &RecordBatch,
-        option: &ConfigOptions,
+        config_options: Arc<ConfigOptions>,
     ) -> Result<ColumnarValue> {
         let Some(scalar_function_expr) =
             self.func.as_any().downcast_ref::<ScalarFunctionExpr>()
@@ -162,15 +162,13 @@ impl AsyncFuncExpr {
                     .collect::<Result<Vec<_>>>()?;
                 result_batches.push(
                     async_udf
-                        .invoke_async_with_args(
-                            ScalarFunctionArgs {
-                                args,
-                                arg_fields: arg_fields.clone(),
-                                number_rows: current_batch.num_rows(),
-                                return_field: Arc::clone(&self.return_field),
-                            },
-                            option,
-                        )
+                        .invoke_async_with_args(ScalarFunctionArgs {
+                            args,
+                            arg_fields: arg_fields.clone(),
+                            number_rows: current_batch.num_rows(),
+                            return_field: Arc::clone(&self.return_field),
+                            config_options: Arc::clone(&config_options),
+                        })
                         .await?,
                 );
             }
@@ -183,15 +181,13 @@ impl AsyncFuncExpr {
 
             result_batches.push(
                 async_udf
-                    .invoke_async_with_args(
-                        ScalarFunctionArgs {
-                            args: args.to_vec(),
-                            arg_fields,
-                            number_rows: batch.num_rows(),
-                            return_field: Arc::clone(&self.return_field),
-                        },
-                        option,
-                    )
+                    .invoke_async_with_args(ScalarFunctionArgs {
+                        args: args.to_vec(),
+                        arg_fields,
+                        number_rows: batch.num_rows(),
+                        return_field: Arc::clone(&self.return_field),
+                        config_options: Arc::clone(&config_options),
+                    })
                     .await?,
             );
         }
diff --git a/datafusion/physical-expr/src/equivalence/ordering.rs b/datafusion/physical-expr/src/equivalence/ordering.rs
index 875c2a76e5eb..aa65c4a80ae9 100644
--- a/datafusion/physical-expr/src/equivalence/ordering.rs
+++ b/datafusion/physical-expr/src/equivalence/ordering.rs
@@ -338,6 +338,7 @@ mod tests {
     use arrow::compute::SortOptions;
     use arrow::datatypes::{DataType, Field, Schema};
 
+    use datafusion_common::config::ConfigOptions;
     use datafusion_common::Result;
     use datafusion_expr::{Operator, ScalarUDF};
 
@@ -390,16 +391,19 @@ mod tests {
             Arc::clone(&test_fun),
             vec![Arc::clone(col_a)],
             &test_schema,
+            Arc::new(ConfigOptions::default()),
         )?) as PhysicalExprRef;
         let floor_f = Arc::new(ScalarFunctionExpr::try_new(
             Arc::clone(&test_fun),
             vec![Arc::clone(col_f)],
             &test_schema,
+            Arc::new(ConfigOptions::default()),
         )?) as PhysicalExprRef;
         let exp_a = Arc::new(ScalarFunctionExpr::try_new(
             Arc::clone(&test_fun),
             vec![Arc::clone(col_a)],
             &test_schema,
+            Arc::new(ConfigOptions::default()),
         )?) as PhysicalExprRef;
 
         let a_plus_b = Arc::new(BinaryExpr::new(
diff --git a/datafusion/physical-expr/src/equivalence/projection.rs b/datafusion/physical-expr/src/equivalence/projection.rs
index 38bb1fef8074..6fe56052292f 100644
--- a/datafusion/physical-expr/src/equivalence/projection.rs
+++ b/datafusion/physical-expr/src/equivalence/projection.rs
@@ -167,6 +167,7 @@ mod tests {
 
     use arrow::compute::SortOptions;
     use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
+    use datafusion_common::config::ConfigOptions;
     use datafusion_expr::{Operator, ScalarUDF};
 
     #[test]
@@ -689,6 +690,7 @@ mod tests {
             test_fun,
             vec![Arc::clone(col_c)],
             &schema,
+            Arc::new(ConfigOptions::default()),
         )?) as PhysicalExprRef;
 
         let option_asc = SortOptions {
diff --git a/datafusion/physical-expr/src/equivalence/properties/dependency.rs b/datafusion/physical-expr/src/equivalence/properties/dependency.rs
index 4554e36f766d..26d5d32c6512 100644
--- a/datafusion/physical-expr/src/equivalence/properties/dependency.rs
+++ b/datafusion/physical-expr/src/equivalence/properties/dependency.rs
@@ -396,6 +396,7 @@ mod tests {
 
     use arrow::compute::SortOptions;
     use arrow::datatypes::{DataType, Field, Schema, TimeUnit};
+    use datafusion_common::config::ConfigOptions;
     use datafusion_common::{Constraint, Constraints, Result};
     use datafusion_expr::sort_properties::SortProperties;
     use datafusion_expr::Operator;
@@ -1035,6 +1036,7 @@ mod tests {
         concat(),
         vec![Arc::clone(&col_a), Arc::clone(&col_b)],
         Field::new("f", DataType::Utf8, true).into(),
+        Arc::new(ConfigOptions::default()),
     ));
 
     // Assume existing ordering is [c ASC, a ASC, b ASC]
@@ -1125,6 +1127,7 @@ mod tests {
         concat(),
         vec![Arc::clone(&col_a), Arc::clone(&col_b)],
         Field::new("f", DataType::Utf8, true).into(),
+        Arc::new(ConfigOptions::default()),
     )) as _;
 
     // Assume existing ordering is [concat(a, b) ASC, a ASC, b ASC]
diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs
index fbc19b1202ee..fb491341f81d 100644
--- a/datafusion/physical-expr/src/planner.rs
+++ b/datafusion/physical-expr/src/planner.rs
@@ -24,6 +24,7 @@ use crate::{
 };
 
 use arrow::datatypes::Schema;
+use datafusion_common::config::ConfigOptions;
 use datafusion_common::{
     exec_err, not_impl_err, plan_err, DFSchema, Result, ScalarValue, ToDFSchema,
 };
@@ -317,11 +318,16 @@ pub fn create_physical_expr(
         Expr::ScalarFunction(ScalarFunction { func, args }) => {
             let physical_args =
                 create_physical_exprs(args, input_dfschema, execution_props)?;
+            let config_options = match execution_props.config_options.as_ref() {
+                Some(config_options) => Arc::clone(config_options),
+                None => Arc::new(ConfigOptions::default()),
+            };
 
             Ok(Arc::new(ScalarFunctionExpr::try_new(
                 Arc::clone(func),
                 physical_args,
                 input_schema,
+                config_options,
             )?))
         }
         Expr::Between(Between {
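Note the fallback in `create_physical_expr` above: `ExecutionProps::config_options` is an `Option<Arc<ConfigOptions>>`, so planning paths that never attached session options still work and silently receive defaults. The same match can be expressed more compactly; an equivalent sketch:

use std::sync::Arc;
use datafusion_common::config::ConfigOptions;

// Equivalent to the match in the diff: reuse the attached options if any,
// otherwise fall back to freshly allocated defaults.
fn resolve(config_options: Option<&Arc<ConfigOptions>>) -> Arc<ConfigOptions> {
    config_options
        .map(Arc::clone)
        .unwrap_or_else(|| Arc::new(ConfigOptions::default()))
}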
diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs
index d014bbb74caa..f2bb09b1009c 100644
--- a/datafusion/physical-expr/src/scalar_function.rs
+++ b/datafusion/physical-expr/src/scalar_function.rs
@@ -31,7 +31,7 @@
 use std::any::Any;
 use std::fmt::{self, Debug, Formatter};
-use std::hash::Hash;
+use std::hash::{Hash, Hasher};
 use std::sync::Arc;
 
 use crate::expressions::Literal;
@@ -39,6 +39,7 @@ use crate::PhysicalExpr;
 
 use arrow::array::{Array, RecordBatch};
 use arrow::datatypes::{DataType, FieldRef, Schema};
+use datafusion_common::config::ConfigOptions;
 use datafusion_common::{internal_err, Result, ScalarValue};
 use datafusion_expr::interval_arithmetic::Interval;
 use datafusion_expr::sort_properties::ExprProperties;
@@ -46,14 +47,16 @@ use datafusion_expr::type_coercion::functions::data_types_with_scalar_udf;
 use datafusion_expr::{
     expr_vec_fmt, ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF,
 };
+use datafusion_physical_expr_common::physical_expr::{DynEq, DynHash};
+use itertools::Itertools;
 
 /// Physical expression of a scalar function
-#[derive(Eq, PartialEq, Hash)]
 pub struct ScalarFunctionExpr {
     fun: Arc<ScalarUDF>,
     name: String,
     args: Vec<Arc<dyn PhysicalExpr>>,
     return_field: FieldRef,
+    config_options: Arc<ConfigOptions>,
 }
 
 impl Debug for ScalarFunctionExpr {
@@ -74,12 +77,14 @@ impl ScalarFunctionExpr {
         fun: Arc<ScalarUDF>,
         args: Vec<Arc<dyn PhysicalExpr>>,
         return_field: FieldRef,
+        config_options: Arc<ConfigOptions>,
     ) -> Self {
         Self {
             fun,
             name: name.to_owned(),
             args,
             return_field,
+            config_options,
         }
     }
 
@@ -88,6 +93,7 @@ impl ScalarFunctionExpr {
         fun: Arc<ScalarUDF>,
         args: Vec<Arc<dyn PhysicalExpr>>,
         schema: &Schema,
+        config_options: Arc<ConfigOptions>,
     ) -> Result<Self> {
         let name = fun.name().to_string();
         let arg_fields = args
@@ -120,6 +126,7 @@ impl ScalarFunctionExpr {
             name,
             args,
             return_field,
+            config_options,
         })
     }
 
@@ -156,6 +163,10 @@ impl ScalarFunctionExpr {
     pub fn nullable(&self) -> bool {
         self.return_field.is_nullable()
     }
+
+    pub fn config_options(&self) -> &ConfigOptions {
+        &self.config_options
+    }
 }
 
 impl fmt::Display for ScalarFunctionExpr {
@@ -164,6 +175,42 @@ impl fmt::Display for ScalarFunctionExpr {
     }
 }
 
+impl DynEq for ScalarFunctionExpr {
+    fn dyn_eq(&self, other: &dyn Any) -> bool {
+        other.downcast_ref::<Self>().is_some_and(|o| {
+            self.fun.eq(&o.fun)
+                && self.name.eq(&o.name)
+                && self.args.eq(&o.args)
+                && self.return_field.eq(&o.return_field)
+                && self
+                    .config_options
+                    .entries()
+                    .iter()
+                    .sorted_by(|&l, &r| l.key.cmp(&r.key))
+                    .zip(
+                        o.config_options
+                            .entries()
+                            .iter()
+                            .sorted_by(|&l, &r| l.key.cmp(&r.key)),
+                    )
+                    .filter(|(l, r)| l.ne(r))
+                    .count()
+                    == 0
+        })
+    }
+}
+
+impl DynHash for ScalarFunctionExpr {
+    fn dyn_hash(&self, mut state: &mut dyn Hasher) {
+        self.type_id().hash(&mut state);
+        self.fun.hash(&mut state);
+        self.name.hash(&mut state);
+        self.args.hash(&mut state);
+        self.return_field.hash(&mut state);
+        self.config_options.entries().hash(&mut state);
+    }
+}
+
 impl PhysicalExpr for ScalarFunctionExpr {
     /// Return a reference to Any that can be used for downcasting
     fn as_any(&self) -> &dyn Any {
@@ -202,6 +249,7 @@ impl PhysicalExpr for ScalarFunctionExpr {
             arg_fields,
             number_rows: batch.num_rows(),
             return_field: Arc::clone(&self.return_field),
+            config_options: Arc::clone(&self.config_options),
         })?;
 
         if let ColumnarValue::Array(array) = &output {
@@ -238,6 +286,7 @@ impl PhysicalExpr for ScalarFunctionExpr {
             Arc::clone(&self.fun),
             children,
             Arc::clone(&self.return_field),
+            Arc::clone(&self.config_options),
         )))
     }
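Dropping `#[derive(Eq, PartialEq, Hash)]` on `ScalarFunctionExpr` is forced here: `ConfigOptions` is not itself `Eq`/`Hash`, so equality and hashing are written by hand over its `entries()` list (which is also why this diff derives `Hash`/`PartialEq`/`Eq` on `ConfigEntry`). The `dyn_eq` body compares entries order-insensitively by sorting on `key`; the idea, sketched over a simplified entry type rather than the DataFusion types:

use itertools::Itertools;

#[derive(Debug, PartialEq, Eq, Clone)]
struct Entry {
    key: String,
    value: Option<String>,
}

// Order-insensitive comparison: sort both sides by key, then require every
// paired entry to match, mirroring the dyn_eq body in the diff above.
fn entries_eq(a: &[Entry], b: &[Entry]) -> bool {
    a.len() == b.len()
        && a.iter()
            .sorted_by(|l, r| l.key.cmp(&r.key))
            .zip(b.iter().sorted_by(|l, r| l.key.cmp(&r.key)))
            .all(|(l, r)| l == r)
}

fn main() {
    let a = vec![
        Entry { key: "b".into(), value: Some("2".into()) },
        Entry { key: "a".into(), value: None },
    ];
    let mut b = a.clone();
    b.reverse();
    assert!(entries_eq(&a, &b)); // order does not matter
}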
diff --git a/datafusion/physical-plan/src/async_func.rs b/datafusion/physical-plan/src/async_func.rs
index 7e9ae827d5d1..54a76e0ebb97 100644
--- a/datafusion/physical-plan/src/async_func.rs
+++ b/datafusion/physical-plan/src/async_func.rs
@@ -176,22 +176,23 @@ impl ExecutionPlan for AsyncFuncExec {
         // now, for each record batch, evaluate the async expressions and add the columns to the result
         let async_exprs_captured = Arc::new(self.async_exprs.clone());
         let schema_captured = self.schema();
-        let config_option_ref = Arc::new(context.session_config().options().clone());
+        let config_options_ref = Arc::clone(context.session_config().options());
 
         let stream_with_async_functions = input_stream.then(move |batch| {
             // need to clone *again* to capture the async_exprs and schema in the
             // stream and satisfy lifetime requirements.
             let async_exprs_captured = Arc::clone(&async_exprs_captured);
             let schema_captured = Arc::clone(&schema_captured);
-            let config_option = Arc::clone(&config_option_ref);
+            let config_options = Arc::clone(&config_options_ref);
 
             async move {
                 let batch = batch?;
                 // append the result of evaluating the async expressions to the output
                 let mut output_arrays = batch.columns().to_vec();
                 for async_expr in async_exprs_captured.iter() {
-                    let output =
-                        async_expr.invoke_with_args(&batch, &config_option).await?;
+                    let output = async_expr
+                        .invoke_with_args(&batch, Arc::clone(&config_options))
+                        .await?;
                     output_arrays.push(output.to_array(batch.num_rows())?);
                 }
                 let batch = RecordBatch::try_new(schema_captured, output_arrays)?;
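On the proto side the change is a signature change rather than a new field: the `parse_physical_*` functions now take `&SessionContext` instead of `&dyn FunctionRegistry`, since the context both resolves UDFs (`ctx.udf(...)`) and, through its state, supplies the `ConfigOptions` needed to build a `ScalarFunctionExpr`. A hedged sketch of a caller after this change (module paths are my best reading of the crate layout; `proto` and `codec` are placeholders a real caller already has):

use arrow::datatypes::Schema;
use datafusion::physical_plan::PhysicalExpr;
use datafusion::prelude::SessionContext;
use datafusion_common::Result;
use std::sync::Arc;

// Hypothetical wrapper: decodes one expression node with the new API.
fn decode_expr(
    proto: &datafusion_proto::protobuf::PhysicalExprNode,
    ctx: &SessionContext,
    schema: &Schema,
    codec: &dyn datafusion_proto::physical_plan::PhysicalExtensionCodec,
) -> Result<Arc<dyn PhysicalExpr>> {
    // Previously this took `&dyn FunctionRegistry`; a `SessionContext`
    // satisfies the registry role and carries session configuration too.
    datafusion_proto::physical_plan::from_proto::parse_physical_expr(proto, ctx, schema, codec)
}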
pub fn parse_physical_window_expr( proto: &protobuf::PhysicalWindowExprNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result> { - let window_node_expr = - parse_physical_exprs(&proto.args, registry, input_schema, codec)?; + let window_node_expr = parse_physical_exprs(&proto.args, ctx, input_schema, codec)?; let partition_by = - parse_physical_exprs(&proto.partition_by, registry, input_schema, codec)?; + parse_physical_exprs(&proto.partition_by, ctx, input_schema, codec)?; - let order_by = - parse_physical_sort_exprs(&proto.order_by, registry, input_schema, codec)?; + let order_by = parse_physical_sort_exprs(&proto.order_by, ctx, input_schema, codec)?; let window_frame = proto .window_frame @@ -152,13 +150,13 @@ pub fn parse_physical_window_expr( protobuf::physical_window_expr_node::WindowFunction::UserDefinedAggrFunction(udaf_name) => { WindowFunctionDefinition::AggregateUDF(match &proto.fun_definition { Some(buf) => codec.try_decode_udaf(udaf_name, buf)?, - None => registry.udaf(udaf_name).or_else(|_| codec.try_decode_udaf(udaf_name, &[]))?, + None => ctx.udaf(udaf_name).or_else(|_| codec.try_decode_udaf(udaf_name, &[]))?, }) } protobuf::physical_window_expr_node::WindowFunction::UserDefinedWindowFunction(udwf_name) => { WindowFunctionDefinition::WindowUDF(match &proto.fun_definition { Some(buf) => codec.try_decode_udwf(udwf_name, buf)?, - None => registry.udwf(udwf_name).or_else(|_| codec.try_decode_udwf(udwf_name, &[]))? + None => ctx.udwf(udwf_name).or_else(|_| codec.try_decode_udwf(udwf_name, &[]))? }) } } @@ -185,7 +183,7 @@ pub fn parse_physical_window_expr( pub fn parse_physical_exprs<'a, I>( protos: I, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result>> @@ -194,7 +192,7 @@ where { protos .into_iter() - .map(|p| parse_physical_expr(p, registry, input_schema, codec)) + .map(|p| parse_physical_expr(p, ctx, input_schema, codec)) .collect::>>() } @@ -209,7 +207,7 @@ where /// * `codec` - An extension codec used to decode custom UDFs. 
pub fn parse_physical_expr( proto: &protobuf::PhysicalExprNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -228,7 +226,7 @@ pub fn parse_physical_expr( ExprType::BinaryExpr(binary_expr) => Arc::new(BinaryExpr::new( parse_required_physical_expr( binary_expr.l.as_deref(), - registry, + ctx, "left", input_schema, codec, @@ -236,7 +234,7 @@ pub fn parse_physical_expr( logical_plan::from_proto::from_proto_binary_op(&binary_expr.op)?, parse_required_physical_expr( binary_expr.r.as_deref(), - registry, + ctx, "right", input_schema, codec, @@ -258,7 +256,7 @@ pub fn parse_physical_expr( ExprType::IsNullExpr(e) => { Arc::new(IsNullExpr::new(parse_required_physical_expr( e.expr.as_deref(), - registry, + ctx, "expr", input_schema, codec, @@ -267,7 +265,7 @@ pub fn parse_physical_expr( ExprType::IsNotNullExpr(e) => { Arc::new(IsNotNullExpr::new(parse_required_physical_expr( e.expr.as_deref(), - registry, + ctx, "expr", input_schema, codec, @@ -275,7 +273,7 @@ pub fn parse_physical_expr( } ExprType::NotExpr(e) => Arc::new(NotExpr::new(parse_required_physical_expr( e.expr.as_deref(), - registry, + ctx, "expr", input_schema, codec, @@ -283,7 +281,7 @@ pub fn parse_physical_expr( ExprType::Negative(e) => { Arc::new(NegativeExpr::new(parse_required_physical_expr( e.expr.as_deref(), - registry, + ctx, "expr", input_schema, codec, @@ -292,19 +290,19 @@ pub fn parse_physical_expr( ExprType::InList(e) => in_list( parse_required_physical_expr( e.expr.as_deref(), - registry, + ctx, "expr", input_schema, codec, )?, - parse_physical_exprs(&e.list, registry, input_schema, codec)?, + parse_physical_exprs(&e.list, ctx, input_schema, codec)?, &e.negated, input_schema, )?, ExprType::Case(e) => Arc::new(CaseExpr::try_new( e.expr .as_ref() - .map(|e| parse_physical_expr(e.as_ref(), registry, input_schema, codec)) + .map(|e| parse_physical_expr(e.as_ref(), ctx, input_schema, codec)) .transpose()?, e.when_then_expr .iter() @@ -312,14 +310,14 @@ pub fn parse_physical_expr( Ok(( parse_required_physical_expr( e.when_expr.as_ref(), - registry, + ctx, "when_expr", input_schema, codec, )?, parse_required_physical_expr( e.then_expr.as_ref(), - registry, + ctx, "then_expr", input_schema, codec, @@ -329,13 +327,13 @@ pub fn parse_physical_expr( .collect::>>()?, e.else_expr .as_ref() - .map(|e| parse_physical_expr(e.as_ref(), registry, input_schema, codec)) + .map(|e| parse_physical_expr(e.as_ref(), ctx, input_schema, codec)) .transpose()?, )?), ExprType::Cast(e) => Arc::new(CastExpr::new( parse_required_physical_expr( e.expr.as_deref(), - registry, + ctx, "expr", input_schema, codec, @@ -346,7 +344,7 @@ pub fn parse_physical_expr( ExprType::TryCast(e) => Arc::new(TryCastExpr::new( parse_required_physical_expr( e.expr.as_deref(), - registry, + ctx, "expr", input_schema, codec, @@ -356,13 +354,18 @@ pub fn parse_physical_expr( ExprType::ScalarUdf(e) => { let udf = match &e.fun_definition { Some(buf) => codec.try_decode_udf(&e.name, buf)?, - None => registry + None => ctx .udf(e.name.as_str()) .or_else(|_| codec.try_decode_udf(&e.name, &[]))?, }; let scalar_fun_def = Arc::clone(&udf); - let args = parse_physical_exprs(&e.args, registry, input_schema, codec)?; + let args = parse_physical_exprs(&e.args, ctx, input_schema, codec)?; + let config_options = + match ctx.state().execution_props().config_options.as_ref() { + Some(config_options) => Arc::clone(config_options), + None => Arc::new(ConfigOptions::default()), + }; Arc::new( 
ScalarFunctionExpr::new( @@ -375,6 +378,7 @@ pub fn parse_physical_expr( true, ) .into(), + config_options, ) .with_nullable(e.nullable), ) @@ -384,14 +388,14 @@ pub fn parse_physical_expr( like_expr.case_insensitive, parse_required_physical_expr( like_expr.expr.as_deref(), - registry, + ctx, "expr", input_schema, codec, )?, parse_required_physical_expr( like_expr.pattern.as_deref(), - registry, + ctx, "pattern", input_schema, codec, @@ -401,7 +405,7 @@ pub fn parse_physical_expr( let inputs: Vec> = extension .inputs .iter() - .map(|e| parse_physical_expr(e, registry, input_schema, codec)) + .map(|e| parse_physical_expr(e, ctx, input_schema, codec)) .collect::>()?; (codec.try_decode_expr(extension.expr.as_slice(), &inputs)?) as _ } @@ -412,12 +416,12 @@ pub fn parse_physical_expr( fn parse_required_physical_expr( expr: Option<&protobuf::PhysicalExprNode>, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, field: &str, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result> { - expr.map(|e| parse_physical_expr(e, registry, input_schema, codec)) + expr.map(|e| parse_physical_expr(e, ctx, input_schema, codec)) .transpose()? .ok_or_else(|| { DataFusionError::Internal(format!("Missing required field {field:?}")) @@ -426,18 +430,14 @@ fn parse_required_physical_expr( pub fn parse_protobuf_hash_partitioning( partitioning: Option<&protobuf::PhysicalHashRepartition>, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result> { match partitioning { Some(hash_part) => { - let expr = parse_physical_exprs( - &hash_part.hash_expr, - registry, - input_schema, - codec, - )?; + let expr = + parse_physical_exprs(&hash_part.hash_expr, ctx, input_schema, codec)?; Ok(Some(Partitioning::Hash( expr, @@ -450,7 +450,7 @@ pub fn parse_protobuf_hash_partitioning( pub fn parse_protobuf_partitioning( partitioning: Option<&protobuf::Partitioning>, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, input_schema: &Schema, codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -464,7 +464,7 @@ pub fn parse_protobuf_partitioning( Some(protobuf::partitioning::PartitionMethod::Hash(hash_repartition)) => { parse_protobuf_hash_partitioning( Some(hash_repartition), - registry, + ctx, input_schema, codec, ) @@ -488,7 +488,7 @@ pub fn parse_protobuf_file_scan_schema( pub fn parse_protobuf_file_scan_config( proto: &protobuf::FileScanExecConf, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, codec: &dyn PhysicalExtensionCodec, file_source: Arc, ) -> Result { @@ -536,7 +536,7 @@ pub fn parse_protobuf_file_scan_config( for node_collection in &proto.output_ordering { let sort_exprs = parse_physical_sort_exprs( &node_collection.physical_sort_expr_nodes, - registry, + ctx, &schema, codec, )?; diff --git a/datafusion/proto/src/physical_plan/mod.rs b/datafusion/proto/src/physical_plan/mod.rs index 301b93d53016..c1792c0746cc 100644 --- a/datafusion/proto/src/physical_plan/mod.rs +++ b/datafusion/proto/src/physical_plan/mod.rs @@ -90,6 +90,7 @@ use datafusion_common::config::TableParquetOptions; use datafusion_common::{internal_err, not_impl_err, DataFusionError, Result}; use datafusion_expr::{AggregateUDF, ScalarUDF, WindowUDF}; +use datafusion::prelude::SessionContext; use prost::bytes::BufMut; use prost::Message; @@ -118,7 +119,7 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { fn try_into_physical_plan( &self, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn 
PhysicalExtensionCodec, ) -> Result> { @@ -130,204 +131,159 @@ impl AsExecutionPlan for protobuf::PhysicalPlanNode { match plan { PhysicalPlanType::Explain(explain) => self.try_into_explain_physical_plan( explain, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::Projection(projection) => self .try_into_projection_physical_plan( projection, - registry, + ctx, runtime, extension_codec, ), - PhysicalPlanType::Filter(filter) => self.try_into_filter_physical_plan( - filter, - registry, - runtime, - extension_codec, - ), - PhysicalPlanType::CsvScan(scan) => self.try_into_csv_scan_physical_plan( - scan, - registry, - runtime, - extension_codec, - ), - PhysicalPlanType::JsonScan(scan) => self.try_into_json_scan_physical_plan( - scan, - registry, - runtime, - extension_codec, - ), + PhysicalPlanType::Filter(filter) => { + self.try_into_filter_physical_plan(filter, ctx, runtime, extension_codec) + } + PhysicalPlanType::CsvScan(scan) => { + self.try_into_csv_scan_physical_plan(scan, ctx, runtime, extension_codec) + } + PhysicalPlanType::JsonScan(scan) => { + self.try_into_json_scan_physical_plan(scan, ctx, runtime, extension_codec) + } #[cfg_attr(not(feature = "parquet"), allow(unused_variables))] PhysicalPlanType::ParquetScan(scan) => self - .try_into_parquet_scan_physical_plan( - scan, - registry, - runtime, - extension_codec, - ), + .try_into_parquet_scan_physical_plan(scan, ctx, runtime, extension_codec), #[cfg_attr(not(feature = "avro"), allow(unused_variables))] - PhysicalPlanType::AvroScan(scan) => self.try_into_avro_scan_physical_plan( - scan, - registry, - runtime, - extension_codec, - ), + PhysicalPlanType::AvroScan(scan) => { + self.try_into_avro_scan_physical_plan(scan, ctx, runtime, extension_codec) + } PhysicalPlanType::CoalesceBatches(coalesce_batches) => self .try_into_coalesce_batches_physical_plan( coalesce_batches, - registry, + ctx, runtime, extension_codec, ), - PhysicalPlanType::Merge(merge) => self.try_into_merge_physical_plan( - merge, - registry, - runtime, - extension_codec, - ), + PhysicalPlanType::Merge(merge) => { + self.try_into_merge_physical_plan(merge, ctx, runtime, extension_codec) + } PhysicalPlanType::Repartition(repart) => self .try_into_repartition_physical_plan( repart, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::GlobalLimit(limit) => self .try_into_global_limit_physical_plan( limit, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::LocalLimit(limit) => self - .try_into_local_limit_physical_plan( - limit, - registry, - runtime, - extension_codec, - ), + .try_into_local_limit_physical_plan(limit, ctx, runtime, extension_codec), PhysicalPlanType::Window(window_agg) => self.try_into_window_physical_plan( window_agg, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::Aggregate(hash_agg) => self .try_into_aggregate_physical_plan( hash_agg, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::HashJoin(hashjoin) => self .try_into_hash_join_physical_plan( hashjoin, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::SymmetricHashJoin(sym_join) => self .try_into_symmetric_hash_join_physical_plan( sym_join, - registry, + ctx, runtime, extension_codec, ), - PhysicalPlanType::Union(union) => self.try_into_union_physical_plan( - union, - registry, - runtime, - extension_codec, - ), + PhysicalPlanType::Union(union) => { + self.try_into_union_physical_plan(union, ctx, runtime, extension_codec) + } PhysicalPlanType::Interleave(interleave) => self .try_into_interleave_physical_plan( 
interleave, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::CrossJoin(crossjoin) => self .try_into_cross_join_physical_plan( crossjoin, - registry, + ctx, runtime, extension_codec, ), - PhysicalPlanType::Empty(empty) => self.try_into_empty_physical_plan( - empty, - registry, - runtime, - extension_codec, - ), + PhysicalPlanType::Empty(empty) => { + self.try_into_empty_physical_plan(empty, ctx, runtime, extension_codec) + } PhysicalPlanType::PlaceholderRow(placeholder) => self .try_into_placeholder_row_physical_plan( placeholder, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::Sort(sort) => { - self.try_into_sort_physical_plan(sort, registry, runtime, extension_codec) + self.try_into_sort_physical_plan(sort, ctx, runtime, extension_codec) } PhysicalPlanType::SortPreservingMerge(sort) => self .try_into_sort_preserving_merge_physical_plan( sort, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::Extension(extension) => self .try_into_extension_physical_plan( extension, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::NestedLoopJoin(join) => self .try_into_nested_loop_join_physical_plan( join, - registry, + ctx, runtime, extension_codec, ), PhysicalPlanType::Analyze(analyze) => self.try_into_analyze_physical_plan( analyze, - registry, - runtime, - extension_codec, - ), - PhysicalPlanType::JsonSink(sink) => self.try_into_json_sink_physical_plan( - sink, - registry, - runtime, - extension_codec, - ), - PhysicalPlanType::CsvSink(sink) => self.try_into_csv_sink_physical_plan( - sink, - registry, + ctx, runtime, extension_codec, ), + PhysicalPlanType::JsonSink(sink) => { + self.try_into_json_sink_physical_plan(sink, ctx, runtime, extension_codec) + } + PhysicalPlanType::CsvSink(sink) => { + self.try_into_csv_sink_physical_plan(sink, ctx, runtime, extension_codec) + } #[cfg_attr(not(feature = "parquet"), allow(unused_variables))] PhysicalPlanType::ParquetSink(sink) => self - .try_into_parquet_sink_physical_plan( - sink, - registry, - runtime, - extension_codec, - ), - PhysicalPlanType::Unnest(unnest) => self.try_into_unnest_physical_plan( - unnest, - registry, - runtime, - extension_codec, - ), + .try_into_parquet_sink_physical_plan(sink, ctx, runtime, extension_codec), + PhysicalPlanType::Unnest(unnest) => { + self.try_into_unnest_physical_plan(unnest, ctx, runtime, extension_codec) + } PhysicalPlanType::Cooperative(cooperative) => self .try_into_cooperative_physical_plan( cooperative, - registry, + ctx, runtime, extension_codec, ), @@ -559,7 +515,7 @@ impl protobuf::PhysicalPlanNode { fn try_into_explain_physical_plan( &self, explain: &protobuf::ExplainExecNode, - _registry: &dyn FunctionRegistry, + _ctx: &SessionContext, _runtime: &RuntimeEnv, _extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -577,12 +533,12 @@ impl protobuf::PhysicalPlanNode { fn try_into_projection_physical_plan( &self, projection: &protobuf::ProjectionExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input: Arc = - into_physical_plan(&projection.input, registry, runtime, extension_codec)?; + into_physical_plan(&projection.input, ctx, runtime, extension_codec)?; let exprs = projection .expr .iter() @@ -591,7 +547,7 @@ impl protobuf::PhysicalPlanNode { Ok(( parse_physical_expr( expr, - registry, + ctx, input.schema().as_ref(), extension_codec, )?, @@ -605,12 +561,12 @@ impl protobuf::PhysicalPlanNode { fn try_into_filter_physical_plan( &self, 
filter: &protobuf::FilterExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input: Arc = - into_physical_plan(&filter.input, registry, runtime, extension_codec)?; + into_physical_plan(&filter.input, ctx, runtime, extension_codec)?; let projection = if !filter.projection.is_empty() { Some( filter @@ -639,12 +595,7 @@ impl protobuf::PhysicalPlanNode { .expr .as_ref() .map(|expr| { - parse_physical_expr( - expr, - registry, - predicate_schema.as_ref(), - extension_codec, - ) + parse_physical_expr(expr, ctx, predicate_schema.as_ref(), extension_codec) }) .transpose()? .ok_or_else(|| { @@ -668,7 +619,7 @@ impl protobuf::PhysicalPlanNode { fn try_into_csv_scan_physical_plan( &self, scan: &protobuf::CsvScanExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, _runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -702,7 +653,7 @@ impl protobuf::PhysicalPlanNode { let conf = FileScanConfigBuilder::from(parse_protobuf_file_scan_config( scan.base_conf.as_ref().unwrap(), - registry, + ctx, extension_codec, source, )?) @@ -715,13 +666,13 @@ impl protobuf::PhysicalPlanNode { fn try_into_json_scan_physical_plan( &self, scan: &protobuf::JsonScanExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, _runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let scan_conf = parse_protobuf_file_scan_config( scan.base_conf.as_ref().unwrap(), - registry, + ctx, extension_codec, Arc::new(JsonSource::new()), )?; @@ -732,7 +683,7 @@ impl protobuf::PhysicalPlanNode { fn try_into_parquet_scan_physical_plan( &self, scan: &protobuf::ParquetScanExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, _runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -761,7 +712,7 @@ impl protobuf::PhysicalPlanNode { .map(|expr| { parse_physical_expr( expr, - registry, + ctx, predicate_schema.as_ref(), extension_codec, ) @@ -779,7 +730,7 @@ impl protobuf::PhysicalPlanNode { } let base_config = parse_protobuf_file_scan_config( base_conf, - registry, + ctx, extension_codec, Arc::new(source), )?; @@ -793,7 +744,7 @@ impl protobuf::PhysicalPlanNode { fn try_into_avro_scan_physical_plan( &self, scan: &protobuf::AvroScanExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, _runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -801,7 +752,7 @@ impl protobuf::PhysicalPlanNode { { let conf = parse_protobuf_file_scan_config( scan.base_conf.as_ref().unwrap(), - registry, + ctx, extension_codec, Arc::new(AvroSource::new()), )?; @@ -814,16 +765,12 @@ impl protobuf::PhysicalPlanNode { fn try_into_coalesce_batches_physical_plan( &self, coalesce_batches: &protobuf::CoalesceBatchesExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { - let input: Arc = into_physical_plan( - &coalesce_batches.input, - registry, - runtime, - extension_codec, - )?; + let input: Arc = + into_physical_plan(&coalesce_batches.input, ctx, runtime, extension_codec)?; Ok(Arc::new( CoalesceBatchesExec::new(input, coalesce_batches.target_batch_size as usize) .with_fetch(coalesce_batches.fetch.map(|f| f as usize)), @@ -833,12 +780,12 @@ impl protobuf::PhysicalPlanNode { fn try_into_merge_physical_plan( &self, merge: &protobuf::CoalescePartitionsExecNode, - registry: &dyn FunctionRegistry, + ctx: 
&SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input: Arc = - into_physical_plan(&merge.input, registry, runtime, extension_codec)?; + into_physical_plan(&merge.input, ctx, runtime, extension_codec)?; Ok(Arc::new( CoalescePartitionsExec::new(input) .with_fetch(merge.fetch.map(|f| f as usize)), @@ -848,15 +795,15 @@ impl protobuf::PhysicalPlanNode { fn try_into_repartition_physical_plan( &self, repart: &protobuf::RepartitionExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input: Arc = - into_physical_plan(&repart.input, registry, runtime, extension_codec)?; + into_physical_plan(&repart.input, ctx, runtime, extension_codec)?; let partitioning = parse_protobuf_partitioning( repart.partitioning.as_ref(), - registry, + ctx, input.schema().as_ref(), extension_codec, )?; @@ -869,12 +816,12 @@ impl protobuf::PhysicalPlanNode { fn try_into_global_limit_physical_plan( &self, limit: &protobuf::GlobalLimitExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input: Arc = - into_physical_plan(&limit.input, registry, runtime, extension_codec)?; + into_physical_plan(&limit.input, ctx, runtime, extension_codec)?; let fetch = if limit.fetch >= 0 { Some(limit.fetch as usize) } else { @@ -890,24 +837,24 @@ impl protobuf::PhysicalPlanNode { fn try_into_local_limit_physical_plan( &self, limit: &protobuf::LocalLimitExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input: Arc = - into_physical_plan(&limit.input, registry, runtime, extension_codec)?; + into_physical_plan(&limit.input, ctx, runtime, extension_codec)?; Ok(Arc::new(LocalLimitExec::new(input, limit.fetch as usize))) } fn try_into_window_physical_plan( &self, window_agg: &protobuf::WindowAggExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input: Arc = - into_physical_plan(&window_agg.input, registry, runtime, extension_codec)?; + into_physical_plan(&window_agg.input, ctx, runtime, extension_codec)?; let input_schema = input.schema(); let physical_window_expr: Vec> = window_agg @@ -916,7 +863,7 @@ impl protobuf::PhysicalPlanNode { .map(|window_expr| { parse_physical_window_expr( window_expr, - registry, + ctx, input_schema.as_ref(), extension_codec, ) @@ -927,12 +874,7 @@ impl protobuf::PhysicalPlanNode { .partition_keys .iter() .map(|expr| { - parse_physical_expr( - expr, - registry, - input.schema().as_ref(), - extension_codec, - ) + parse_physical_expr(expr, ctx, input.schema().as_ref(), extension_codec) }) .collect::>>>()?; @@ -965,12 +907,12 @@ impl protobuf::PhysicalPlanNode { fn try_into_aggregate_physical_plan( &self, hash_agg: &protobuf::AggregateExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input: Arc = - into_physical_plan(&hash_agg.input, registry, runtime, extension_codec)?; + into_physical_plan(&hash_agg.input, ctx, runtime, extension_codec)?; let mode = protobuf::AggregateMode::try_from(hash_agg.mode).map_err(|_| { proto_error(format!( "Received a AggregateNode message with unknown AggregateMode {}", @@ -994,13 +936,8 @@ impl protobuf::PhysicalPlanNode { 
.iter() .zip(hash_agg.group_expr_name.iter()) .map(|(expr, name)| { - parse_physical_expr( - expr, - registry, - input.schema().as_ref(), - extension_codec, - ) - .map(|expr| (expr, name.to_string())) + parse_physical_expr(expr, ctx, input.schema().as_ref(), extension_codec) + .map(|expr| (expr, name.to_string())) }) .collect::, _>>()?; @@ -1009,13 +946,8 @@ impl protobuf::PhysicalPlanNode { .iter() .zip(hash_agg.group_expr_name.iter()) .map(|(expr, name)| { - parse_physical_expr( - expr, - registry, - input.schema().as_ref(), - extension_codec, - ) - .map(|expr| (expr, name.to_string())) + parse_physical_expr(expr, ctx, input.schema().as_ref(), extension_codec) + .map(|expr| (expr, name.to_string())) }) .collect::, _>>()?; @@ -1043,12 +975,7 @@ impl protobuf::PhysicalPlanNode { expr.expr .as_ref() .map(|e| { - parse_physical_expr( - e, - registry, - &physical_schema, - extension_codec, - ) + parse_physical_expr(e, ctx, &physical_schema, extension_codec) }) .transpose() }) @@ -1071,7 +998,7 @@ impl protobuf::PhysicalPlanNode { .map(|e| { parse_physical_expr( e, - registry, + ctx, &physical_schema, extension_codec, ) @@ -1083,7 +1010,7 @@ impl protobuf::PhysicalPlanNode { .map(|e| { parse_physical_sort_expr( e, - registry, + ctx, &physical_schema, extension_codec, ) @@ -1097,12 +1024,10 @@ impl protobuf::PhysicalPlanNode { let agg_udf = match &agg_node.fun_definition { Some(buf) => extension_codec .try_decode_udaf(udaf_name, buf)?, - None => { - registry.udaf(udaf_name).or_else(|_| { - extension_codec - .try_decode_udaf(udaf_name, &[]) - })? - } + None => ctx.udaf(udaf_name).or_else(|_| { + extension_codec + .try_decode_udaf(udaf_name, &[]) + })?, }; AggregateExprBuilder::new(agg_udf, input_phy_expr) @@ -1150,14 +1075,14 @@ impl protobuf::PhysicalPlanNode { fn try_into_hash_join_physical_plan( &self, hashjoin: &protobuf::HashJoinExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let left: Arc = - into_physical_plan(&hashjoin.left, registry, runtime, extension_codec)?; + into_physical_plan(&hashjoin.left, ctx, runtime, extension_codec)?; let right: Arc = - into_physical_plan(&hashjoin.right, registry, runtime, extension_codec)?; + into_physical_plan(&hashjoin.right, ctx, runtime, extension_codec)?; let left_schema = left.schema(); let right_schema = right.schema(); let on: Vec<(PhysicalExprRef, PhysicalExprRef)> = hashjoin @@ -1166,13 +1091,13 @@ impl protobuf::PhysicalPlanNode { .map(|col| { let left = parse_physical_expr( &col.left.clone().unwrap(), - registry, + ctx, left_schema.as_ref(), extension_codec, )?; let right = parse_physical_expr( &col.right.clone().unwrap(), - registry, + ctx, right_schema.as_ref(), extension_codec, )?; @@ -1207,7 +1132,7 @@ impl protobuf::PhysicalPlanNode { f.expression.as_ref().ok_or_else(|| { proto_error("Unexpected empty filter expression") })?, - registry, &schema, + ctx, &schema, extension_codec, )?; let column_indices = f.column_indices @@ -1268,14 +1193,12 @@ impl protobuf::PhysicalPlanNode { fn try_into_symmetric_hash_join_physical_plan( &self, sym_join: &protobuf::SymmetricHashJoinExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { - let left = - into_physical_plan(&sym_join.left, registry, runtime, extension_codec)?; - let right = - into_physical_plan(&sym_join.right, registry, runtime, extension_codec)?; + let left = into_physical_plan(&sym_join.left, 
ctx, runtime, extension_codec)?; + let right = into_physical_plan(&sym_join.right, ctx, runtime, extension_codec)?; let left_schema = left.schema(); let right_schema = right.schema(); let on = sym_join @@ -1284,13 +1207,13 @@ impl protobuf::PhysicalPlanNode { .map(|col| { let left = parse_physical_expr( &col.left.clone().unwrap(), - registry, + ctx, left_schema.as_ref(), extension_codec, )?; let right = parse_physical_expr( &col.right.clone().unwrap(), - registry, + ctx, right_schema.as_ref(), extension_codec, )?; @@ -1325,7 +1248,7 @@ impl protobuf::PhysicalPlanNode { f.expression.as_ref().ok_or_else(|| { proto_error("Unexpected empty filter expression") })?, - registry, &schema, + ctx, &schema, extension_codec, )?; let column_indices = f.column_indices @@ -1350,7 +1273,7 @@ impl protobuf::PhysicalPlanNode { let left_sort_exprs = parse_physical_sort_exprs( &sym_join.left_sort_exprs, - registry, + ctx, &left_schema, extension_codec, )?; @@ -1358,7 +1281,7 @@ impl protobuf::PhysicalPlanNode { let right_sort_exprs = parse_physical_sort_exprs( &sym_join.right_sort_exprs, - registry, + ctx, &right_schema, extension_codec, )?; @@ -1398,17 +1321,13 @@ impl protobuf::PhysicalPlanNode { fn try_into_union_physical_plan( &self, union: &protobuf::UnionExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let mut inputs: Vec> = vec![]; for input in &union.inputs { - inputs.push(input.try_into_physical_plan( - registry, - runtime, - extension_codec, - )?); + inputs.push(input.try_into_physical_plan(ctx, runtime, extension_codec)?); } Ok(Arc::new(UnionExec::new(inputs))) } @@ -1416,17 +1335,13 @@ impl protobuf::PhysicalPlanNode { fn try_into_interleave_physical_plan( &self, interleave: &protobuf::InterleaveExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let mut inputs: Vec> = vec![]; for input in &interleave.inputs { - inputs.push(input.try_into_physical_plan( - registry, - runtime, - extension_codec, - )?); + inputs.push(input.try_into_physical_plan(ctx, runtime, extension_codec)?); } Ok(Arc::new(InterleaveExec::try_new(inputs)?)) } @@ -1434,21 +1349,21 @@ impl protobuf::PhysicalPlanNode { fn try_into_cross_join_physical_plan( &self, crossjoin: &protobuf::CrossJoinExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let left: Arc = - into_physical_plan(&crossjoin.left, registry, runtime, extension_codec)?; + into_physical_plan(&crossjoin.left, ctx, runtime, extension_codec)?; let right: Arc = - into_physical_plan(&crossjoin.right, registry, runtime, extension_codec)?; + into_physical_plan(&crossjoin.right, ctx, runtime, extension_codec)?; Ok(Arc::new(CrossJoinExec::new(left, right))) } fn try_into_empty_physical_plan( &self, empty: &protobuf::EmptyExecNode, - _registry: &dyn FunctionRegistry, + _ctx: &SessionContext, _runtime: &RuntimeEnv, _extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -1459,7 +1374,7 @@ impl protobuf::PhysicalPlanNode { fn try_into_placeholder_row_physical_plan( &self, placeholder: &protobuf::PlaceholderRowExecNode, - _registry: &dyn FunctionRegistry, + _ctx: &SessionContext, _runtime: &RuntimeEnv, _extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { @@ -1470,11 +1385,11 @@ impl protobuf::PhysicalPlanNode { fn try_into_sort_physical_plan( &self, sort: 
&protobuf::SortExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { - let input = into_physical_plan(&sort.input, registry, runtime, extension_codec)?; + let input = into_physical_plan(&sort.input, ctx, runtime, extension_codec)?; let exprs = sort .expr .iter() @@ -1495,7 +1410,7 @@ impl protobuf::PhysicalPlanNode { })? .as_ref(); Ok(PhysicalSortExpr { - expr: parse_physical_expr(expr, registry, input.schema().as_ref(), extension_codec)?, + expr: parse_physical_expr(expr, ctx, input.schema().as_ref(), extension_codec)?, options: SortOptions { descending: !sort_expr.asc, nulls_first: sort_expr.nulls_first, @@ -1522,11 +1437,11 @@ impl protobuf::PhysicalPlanNode { fn try_into_sort_preserving_merge_physical_plan( &self, sort: &protobuf::SortPreservingMergeExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { - let input = into_physical_plan(&sort.input, registry, runtime, extension_codec)?; + let input = into_physical_plan(&sort.input, ctx, runtime, extension_codec)?; let exprs = sort .expr .iter() @@ -1549,7 +1464,7 @@ impl protobuf::PhysicalPlanNode { Ok(PhysicalSortExpr { expr: parse_physical_expr( expr, - registry, + ctx, input.schema().as_ref(), extension_codec, )?, @@ -1575,18 +1490,18 @@ impl protobuf::PhysicalPlanNode { fn try_into_extension_physical_plan( &self, extension: &protobuf::PhysicalExtensionNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let inputs: Vec> = extension .inputs .iter() - .map(|i| i.try_into_physical_plan(registry, runtime, extension_codec)) + .map(|i| i.try_into_physical_plan(ctx, runtime, extension_codec)) .collect::>()?; let extension_node = - extension_codec.try_decode(extension.node.as_slice(), &inputs, registry)?; + extension_codec.try_decode(extension.node.as_slice(), &inputs, ctx)?; Ok(extension_node) } @@ -1594,14 +1509,14 @@ impl protobuf::PhysicalPlanNode { fn try_into_nested_loop_join_physical_plan( &self, join: &protobuf::NestedLoopJoinExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let left: Arc = - into_physical_plan(&join.left, registry, runtime, extension_codec)?; + into_physical_plan(&join.left, ctx, runtime, extension_codec)?; let right: Arc = - into_physical_plan(&join.right, registry, runtime, extension_codec)?; + into_physical_plan(&join.right, ctx, runtime, extension_codec)?; let join_type = protobuf::JoinType::try_from(join.join_type).map_err(|_| { proto_error(format!( "Received a NestedLoopJoinExecNode message with unknown JoinType {}", @@ -1622,7 +1537,7 @@ impl protobuf::PhysicalPlanNode { f.expression.as_ref().ok_or_else(|| { proto_error("Unexpected empty filter expression") })?, - registry, &schema, + ctx, &schema, extension_codec, )?; let column_indices = f.column_indices @@ -1668,12 +1583,12 @@ impl protobuf::PhysicalPlanNode { fn try_into_analyze_physical_plan( &self, analyze: &protobuf::AnalyzeExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input: Arc = - into_physical_plan(&analyze.input, registry, runtime, extension_codec)?; + into_physical_plan(&analyze.input, ctx, runtime, extension_codec)?; Ok(Arc::new(AnalyzeExec::new( 
analyze.verbose, analyze.show_statistics, @@ -1685,11 +1600,11 @@ impl protobuf::PhysicalPlanNode { fn try_into_json_sink_physical_plan( &self, sink: &protobuf::JsonSinkExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { - let input = into_physical_plan(&sink.input, registry, runtime, extension_codec)?; + let input = into_physical_plan(&sink.input, ctx, runtime, extension_codec)?; let data_sink: JsonSink = sink .sink @@ -1703,7 +1618,7 @@ impl protobuf::PhysicalPlanNode { .map(|collection| { parse_physical_sort_exprs( &collection.physical_sort_expr_nodes, - registry, + ctx, &sink_schema, extension_codec, ) @@ -1723,11 +1638,11 @@ impl protobuf::PhysicalPlanNode { fn try_into_csv_sink_physical_plan( &self, sink: &protobuf::CsvSinkExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { - let input = into_physical_plan(&sink.input, registry, runtime, extension_codec)?; + let input = into_physical_plan(&sink.input, ctx, runtime, extension_codec)?; let data_sink: CsvSink = sink .sink @@ -1741,7 +1656,7 @@ impl protobuf::PhysicalPlanNode { .map(|collection| { parse_physical_sort_exprs( &collection.physical_sort_expr_nodes, - registry, + ctx, &sink_schema, extension_codec, ) @@ -1761,14 +1676,13 @@ impl protobuf::PhysicalPlanNode { fn try_into_parquet_sink_physical_plan( &self, sink: &protobuf::ParquetSinkExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { #[cfg(feature = "parquet")] { - let input = - into_physical_plan(&sink.input, registry, runtime, extension_codec)?; + let input = into_physical_plan(&sink.input, ctx, runtime, extension_codec)?; let data_sink: ParquetSink = sink .sink @@ -1782,7 +1696,7 @@ impl protobuf::PhysicalPlanNode { .map(|collection| { parse_physical_sort_exprs( &collection.physical_sort_expr_nodes, - registry, + ctx, &sink_schema, extension_codec, ) @@ -1805,12 +1719,11 @@ impl protobuf::PhysicalPlanNode { fn try_into_unnest_physical_plan( &self, unnest: &protobuf::UnnestExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { - let input = - into_physical_plan(&unnest.input, registry, runtime, extension_codec)?; + let input = into_physical_plan(&unnest.input, ctx, runtime, extension_codec)?; Ok(Arc::new(UnnestExec::new( input, @@ -1831,12 +1744,12 @@ impl protobuf::PhysicalPlanNode { fn try_into_cooperative_physical_plan( &self, field_stream: &protobuf::CooperativeExecNode, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { let input = - into_physical_plan(&field_stream.input, registry, runtime, extension_codec)?; + into_physical_plan(&field_stream.input, ctx, runtime, extension_codec)?; Ok(Arc::new(CooperativeExec::new(input))) } @@ -2853,7 +2766,7 @@ pub trait AsExecutionPlan: Debug + Send + Sync + Clone { fn try_into_physical_plan( &self, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, extension_codec: &dyn PhysicalExtensionCodec, ) -> Result>; @@ -3057,12 +2970,12 @@ impl PhysicalExtensionCodec for ComposedPhysicalExtensionCodec { fn into_physical_plan( node: &Option>, - registry: &dyn FunctionRegistry, + ctx: &SessionContext, runtime: &RuntimeEnv, 
extension_codec: &dyn PhysicalExtensionCodec, ) -> Result> { if let Some(field) = node { - field.try_into_physical_plan(registry, runtime, extension_codec) + field.try_into_physical_plan(ctx, runtime, extension_codec) } else { Err(proto_error("Missing required field in protobuf")) } diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index f8fa1020bc17..7646e6b9a6d5 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -93,7 +93,7 @@ use datafusion::physical_plan::{ }; use datafusion::prelude::{ParquetReadOptions, SessionContext}; use datafusion::scalar::ScalarValue; -use datafusion_common::config::TableParquetOptions; +use datafusion_common::config::{ConfigOptions, TableParquetOptions}; use datafusion_common::file_options::csv_writer::CsvWriterOptions; use datafusion_common::file_options::json_writer::JsonWriterOptions; use datafusion_common::parsers::CompressionTypeVariant; @@ -989,6 +989,7 @@ fn roundtrip_scalar_udf() -> Result<()> { fun_def, vec![col("a", &schema)?], Field::new("f", DataType::Int64, true).into(), + Arc::new(ConfigOptions::default()), ); let project = @@ -1117,6 +1118,7 @@ fn roundtrip_scalar_udf_extension_codec() -> Result<()> { Arc::new(ScalarUDF::from(MyRegexUdf::new(".*".to_string()))), vec![col("text", &schema)?], Field::new("f", DataType::Int64, true).into(), + Arc::new(ConfigOptions::default()), )); let filter = Arc::new(FilterExec::try_new( @@ -1219,6 +1221,7 @@ fn roundtrip_aggregate_udf_extension_codec() -> Result<()> { Arc::new(ScalarUDF::from(MyRegexUdf::new(".*".to_string()))), vec![col("text", &schema)?], Field::new("f", DataType::Int64, true).into(), + Arc::new(ConfigOptions::default()), )); let udaf = Arc::new(AggregateUDF::from(MyAggregateUDF::new( diff --git a/datafusion/spark/benches/char.rs b/datafusion/spark/benches/char.rs index b7819b5dd6a4..e30e21f69d18 100644 --- a/datafusion/spark/benches/char.rs +++ b/datafusion/spark/benches/char.rs @@ -20,6 +20,7 @@ extern crate criterion; use arrow::datatypes::{DataType, Field}; use arrow::{array::PrimitiveArray, datatypes::Int64Type}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_common::config::ConfigOptions; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_spark::function::string::char; use rand::rngs::StdRng; @@ -54,6 +55,7 @@ fn criterion_benchmark(c: &mut Criterion) { .enumerate() .map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into()) .collect::>(); + let config_options = Arc::new(ConfigOptions::default()); c.bench_function("char", |b| { b.iter(|| { @@ -64,6 +66,7 @@ fn criterion_benchmark(c: &mut Criterion) { arg_fields: arg_fields.clone(), number_rows: size, return_field: Arc::new(Field::new("f", DataType::Utf8, true)), + config_options: Arc::clone(&config_options), }) .unwrap(), ) diff --git a/datafusion/spark/src/function/utils.rs b/datafusion/spark/src/function/utils.rs index 85af4bb927ca..0db11e6f1b4e 100644 --- a/datafusion/spark/src/function/utils.rs +++ b/datafusion/spark/src/function/utils.rs @@ -23,8 +23,9 @@ pub mod test { /// $EXPECTED_TYPE is the expected value type /// $EXPECTED_DATA_TYPE is the expected result type /// $ARRAY_TYPE is the column type after function applied + /// $CONFIG_OPTIONS config options to pass to function macro_rules! 
test_scalar_function {
-    ($FUNC:expr, $ARGS:expr, $EXPECTED:expr, $EXPECTED_TYPE:ty, $EXPECTED_DATA_TYPE:expr, $ARRAY_TYPE:ident) => {
+    ($FUNC:expr, $ARGS:expr, $EXPECTED:expr, $EXPECTED_TYPE:ty, $EXPECTED_DATA_TYPE:expr, $ARRAY_TYPE:ident, $CONFIG_OPTIONS:expr) => {
         let expected: datafusion_common::Result<Vec<Option<$EXPECTED_TYPE>>> = $EXPECTED;
         let func = $FUNC;
@@ -72,6 +73,7 @@ pub mod test {
                 number_rows: cardinality,
                 return_field,
                 arg_fields: arg_fields.clone(),
+                config_options: $CONFIG_OPTIONS,
             });
             assert_eq!(result.is_ok(), true, "function returned an error: {}", result.unwrap_err());
@@ -101,6 +103,7 @@ pub mod test {
                 number_rows: cardinality,
                 return_field,
                 arg_fields,
+                config_options: $CONFIG_OPTIONS,
             }) {
                 Ok(_) => assert!(false, "expected error"),
                 Err(error) => {
@@ -111,6 +114,18 @@
             }
         };
     };
+
+    ($FUNC:expr, $ARGS:expr, $EXPECTED:expr, $EXPECTED_TYPE:ty, $EXPECTED_DATA_TYPE:expr, $ARRAY_TYPE:ident) => {
+        test_scalar_function!(
+            $FUNC,
+            $ARGS,
+            $EXPECTED,
+            $EXPECTED_TYPE,
+            $EXPECTED_DATA_TYPE,
+            $ARRAY_TYPE,
+            std::sync::Arc::new(datafusion_common::config::ConfigOptions::default())
+        )
+    };
 }

 pub(crate) use test_scalar_function;
diff --git a/docs/source/library-user-guide/functions/adding-udfs.md b/docs/source/library-user-guide/functions/adding-udfs.md
index 5c95cb330179..da9b6e37a644 100644
--- a/docs/source/library-user-guide/functions/adding-udfs.md
+++ b/docs/source/library-user-guide/functions/adding-udfs.md
@@ -449,7 +449,6 @@ impl AsyncScalarUDFImpl for AsyncUpper {
     async fn invoke_async_with_args(
         &self,
         args: ScalarFunctionArgs,
-        _option: &ConfigOptions,
     ) -> Result<ColumnarValue> {
         let value = &args.args[0];
         // This function simply implements a simple string to uppercase conversion
@@ -549,7 +548,6 @@ We can now transfer the async UDF into the normal scalar using `into_scalar_udf`
 # async fn invoke_async_with_args(
 #     &self,
 #     args: ScalarFunctionArgs,
-#     _option: &ConfigOptions,
 # ) -> Result<ColumnarValue> {
 #     trace!("Invoking async_upper with args: {:?}", args);
 #     let value = &args.args[0];
diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md
index 0c9e10476b9e..eece034ce33e 100644
--- a/docs/source/library-user-guide/upgrading.md
+++ b/docs/source/library-user-guide/upgrading.md
@@ -24,6 +24,76 @@
 **Note:** DataFusion `50.0.0` has not been released yet. The information
 provided in this section pertains to features and changes that have already
 been merged to the main branch and are awaiting release in this version. You
 can see the current [status of the `50.0.0` release here](https://github.com/apache/datafusion/issues/16799)
+
+### `SessionState`, `SessionConfig`, and `OptimizerConfig` return `&Arc<ConfigOptions>` instead of `&ConfigOptions`
+
+To provide broader access to `ConfigOptions` and reduce required clones, some
+APIs have been changed to return a `&Arc<ConfigOptions>` instead of a
+`&ConfigOptions`. This allows sharing the same `ConfigOptions` across multiple
+threads without needing to clone the entire `ConfigOptions` structure unless it
+is modified.
+
+Most users will not be impacted by this change since the Rust compiler
+typically dereferences the `Arc` automatically when needed.
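+
+For instance, code that only reads configuration values through the returned
+`Arc` typically keeps compiling unchanged. A minimal sketch (assuming `state`
+is a `SessionState`; `batch_size` is just an illustrative option):
+
+```rust
+# /* comment to avoid running
+// `Arc<ConfigOptions>` dereferences to `ConfigOptions`, so reading a field
+// through the returned value works exactly as before:
+let batch_size = state.options().execution.batch_size;
+# */
+```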
+
+However, in some cases you may have to change your code to explicitly call
+`as_ref()`. For example, change from
+
+```rust
+# /* comment to avoid running
+let optimizer_config: &ConfigOptions = state.options();
+# */
+```
+
+To
+
+```rust
+# /* comment to avoid running
+let optimizer_config: &ConfigOptions = state.options().as_ref();
+# */
+```
+
+See PR [#16970](https://github.com/apache/datafusion/pull/16970)
+
+### API Change to `AsyncScalarUDFImpl::invoke_async_with_args`
+
+The `invoke_async_with_args` method of the `AsyncScalarUDFImpl` trait has been
+updated to remove the `_option: &ConfigOptions` parameter, simplifying the API
+now that the `ConfigOptions` can be accessed through the `ScalarFunctionArgs`
+parameter.
+
+You can change your code from this:
+
+```rust
+# /* comment to avoid running
+impl AsyncScalarUDFImpl for AskLLM {
+    async fn invoke_async_with_args(
+        &self,
+        args: ScalarFunctionArgs,
+        _option: &ConfigOptions,
+    ) -> Result<ColumnarValue> {
+        ..
+    }
+    ...
+}
+# */
+```
+
+To this:
+
+```rust
+# /* comment to avoid running
+impl AsyncScalarUDFImpl for AskLLM {
+    async fn invoke_async_with_args(
+        &self,
+        args: ScalarFunctionArgs,
+    ) -> Result<ColumnarValue> {
+        let options = &args.config_options;
+        ..
+    }
+    ...
+}
+# */
+```
+
 ### Upgrade to arrow `56.0.0` and parquet `56.0.0`

 This version of DataFusion upgrades the underlying Apache Arrow implementation