From f97d5014878d96a88af60efda93b710e033625f4 Mon Sep 17 00:00:00 2001
From: yazan
Date: Fri, 25 Jul 2025 19:23:33 +0200
Subject: [PATCH] CI now ensures all doc strings remain formatted

---
 .github/workflows/ci.yml | 18 ++
 .../examples/advanced_parquet_index.rs | 1 -
 .../external_dependency/query-aws-s3.rs | 1 -
 .../examples/flight/flight_sql_server.rs | 1 -
 datafusion-examples/examples/parquet_index.rs | 1 -
 datafusion-examples/examples/sql_query.rs | 1 -
 datafusion-examples/examples/thread_pools.rs | 2 +-
 datafusion/common/src/config.rs | 47 ++-
 datafusion/common/src/dfschema.rs | 22 +-
 datafusion/common/src/diagnostic.rs | 7 +-
 datafusion/common/src/error.rs | 17 +-
 datafusion/common/src/scalar/mod.rs | 160 +++++-----
 .../common/src/scalar/struct_builder.rs | 12 +-
 datafusion/common/src/stats.rs | 46 +--
 datafusion/common/src/table_reference.rs | 12 +-
 datafusion/common/src/test_util.rs | 4 +-
 datafusion/common/src/tree_node.rs | 13 +-
 datafusion/common/src/types/logical.rs | 12 +-
 datafusion/common/src/utils/memory.rs | 8 +-
 datafusion/common/src/utils/mod.rs | 43 +--
 datafusion/common/src/utils/proxy.rs | 16 +-
 datafusion/core/src/dataframe/mod.rs | 282 ++++++++++++------
 datafusion/core/src/dataframe/parquet.rs | 16 +-
 .../core/src/datasource/listing/table.rs | 13 +-
 datafusion/core/src/execution/context/csv.rs | 11 +-
 datafusion/core/src/execution/context/mod.rs | 122 ++++----
 .../core/src/execution/session_state.rs | 14 +-
 datafusion/core/src/lib.rs | 34 ++-
 .../aggregation_fuzzer/context_generator.rs | 1 -
 .../aggregation_fuzzer/data_generator.rs | 2 -
 .../fuzz_cases/aggregation_fuzzer/fuzzer.rs | 1 -
 .../aggregation_fuzzer/query_builder.rs | 4 +-
 .../physical_optimizer/enforce_sorting.rs | 1 -
 datafusion/core/tests/sql/mod.rs | 1 -
 .../tests/user_defined/user_defined_plan.rs | 1 -
 .../src/avro_to_arrow/reader.rs | 8 +-
 .../datasource-parquet/src/page_filter.rs | 1 -
 datafusion/datasource-parquet/src/source.rs | 2 -
 datafusion/datasource/src/file_scan_config.rs | 36 +--
 datafusion/datasource/src/mod.rs | 1 -
 datafusion/datasource/src/url.rs | 1 -
 datafusion/datasource/src/write/mod.rs | 12 +-
 datafusion/doc/src/lib.rs | 2 +-
 datafusion/execution/src/config.rs | 13 +-
 datafusion/execution/src/runtime_env.rs | 6 +-
 .../expr-common/src/interval_arithmetic.rs | 63 ++--
 datafusion/expr-common/src/signature.rs | 14 +-
 datafusion/expr/src/expr.rs | 61 ++--
 datafusion/expr/src/expr_schema.rs | 21 +-
 datafusion/expr/src/logical_plan/builder.rs | 15 +-
 datafusion/expr/src/logical_plan/display.rs | 12 +-
 datafusion/expr/src/logical_plan/extension.rs | 28 +-
 datafusion/expr/src/logical_plan/plan.rs | 77 ++---
 datafusion/expr/src/select_expr.rs | 6 +-
 datafusion/expr/src/udaf.rs | 1 -
 datafusion/expr/src/udf.rs | 8 +-
 datafusion/expr/src/utils.rs | 21 +-
 datafusion/expr/src/window_frame.rs | 1 -
 .../src/aggregate/groups_accumulator.rs | 5 +-
 .../groups_accumulator/accumulate.rs | 1 -
 .../aggregate/groups_accumulator/prim_op.rs | 1 -
 .../functions-aggregate-common/src/tdigest.rs | 3 +-
 datafusion/functions-aggregate/src/count.rs | 4 +-
 datafusion/functions-aggregate/src/median.rs | 1 -
 datafusion/functions-nested/src/expr_ext.rs | 6 +-
 datafusion/functions-nested/src/lib.rs | 1 -
 .../functions-window-common/src/expr.rs | 1 -
 .../functions-window-common/src/field.rs | 1 -
 .../functions-window-common/src/partition.rs | 1 -
 datafusion/functions-window/src/lib.rs | 1 -
 datafusion/functions-window/src/nth_value.rs | 3 -
 datafusion/functions/benches/ltrim.rs | 1 -
 datafusion/functions/src/core/expr_ext.rs | 3 +-
 datafusion/functions/src/core/nullif.rs | 1 -
 datafusion/functions/src/datetime/common.rs | 2 -
 datafusion/macros/src/user_doc.rs | 1 -
 datafusion/optimizer/src/push_down_filter.rs | 4 -
 datafusion/optimizer/src/push_down_limit.rs | 1 -
 .../simplify_expressions/expr_simplifier.rs | 110 ++++---
 .../src/simplify_expressions/unwrap_cast.rs | 1 -
 .../physical-expr-common/src/physical_expr.rs | 1 -
 .../src/equivalence/properties/mod.rs | 9 +-
 .../physical-expr/src/expressions/column.rs | 6 +-
 .../physical-expr/src/intervals/cp_solver.rs | 8 +-
 datafusion/physical-expr/src/physical_expr.rs | 16 +-
 .../src/combine_partial_final_agg.rs | 1 -
 .../src/enforce_distribution.rs | 1 -
 .../physical-optimizer/src/join_selection.rs | 1 -
 .../src/aggregates/group_values/mod.rs | 1 -
 .../group_values/multi_group_by/mod.rs | 12 -
 .../group_values/single_group_by/primitive.rs | 1 -
 .../src/aggregates/order/partial.rs | 2 +-
 .../physical-plan/src/aggregates/row_hash.rs | 3 +-
 datafusion/physical-plan/src/coalesce/mod.rs | 1 -
 .../physical-plan/src/execution_plan.rs | 21 +-
 .../physical-plan/src/joins/hash_join.rs | 7 +-
 .../src/joins/stream_join_utils.rs | 1 -
 .../src/joins/symmetric_hash_join.rs | 1 -
 datafusion/physical-plan/src/joins/utils.rs | 1 -
 .../physical-plan/src/metrics/builder.rs | 19 +-
 .../physical-plan/src/metrics/custom.rs | 3 +-
 datafusion/physical-plan/src/metrics/mod.rs | 25 +-
 .../physical-plan/src/recursive_query.rs | 1 -
 .../physical-plan/src/repartition/mod.rs | 11 +-
 datafusion/physical-plan/src/sorts/cursor.rs | 2 +-
 datafusion/physical-plan/src/sorts/merge.rs | 1 -
 .../physical-plan/src/sorts/partial_sort.rs | 4 +-
 datafusion/physical-plan/src/sorts/sort.rs | 1 -
 datafusion/physical-plan/src/stream.rs | 15 +-
 datafusion/physical-plan/src/test/exec.rs | 1 -
 datafusion/physical-plan/src/union.rs | 2 +-
 datafusion/physical-plan/src/unnest.rs | 6 +-
 datafusion/proto-common/src/lib.rs | 43 +--
 datafusion/proto/src/lib.rs | 14 +-
 datafusion/pruning/src/pruning_predicate.rs | 3 +-
 datafusion/session/src/session.rs | 9 +-
 datafusion/spark/src/lib.rs | 2 +-
 datafusion/sql/src/parser.rs | 13 +-
 datafusion/sql/src/planner.rs | 1 -
 datafusion/sql/src/resolve.rs | 12 +-
 datafusion/sql/src/unparser/expr.rs | 2 +-
 datafusion/sql/src/unparser/plan.rs | 7 +-
 datafusion/sql/src/utils.rs | 1 -
 datafusion/sql/tests/cases/diagnostic.rs | 4 +-
 .../engines/datafusion_engine/normalize.rs | 1 -
 datafusion/substrait/src/lib.rs | 27 +-
 .../consumer/substrait_consumer.rs | 1 -
 rustfmt.toml | 1 +
 128 files changed, 966 insertions(+), 870 deletions(-)
 create mode 100644 .github/workflows/ci.yml

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 000000000000..f003dd10e1a8
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,18 @@
+name: CI
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+
+jobs:
+  fmt:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Install Rust nightly
+        run: rustup toolchain install nightly
+
+      - name: Run rustfmt (nightly)
+        run: cargo +nightly fmt --all -- --config format_code_in_doc_comments=true
diff --git a/datafusion-examples/examples/advanced_parquet_index.rs b/datafusion-examples/examples/advanced_parquet_index.rs
index efaee23366a1..e4fbdcd65335 100644
--- a/datafusion-examples/examples/advanced_parquet_index.rs
+++ b/datafusion-examples/examples/advanced_parquet_index.rs
@@ -121,7 +121,6 @@ use url::Url;
 /// │ ╚═══════════════════╝ │ 1.
With cached ParquetMetadata, so /// └───────────────────────┘ the ParquetSource does not re-read / /// Parquet File decode the thrift footer -/// /// ``` /// /// Within a Row Group, Column Chunks store data in DataPages. This example also diff --git a/datafusion-examples/examples/external_dependency/query-aws-s3.rs b/datafusion-examples/examples/external_dependency/query-aws-s3.rs index da2d7e4879f9..cd0b4562d5f2 100644 --- a/datafusion-examples/examples/external_dependency/query-aws-s3.rs +++ b/datafusion-examples/examples/external_dependency/query-aws-s3.rs @@ -28,7 +28,6 @@ use url::Url; /// /// - AWS_ACCESS_KEY_ID /// - AWS_SECRET_ACCESS_KEY -/// #[tokio::main] async fn main() -> Result<()> { let ctx = SessionContext::new(); diff --git a/datafusion-examples/examples/flight/flight_sql_server.rs b/datafusion-examples/examples/flight/flight_sql_server.rs index 5a573ed52320..5f404db38dbb 100644 --- a/datafusion-examples/examples/flight/flight_sql_server.rs +++ b/datafusion-examples/examples/flight/flight_sql_server.rs @@ -68,7 +68,6 @@ macro_rules! status { /// /// Based heavily on Ballista's implementation: https://github.com/apache/datafusion-ballista/blob/main/ballista/scheduler/src/flight_sql.rs /// and the example in arrow-rs: https://github.com/apache/arrow-rs/blob/master/arrow-flight/examples/flight_sql_server.rs -/// #[tokio::main] async fn main() -> Result<(), Box> { env_logger::init(); diff --git a/datafusion-examples/examples/parquet_index.rs b/datafusion-examples/examples/parquet_index.rs index a9c0d2c4dab3..9ebb37961f52 100644 --- a/datafusion-examples/examples/parquet_index.rs +++ b/datafusion-examples/examples/parquet_index.rs @@ -99,7 +99,6 @@ use url::Url; /// Thus some parquet files are │ │ /// "pruned" and thus are not └─────────────┘ /// scanned at all Parquet Files -/// /// ``` /// /// [`ListingTable`]: datafusion::datasource::listing::ListingTable diff --git a/datafusion-examples/examples/sql_query.rs b/datafusion-examples/examples/sql_query.rs index 0ac203cfb7e7..4da07d33d03d 100644 --- a/datafusion-examples/examples/sql_query.rs +++ b/datafusion-examples/examples/sql_query.rs @@ -32,7 +32,6 @@ use std::sync::Arc; /// /// [`query_memtable`]: a simple query against a [`MemTable`] /// [`query_parquet`]: a simple query against a directory with multiple Parquet files -/// #[tokio::main] async fn main() -> Result<()> { query_memtable().await?; diff --git a/datafusion-examples/examples/thread_pools.rs b/datafusion-examples/examples/thread_pools.rs index bba56b2932ab..9842cccfbfe8 100644 --- a/datafusion-examples/examples/thread_pools.rs +++ b/datafusion-examples/examples/thread_pools.rs @@ -342,7 +342,7 @@ impl CpuRuntime { /// message such as: /// /// ```text - ///A Tokio 1.x context was found, but IO is disabled. + /// A Tokio 1.x context was found, but IO is disabled. 
/// ``` pub fn handle(&self) -> &Handle { &self.handle diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 5796edc283e0..db97f5c40666 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -55,7 +55,7 @@ use hex; /// /// Field 3 doc /// field3: Option, default = None /// } -///} +/// } /// ``` /// /// Will generate @@ -1139,36 +1139,35 @@ impl ConfigOptions { /// # Example /// ``` /// use datafusion_common::{ -/// config::ConfigExtension, extensions_options, -/// config::ConfigOptions, +/// config::ConfigExtension, config::ConfigOptions, extensions_options, /// }; -/// // Define a new configuration struct using the `extensions_options` macro -/// extensions_options! { -/// /// My own config options. -/// pub struct MyConfig { -/// /// Should "foo" be replaced by "bar"? -/// pub foo_to_bar: bool, default = true +/// // Define a new configuration struct using the `extensions_options` macro +/// extensions_options! { +/// /// My own config options. +/// pub struct MyConfig { +/// /// Should "foo" be replaced by "bar"? +/// pub foo_to_bar: bool, default = true /// -/// /// How many "baz" should be created? -/// pub baz_count: usize, default = 1337 -/// } -/// } +/// /// How many "baz" should be created? +/// pub baz_count: usize, default = 1337 +/// } +/// } /// -/// impl ConfigExtension for MyConfig { +/// impl ConfigExtension for MyConfig { /// const PREFIX: &'static str = "my_config"; -/// } +/// } /// -/// // set up config struct and register extension -/// let mut config = ConfigOptions::default(); -/// config.extensions.insert(MyConfig::default()); +/// // set up config struct and register extension +/// let mut config = ConfigOptions::default(); +/// config.extensions.insert(MyConfig::default()); /// -/// // overwrite config default -/// config.set("my_config.baz_count", "42").unwrap(); +/// // overwrite config default +/// config.set("my_config.baz_count", "42").unwrap(); /// -/// // check config state -/// let my_config = config.extensions.get::().unwrap(); -/// assert!(my_config.foo_to_bar,); -/// assert_eq!(my_config.baz_count, 42,); +/// // check config state +/// let my_config = config.extensions.get::().unwrap(); +/// assert!(my_config.foo_to_bar,); +/// assert_eq!(my_config.baz_count, 42,); /// ``` /// /// # Note: diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 88303bbcd7d2..948443669590 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -56,12 +56,10 @@ pub type DFSchemaRef = Arc; /// an Arrow schema. 
/// /// ```rust -/// use datafusion_common::{DFSchema, Column}; /// use arrow::datatypes::{DataType, Field, Schema}; +/// use datafusion_common::{Column, DFSchema}; /// -/// let arrow_schema = Schema::new(vec![ -/// Field::new("c1", DataType::Int32, false), -/// ]); +/// let arrow_schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]); /// /// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema).unwrap(); /// let column = Column::from_qualified_name("t1.c1"); @@ -77,12 +75,10 @@ pub type DFSchemaRef = Arc; /// Create an unqualified schema using TryFrom: /// /// ```rust -/// use datafusion_common::{DFSchema, Column}; /// use arrow::datatypes::{DataType, Field, Schema}; +/// use datafusion_common::{Column, DFSchema}; /// -/// let arrow_schema = Schema::new(vec![ -/// Field::new("c1", DataType::Int32, false), -/// ]); +/// let arrow_schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]); /// /// let df_schema = DFSchema::try_from(arrow_schema).unwrap(); /// let column = Column::new_unqualified("c1"); @@ -94,13 +90,15 @@ pub type DFSchemaRef = Arc; /// Use the `Into` trait to convert `DFSchema` into an Arrow schema: /// /// ```rust +/// use arrow::datatypes::{Field, Schema}; /// use datafusion_common::DFSchema; -/// use arrow::datatypes::{Schema, Field}; /// use std::collections::HashMap; /// -/// let df_schema = DFSchema::from_unqualified_fields(vec![ -/// Field::new("c1", arrow::datatypes::DataType::Int32, false), -/// ].into(),HashMap::new()).unwrap(); +/// let df_schema = DFSchema::from_unqualified_fields( +/// vec![Field::new("c1", arrow::datatypes::DataType::Int32, false)].into(), +/// HashMap::new(), +/// ) +/// .unwrap(); /// let schema = Schema::from(df_schema); /// assert_eq!(schema.fields().len(), 1); /// ``` diff --git a/datafusion/common/src/diagnostic.rs b/datafusion/common/src/diagnostic.rs index 0dce8e6a56ec..b25bf1c12e44 100644 --- a/datafusion/common/src/diagnostic.rs +++ b/datafusion/common/src/diagnostic.rs @@ -30,8 +30,11 @@ use crate::Span; /// ```rust /// # use datafusion_common::{Location, Span, Diagnostic}; /// let span = Some(Span { -/// start: Location{ line: 2, column: 1 }, -/// end: Location{ line: 4, column: 15 } +/// start: Location { line: 2, column: 1 }, +/// end: Location { +/// line: 4, +/// column: 15, +/// }, /// }); /// let diagnostic = Diagnostic::new_error("Something went wrong", span) /// .with_help("Have you tried turning it on and off again?", None); diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index 88029ea474fd..3ff58151684c 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -677,7 +677,10 @@ impl DataFusionError { /// let mut builder = DataFusionError::builder(); /// builder.add_error(DataFusionError::Internal("foo".to_owned())); /// // ok_or returns the value if no errors have been added -/// assert_contains!(builder.error_or(42).unwrap_err().to_string(), "Internal error: foo"); +/// assert_contains!( +/// builder.error_or(42).unwrap_err().to_string(), +/// "Internal error: foo" +/// ); /// ``` #[derive(Debug, Default)] pub struct DataFusionErrorBuilder(Vec); @@ -695,7 +698,10 @@ impl DataFusionErrorBuilder { /// # use datafusion_common::{assert_contains, DataFusionError}; /// let mut builder = DataFusionError::builder(); /// builder.add_error(DataFusionError::Internal("foo".to_owned())); - /// assert_contains!(builder.error_or(42).unwrap_err().to_string(), "Internal error: foo"); + /// assert_contains!( + /// 
builder.error_or(42).unwrap_err().to_string(), + /// "Internal error: foo" + /// ); /// ``` pub fn add_error(&mut self, error: DataFusionError) { self.0.push(error); @@ -707,8 +713,11 @@ impl DataFusionErrorBuilder { /// ``` /// # use datafusion_common::{assert_contains, DataFusionError}; /// let builder = DataFusionError::builder() - /// .with_error(DataFusionError::Internal("foo".to_owned())); - /// assert_contains!(builder.error_or(42).unwrap_err().to_string(), "Internal error: foo"); + /// .with_error(DataFusionError::Internal("foo".to_owned())); + /// assert_contains!( + /// builder.error_or(42).unwrap_err().to_string(), + /// "Internal error: foo" + /// ); /// ``` pub fn with_error(mut self, error: DataFusionError) -> Self { self.0.push(error); diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 6fdf5d6978da..ab11469c4e4a 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -167,9 +167,9 @@ pub use struct_builder::ScalarStructBuilder; /// let field_b = Field::new("b", DataType::Utf8, false); /// /// let s1 = ScalarStructBuilder::new() -/// .with_scalar(field_a, ScalarValue::from(1i32)) -/// .with_scalar(field_b, ScalarValue::from("foo")) -/// .build(); +/// .with_scalar(field_a, ScalarValue::from(1i32)) +/// .with_scalar(field_b, ScalarValue::from("foo")) +/// .build(); /// ``` /// /// ## Example: Creating a null [`ScalarValue::Struct`] using [`ScalarStructBuilder`] @@ -195,13 +195,13 @@ pub use struct_builder::ScalarStructBuilder; /// // Build a struct like: {a: 1, b: "foo"} /// // Field description /// let fields = Fields::from(vec![ -/// Field::new("a", DataType::Int32, false), -/// Field::new("b", DataType::Utf8, false), +/// Field::new("a", DataType::Int32, false), +/// Field::new("b", DataType::Utf8, false), /// ]); /// // one row arrays for each field /// let arrays: Vec = vec![ -/// Arc::new(Int32Array::from(vec![1])), -/// Arc::new(StringArray::from(vec!["foo"])), +/// Arc::new(Int32Array::from(vec![1])), +/// Arc::new(StringArray::from(vec!["foo"])), /// ]; /// // no nulls for this array /// let nulls = None; @@ -1011,8 +1011,8 @@ impl ScalarValue { /// /// Example /// ``` - /// use datafusion_common::ScalarValue; /// use arrow::datatypes::DataType; + /// use datafusion_common::ScalarValue; /// /// let scalar = ScalarValue::try_new_null(&DataType::Int32).unwrap(); /// assert_eq!(scalar.is_null(), true); @@ -1810,23 +1810,16 @@ impl ScalarValue { /// /// # Example /// ``` - /// use datafusion_common::ScalarValue; /// use arrow::array::{BooleanArray, Int32Array}; + /// use datafusion_common::ScalarValue; /// /// let arr = Int32Array::from(vec![Some(1), None, Some(10)]); /// let five = ScalarValue::Int32(Some(5)); /// - /// let result = arrow::compute::kernels::cmp::lt( - /// &arr, - /// &five.to_scalar().unwrap(), - /// ).unwrap(); + /// let result = + /// arrow::compute::kernels::cmp::lt(&arr, &five.to_scalar().unwrap()).unwrap(); /// - /// let expected = BooleanArray::from(vec![ - /// Some(true), - /// None, - /// Some(false) - /// ] - /// ); + /// let expected = BooleanArray::from(vec![Some(true), None, Some(false)]); /// /// assert_eq!(&result, &expected); /// ``` @@ -1844,26 +1837,20 @@ impl ScalarValue { /// /// # Example /// ``` - /// use datafusion_common::ScalarValue; /// use arrow::array::{ArrayRef, BooleanArray}; + /// use datafusion_common::ScalarValue; /// /// let scalars = vec![ - /// ScalarValue::Boolean(Some(true)), - /// ScalarValue::Boolean(None), - /// 
ScalarValue::Boolean(Some(false)), + /// ScalarValue::Boolean(Some(true)), + /// ScalarValue::Boolean(None), + /// ScalarValue::Boolean(Some(false)), /// ]; /// /// // Build an Array from the list of ScalarValues - /// let array = ScalarValue::iter_to_array(scalars.into_iter()) - /// .unwrap(); + /// let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap(); /// - /// let expected: ArrayRef = std::sync::Arc::new( - /// BooleanArray::from(vec![ - /// Some(true), - /// None, - /// Some(false) - /// ] - /// )); + /// let expected: ArrayRef = + /// std::sync::Arc::new(BooleanArray::from(vec![Some(true), None, Some(false)])); /// /// assert_eq!(&array, &expected); /// ``` @@ -2228,23 +2215,24 @@ impl ScalarValue { /// /// Example /// ``` - /// use datafusion_common::ScalarValue; - /// use arrow::array::{ListArray, Int32Array}; + /// use arrow::array::{Int32Array, ListArray}; /// use arrow::datatypes::{DataType, Int32Type}; /// use datafusion_common::cast::as_list_array; + /// use datafusion_common::ScalarValue; /// /// let scalars = vec![ - /// ScalarValue::Int32(Some(1)), - /// ScalarValue::Int32(None), - /// ScalarValue::Int32(Some(2)) + /// ScalarValue::Int32(Some(1)), + /// ScalarValue::Int32(None), + /// ScalarValue::Int32(Some(2)), /// ]; /// /// let result = ScalarValue::new_list(&scalars, &DataType::Int32, true); /// - /// let expected = ListArray::from_iter_primitive::( - /// vec![ - /// Some(vec![Some(1), None, Some(2)]) - /// ]); + /// let expected = ListArray::from_iter_primitive::(vec![Some(vec![ + /// Some(1), + /// None, + /// Some(2), + /// ])]); /// /// assert_eq!(*result, expected); /// ``` @@ -2288,23 +2276,25 @@ impl ScalarValue { /// /// Example /// ``` - /// use datafusion_common::ScalarValue; - /// use arrow::array::{ListArray, Int32Array}; + /// use arrow::array::{Int32Array, ListArray}; /// use arrow::datatypes::{DataType, Int32Type}; /// use datafusion_common::cast::as_list_array; + /// use datafusion_common::ScalarValue; /// /// let scalars = vec![ - /// ScalarValue::Int32(Some(1)), - /// ScalarValue::Int32(None), - /// ScalarValue::Int32(Some(2)) + /// ScalarValue::Int32(Some(1)), + /// ScalarValue::Int32(None), + /// ScalarValue::Int32(Some(2)), /// ]; /// - /// let result = ScalarValue::new_list_from_iter(scalars.into_iter(), &DataType::Int32, true); + /// let result = + /// ScalarValue::new_list_from_iter(scalars.into_iter(), &DataType::Int32, true); /// - /// let expected = ListArray::from_iter_primitive::( - /// vec![ - /// Some(vec![Some(1), None, Some(2)]) - /// ]); + /// let expected = ListArray::from_iter_primitive::(vec![Some(vec![ + /// Some(1), + /// None, + /// Some(2), + /// ])]); /// /// assert_eq!(*result, expected); /// ``` @@ -2330,23 +2320,25 @@ impl ScalarValue { /// /// Example /// ``` - /// use datafusion_common::ScalarValue; - /// use arrow::array::{LargeListArray, Int32Array}; + /// use arrow::array::{Int32Array, LargeListArray}; /// use arrow::datatypes::{DataType, Int32Type}; /// use datafusion_common::cast::as_large_list_array; + /// use datafusion_common::ScalarValue; /// /// let scalars = vec![ - /// ScalarValue::Int32(Some(1)), - /// ScalarValue::Int32(None), - /// ScalarValue::Int32(Some(2)) + /// ScalarValue::Int32(Some(1)), + /// ScalarValue::Int32(None), + /// ScalarValue::Int32(Some(2)), /// ]; /// /// let result = ScalarValue::new_large_list(&scalars, &DataType::Int32); /// - /// let expected = LargeListArray::from_iter_primitive::( - /// vec![ - /// Some(vec![Some(1), None, Some(2)]) - /// ]); + /// let expected = + /// 
LargeListArray::from_iter_primitive::(vec![Some(vec![ + /// Some(1), + /// None, + /// Some(2), + /// ])]); /// /// assert_eq!(*result, expected); /// ``` @@ -2719,29 +2711,26 @@ impl ScalarValue { /// /// Example 1: Array (ScalarValue::Int32) /// ``` - /// use datafusion_common::ScalarValue; /// use arrow::array::ListArray; /// use arrow::datatypes::{DataType, Int32Type}; + /// use datafusion_common::ScalarValue; /// /// // Equivalent to [[1,2,3], [4,5]] /// let list_arr = ListArray::from_iter_primitive::(vec![ - /// Some(vec![Some(1), Some(2), Some(3)]), - /// Some(vec![Some(4), Some(5)]) + /// Some(vec![Some(1), Some(2), Some(3)]), + /// Some(vec![Some(4), Some(5)]), /// ]); /// /// // Convert the array into Scalar Values for each row /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap(); /// /// let expected = vec![ - /// vec![ - /// ScalarValue::Int32(Some(1)), - /// ScalarValue::Int32(Some(2)), - /// ScalarValue::Int32(Some(3)), - /// ], - /// vec![ - /// ScalarValue::Int32(Some(4)), - /// ScalarValue::Int32(Some(5)), - /// ], + /// vec![ + /// ScalarValue::Int32(Some(1)), + /// ScalarValue::Int32(Some(2)), + /// ScalarValue::Int32(Some(3)), + /// ], + /// vec![ScalarValue::Int32(Some(4)), ScalarValue::Int32(Some(5))], /// ]; /// /// assert_eq!(scalar_vec, expected); @@ -2749,15 +2738,15 @@ impl ScalarValue { /// /// Example 2: Nested array (ScalarValue::List) /// ``` - /// use datafusion_common::ScalarValue; /// use arrow::array::ListArray; /// use arrow::datatypes::{DataType, Int32Type}; /// use datafusion_common::utils::SingleRowListArrayBuilder; + /// use datafusion_common::ScalarValue; /// use std::sync::Arc; /// /// let list_arr = ListArray::from_iter_primitive::(vec![ - /// Some(vec![Some(1), Some(2), Some(3)]), - /// Some(vec![Some(4), Some(5)]) + /// Some(vec![Some(1), Some(2), Some(3)]), + /// Some(vec![Some(4), Some(5)]), /// ]); /// /// // Wrap into another layer of list, we got nested array as [ [[1,2,3], [4,5]] ] @@ -2766,19 +2755,20 @@ impl ScalarValue { /// // Convert the array into Scalar Values for each row, we got 1D arrays in this example /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap(); /// - /// let l1 = ListArray::from_iter_primitive::(vec![ - /// Some(vec![Some(1), Some(2), Some(3)]), - /// ]); - /// let l2 = ListArray::from_iter_primitive::(vec![ - /// Some(vec![Some(4), Some(5)]), - /// ]); + /// let l1 = ListArray::from_iter_primitive::(vec![Some(vec![ + /// Some(1), + /// Some(2), + /// Some(3), + /// ])]); + /// let l2 = ListArray::from_iter_primitive::(vec![Some(vec![ + /// Some(4), + /// Some(5), + /// ])]); /// - /// let expected = vec![ - /// vec![ + /// let expected = vec![vec![ /// ScalarValue::List(Arc::new(l1)), /// ScalarValue::List(Arc::new(l2)), - /// ], - /// ]; + /// ]]; /// /// assert_eq!(scalar_vec, expected); /// ``` diff --git a/datafusion/common/src/scalar/struct_builder.rs b/datafusion/common/src/scalar/struct_builder.rs index fd19dccf8963..56daee904514 100644 --- a/datafusion/common/src/scalar/struct_builder.rs +++ b/datafusion/common/src/scalar/struct_builder.rs @@ -47,13 +47,11 @@ impl ScalarStructBuilder { /// ```rust /// # use arrow::datatypes::{DataType, Field}; /// # use datafusion_common::scalar::ScalarStructBuilder; - /// let fields = vec![ - /// Field::new("a", DataType::Int32, false), - /// ]; + /// let fields = vec![Field::new("a", DataType::Int32, false)]; /// let sv = ScalarStructBuilder::new_null(fields); /// // Note this is `NULL`, not `{a: NULL}` /// 
assert_eq!(format!("{sv}"), "NULL"); - ///``` + /// ``` /// /// To create a struct where the *fields* are null, use `Self::new()` and /// pass null values for each field: @@ -65,9 +63,9 @@ impl ScalarStructBuilder { /// let field = Field::new("a", DataType::Int32, true); /// // add a null value for the "a" field /// let sv = ScalarStructBuilder::new() - /// .with_scalar(field, ScalarValue::Int32(None)) - /// .build() - /// .unwrap(); + /// .with_scalar(field, ScalarValue::Int32(None)) + /// .build() + /// .unwrap(); /// // value is not null, but field is /// assert_eq!(format!("{sv}"), "{a:}"); /// ``` diff --git a/datafusion/common/src/stats.rs b/datafusion/common/src/stats.rs index a6d132ef51f6..c3c227818b6a 100644 --- a/datafusion/common/src/stats.rs +++ b/datafusion/common/src/stats.rs @@ -480,33 +480,35 @@ impl Statistics { /// # use arrow::datatypes::{Field, Schema, DataType}; /// # use datafusion_common::stats::Precision; /// let stats1 = Statistics::default() - /// .with_num_rows(Precision::Exact(1)) - /// .with_total_byte_size(Precision::Exact(2)) - /// .add_column_statistics(ColumnStatistics::new_unknown() - /// .with_null_count(Precision::Exact(3)) - /// .with_min_value(Precision::Exact(ScalarValue::from(4))) - /// .with_max_value(Precision::Exact(ScalarValue::from(5))) - /// ); + /// .with_num_rows(Precision::Exact(1)) + /// .with_total_byte_size(Precision::Exact(2)) + /// .add_column_statistics( + /// ColumnStatistics::new_unknown() + /// .with_null_count(Precision::Exact(3)) + /// .with_min_value(Precision::Exact(ScalarValue::from(4))) + /// .with_max_value(Precision::Exact(ScalarValue::from(5))), + /// ); /// /// let stats2 = Statistics::default() - /// .with_num_rows(Precision::Exact(10)) - /// .with_total_byte_size(Precision::Inexact(20)) - /// .add_column_statistics(ColumnStatistics::new_unknown() - /// // absent null count - /// .with_min_value(Precision::Exact(ScalarValue::from(40))) - /// .with_max_value(Precision::Exact(ScalarValue::from(50))) - /// ); + /// .with_num_rows(Precision::Exact(10)) + /// .with_total_byte_size(Precision::Inexact(20)) + /// .add_column_statistics( + /// ColumnStatistics::new_unknown() + /// // absent null count + /// .with_min_value(Precision::Exact(ScalarValue::from(40))) + /// .with_max_value(Precision::Exact(ScalarValue::from(50))), + /// ); /// /// let merged_stats = stats1.try_merge(&stats2).unwrap(); /// let expected_stats = Statistics::default() - /// .with_num_rows(Precision::Exact(11)) - /// .with_total_byte_size(Precision::Inexact(22)) // inexact in stats2 --> inexact - /// .add_column_statistics( - /// ColumnStatistics::new_unknown() - /// .with_null_count(Precision::Absent) // missing from stats2 --> absent - /// .with_min_value(Precision::Exact(ScalarValue::from(4))) - /// .with_max_value(Precision::Exact(ScalarValue::from(50))) - /// ); + /// .with_num_rows(Precision::Exact(11)) + /// .with_total_byte_size(Precision::Inexact(22)) // inexact in stats2 --> inexact + /// .add_column_statistics( + /// ColumnStatistics::new_unknown() + /// .with_null_count(Precision::Absent) // missing from stats2 --> absent + /// .with_min_value(Precision::Exact(ScalarValue::from(4))) + /// .with_max_value(Precision::Exact(ScalarValue::from(50))), + /// ); /// /// assert_eq!(merged_stats, expected_stats) /// ``` diff --git a/datafusion/common/src/table_reference.rs b/datafusion/common/src/table_reference.rs index 9b6f9696c00b..73dfbba03ff6 100644 --- a/datafusion/common/src/table_reference.rs +++ b/datafusion/common/src/table_reference.rs @@ -68,8 
+68,11 @@ impl std::fmt::Display for ResolvedTableReference { /// /// // Get a table reference to 'myschema.mytable' (note the capitalization) /// let table_reference = TableReference::from("MySchema.MyTable"); -/// assert_eq!(table_reference, TableReference::partial("myschema", "mytable")); -///``` +/// assert_eq!( +/// table_reference, +/// TableReference::partial("myschema", "mytable") +/// ); +/// ``` #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub enum TableReference { /// An unqualified table reference, e.g. "table" @@ -246,7 +249,10 @@ impl TableReference { /// assert_eq!(table_reference.to_quoted_string(), "myschema.mytable"); /// /// let table_reference = TableReference::partial("MySchema", "MyTable"); - /// assert_eq!(table_reference.to_quoted_string(), r#""MySchema"."MyTable""#); + /// assert_eq!( + /// table_reference.to_quoted_string(), + /// r#""MySchema"."MyTable""# + /// ); /// ``` pub fn to_quoted_string(&self) -> String { match self { diff --git a/datafusion/common/src/test_util.rs b/datafusion/common/src/test_util.rs index 820a230bf6e1..6158d10cf75e 100644 --- a/datafusion/common/src/test_util.rs +++ b/datafusion/common/src/test_util.rs @@ -55,7 +55,7 @@ pub fn format_batches(results: &[RecordBatch]) -> Result i64 { 2 } /// let expr = orig_expr(); /// let ret = Transformed::no(expr.clone()) -/// .transform_data(|expr| { -/// // closure returns a result and potentially transforms the node -/// // in this example, it does transform the node -/// let new_expr = make_new_expr(expr); -/// Ok(Transformed::yes(new_expr)) -/// }).unwrap(); +/// .transform_data(|expr| { +/// // closure returns a result and potentially transforms the node +/// // in this example, it does transform the node +/// let new_expr = make_new_expr(expr); +/// Ok(Transformed::yes(new_expr)) +/// }) +/// .unwrap(); /// // transformed flag is the union of the original ans closure's transformed flag /// assert!(ret.transformed); /// ``` diff --git a/datafusion/common/src/types/logical.rs b/datafusion/common/src/types/logical.rs index 884ce20fd9e2..25648f8f9a8b 100644 --- a/datafusion/common/src/types/logical.rs +++ b/datafusion/common/src/types/logical.rs @@ -67,12 +67,12 @@ pub type LogicalTypeRef = Arc; /// &NativeType::String /// } /// -/// fn signature(&self) -> TypeSignature<'_> { -/// TypeSignature::Extension { -/// name: "JSON", -/// parameters: &[], -/// } -/// } +/// fn signature(&self) -> TypeSignature<'_> { +/// TypeSignature::Extension { +/// name: "JSON", +/// parameters: &[], +/// } +/// } /// } /// ``` pub trait LogicalType: Sync + Send { diff --git a/datafusion/common/src/utils/memory.rs b/datafusion/common/src/utils/memory.rs index 7ac081e0beb8..cc3428abb6eb 100644 --- a/datafusion/common/src/utils/memory.rs +++ b/datafusion/common/src/utils/memory.rs @@ -55,8 +55,8 @@ use std::mem::size_of; /// impl MyStruct { /// fn size(&self) -> Result { /// let num_elements = self.values.len(); -/// let fixed_size = std::mem::size_of_val(self) + -/// std::mem::size_of_val(&self.values); +/// let fixed_size = +/// std::mem::size_of_val(self) + std::mem::size_of_val(&self.values); /// /// estimate_memory_size::(num_elements, fixed_size) /// } @@ -72,8 +72,8 @@ use std::mem::size_of; /// let num_rows = 100; /// let fixed_size = std::mem::size_of::>(); /// let estimated_hashtable_size = -/// estimate_memory_size::<(u64, u64)>(num_rows,fixed_size) -/// .expect("Size estimation failed"); +/// estimate_memory_size::<(u64, u64)>(num_rows, fixed_size) +/// .expect("Size estimation failed"); /// 
``` pub fn estimate_memory_size(num_elements: usize, fixed_size: usize) -> Result { // For the majority of cases hashbrown overestimates the bucket quantity diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index ad2bab879a26..83140ac778a9 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -47,26 +47,23 @@ use std::thread::available_parallelism; /// /// Example: /// ``` -/// use arrow::datatypes::{SchemaRef, Schema, Field, DataType}; +/// use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; /// use datafusion_common::project_schema; /// /// // Schema with columns 'a', 'b', and 'c' /// let schema = SchemaRef::new(Schema::new(vec![ -/// Field::new("a", DataType::Int32, true), -/// Field::new("b", DataType::Int64, true), -/// Field::new("c", DataType::Utf8, true), +/// Field::new("a", DataType::Int32, true), +/// Field::new("b", DataType::Int64, true), +/// Field::new("c", DataType::Utf8, true), /// ])); /// /// // Pick columns 'c' and 'b' -/// let projection = Some(vec![2,1]); -/// let projected_schema = project_schema( -/// &schema, -/// projection.as_ref() -/// ).unwrap(); +/// let projection = Some(vec![2, 1]); +/// let projected_schema = project_schema(&schema, projection.as_ref()).unwrap(); /// /// let expected_schema = SchemaRef::new(Schema::new(vec![ -/// Field::new("c", DataType::Utf8, true), -/// Field::new("b", DataType::Int64, true), +/// Field::new("c", DataType::Utf8, true), +/// Field::new("b", DataType::Int64, true), /// ])); /// /// assert_eq!(projected_schema, expected_schema); @@ -347,9 +344,11 @@ pub fn longest_consecutive_prefix>( /// # use arrow::array::types::Int64Type; /// # use datafusion_common::utils::SingleRowListArrayBuilder; /// // Array is [1, 2, 3] -/// let arr = ListArray::from_iter_primitive::(vec![ -/// Some(vec![Some(1), Some(2), Some(3)]), -/// ]); +/// let arr = ListArray::from_iter_primitive::(vec![Some(vec![ +/// Some(1), +/// Some(2), +/// Some(3), +/// ])]); /// // Wrap as a list array: [[1, 2, 3]] /// let list_arr = SingleRowListArrayBuilder::new(Arc::new(arr)).build_list_array(); /// assert_eq!(list_arr.len(), 1); @@ -591,7 +590,8 @@ pub fn fixed_size_list_to_arrays(a: &ArrayRef) -> Vec { /// use datafusion_common::utils::base_type; /// use std::sync::Arc; /// -/// let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); +/// let data_type = +/// DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); /// assert_eq!(base_type(&data_type), DataType::Int32); /// /// let data_type = DataType::Int32; @@ -958,16 +958,19 @@ pub fn get_available_parallelism() -> usize { /// # use datafusion_common::utils::take_function_args; /// # use datafusion_common::ScalarValue; /// fn my_function(args: &[ScalarValue]) -> Result<()> { -/// // function expects 2 args, so create a 2-element array -/// let [arg1, arg2] = take_function_args("my_function", args)?; -/// // ... do stuff.. -/// Ok(()) +/// // function expects 2 args, so create a 2-element array +/// let [arg1, arg2] = take_function_args("my_function", args)?; +/// // ... do stuff.. 
+/// Ok(()) /// } /// /// // Calling the function with 1 argument produces an error: /// let args = vec![ScalarValue::Int32(Some(10))]; /// let err = my_function(&args).unwrap_err(); -/// assert_eq!(err.to_string(), "Execution error: my_function function requires 2 arguments, got 1"); +/// assert_eq!( +/// err.to_string(), +/// "Execution error: my_function function requires 2 arguments, got 1" +/// ); /// // Calling the function with 2 arguments works great /// let args = vec![ScalarValue::Int32(Some(10)), ScalarValue::Int32(Some(20))]; /// my_function(&args).unwrap(); diff --git a/datafusion/common/src/utils/proxy.rs b/datafusion/common/src/utils/proxy.rs index d940677a5fb3..fb951aa3b028 100644 --- a/datafusion/common/src/utils/proxy.rs +++ b/datafusion/common/src/utils/proxy.rs @@ -47,7 +47,9 @@ pub trait VecAllocExt { /// assert_eq!(allocated, 16); // no new allocation needed /// /// // push more data into the vec - /// for _ in 0..10 { vec.push_accounted(1, &mut allocated); } + /// for _ in 0..10 { + /// vec.push_accounted(1, &mut allocated); + /// } /// assert_eq!(allocated, 64); // underlying vec has space for 10 u32s /// assert_eq!(vec.allocated_size(), 64); /// ``` @@ -82,7 +84,9 @@ pub trait VecAllocExt { /// assert_eq!(vec.allocated_size(), 16); // no new allocation needed /// /// // push more data into the vec - /// for _ in 0..10 { vec.push(1); } + /// for _ in 0..10 { + /// vec.push(1); + /// } /// assert_eq!(vec.allocated_size(), 64); // space for 64 now /// ``` fn allocated_size(&self) -> usize; @@ -133,7 +137,9 @@ pub trait RawTableAllocExt { /// assert_eq!(allocated, 64); /// /// // insert more values - /// for i in 0..100 { table.insert_accounted(i, hash_fn, &mut allocated); } + /// for i in 0..100 { + /// table.insert_accounted(i, hash_fn, &mut allocated); + /// } /// assert_eq!(allocated, 400); /// ``` fn insert_accounted( @@ -200,7 +206,9 @@ pub trait HashTableAllocExt { /// assert_eq!(allocated, 64); /// /// // insert more values - /// for i in 0..100 { table.insert_accounted(i, hash_fn, &mut allocated); } + /// for i in 0..100 { + /// table.insert_accounted(i, hash_fn, &mut allocated); + /// } /// assert_eq!(allocated, 400); /// ``` fn insert_accounted( diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index a19e6f558162..1f3fe93cf839 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -258,10 +258,13 @@ impl DataFrame { /// # async fn main() -> Result<()> { /// // datafusion will parse number as i64 first. 
/// let sql = "a > 1 and b in (1, 10)"; - /// let expected = col("a").gt(lit(1 as i64)) - /// .and(col("b").in_list(vec![lit(1 as i64), lit(10 as i64)], false)); + /// let expected = col("a") + /// .gt(lit(1 as i64)) + /// .and(col("b").in_list(vec![lit(1 as i64), lit(10 as i64)], false)); /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let expr = df.parse_sql_expr(sql)?; /// assert_eq!(expected, expr); /// # Ok(()) @@ -288,14 +291,16 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.select_columns(&["a", "b"])?; /// let expected = vec![ /// "+---+---+", /// "| a | b |", /// "+---+---+", /// "| 1 | 2 |", - /// "+---+---+" + /// "+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -328,8 +333,10 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; - /// let df : DataFrame = df.select_exprs(&["a * b", "c"])?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; + /// let df: DataFrame = df.select_exprs(&["a * b", "c"])?; /// # Ok(()) /// # } /// ``` @@ -355,14 +362,16 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.select(vec![col("a"), col("b") * col("c")])?; /// let expected = vec![ /// "+---+-----------------------+", /// "| a | ?table?.b * ?table?.c |", /// "+---+-----------------------+", /// "| 1 | 6 |", - /// "+---+-----------------------+" + /// "+---+-----------------------+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -405,7 +414,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// // +----+----+----+ /// // | a | b | c | /// // +----+----+----+ @@ -417,7 +428,7 @@ impl DataFrame { /// "| b | c |", /// "+---+---+", /// "| 2 | 3 |", - /// "+---+---+" + /// "+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -516,7 +527,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example_long.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.filter(col("a").lt_eq(col("b")))?; /// // all rows where a <= b are returned /// let expected = vec![ @@ -526,7 +539,7 @@ impl DataFrame { /// "| 1 | 2 | 3 |", /// "| 4 | 5 | 6 |", /// "| 7 | 8 | 9 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # 
assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -555,7 +568,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example_long.csv", CsvReadOptions::new()) + /// .await?; /// /// // The following use is the equivalent of "SELECT MIN(b) GROUP BY a" /// let df1 = df.clone().aggregate(vec![col("a")], vec![min(col("b"))])?; @@ -566,7 +581,7 @@ impl DataFrame { /// "| 1 | 2 |", /// "| 4 | 5 |", /// "| 7 | 8 |", - /// "+---+----------------+" + /// "+---+----------------+", /// ]; /// assert_batches_sorted_eq!(expected1, &df1.collect().await?); /// // The following use is the equivalent of "SELECT MIN(b)" @@ -576,7 +591,7 @@ impl DataFrame { /// "| min(?table?.b) |", /// "+----------------+", /// "| 2 |", - /// "+----------------+" + /// "+----------------+", /// ]; /// # assert_batches_sorted_eq!(expected2, &df2.collect().await?); /// # Ok(()) @@ -644,7 +659,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example_long.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.limit(1, Some(2))?; /// let expected = vec![ /// "+---+---+---+", @@ -652,7 +669,7 @@ impl DataFrame { /// "+---+---+---+", /// "| 4 | 5 | 6 |", /// "| 7 | 8 | 9 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -681,7 +698,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await? ; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let d2 = df.clone(); /// let df = df.union(d2)?; /// let expected = vec![ @@ -690,7 +709,7 @@ impl DataFrame { /// "+---+---+---+", /// "| 1 | 2 | 3 |", /// "| 1 | 2 | 3 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -721,8 +740,13 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; - /// let d2 = df.clone().select_columns(&["b", "c", "a"])?.with_column("d", lit("77"))?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; + /// let d2 = df + /// .clone() + /// .select_columns(&["b", "c", "a"])? 
+ /// .with_column("d", lit("77"))?; /// let df = df.union_by_name(d2)?; /// let expected = vec![ /// "+---+---+---+----+", @@ -730,7 +754,7 @@ impl DataFrame { /// "+---+---+---+----+", /// "| 1 | 2 | 3 | |", /// "| 1 | 2 | 3 | 77 |", - /// "+---+---+---+----+" + /// "+---+---+---+----+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -760,7 +784,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let d2 = df.clone(); /// let df = df.union_distinct(d2)?; /// // df2 are duplicate of df @@ -769,7 +795,7 @@ impl DataFrame { /// "| a | b | c |", /// "+---+---+---+", /// "| 1 | 2 | 3 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -800,7 +826,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let d2 = df.clone().select_columns(&["b", "c", "a"])?; /// let df = df.union_by_name_distinct(d2)?; /// let expected = vec![ @@ -808,7 +836,7 @@ impl DataFrame { /// "| a | b | c |", /// "+---+---+---+", /// "| 1 | 2 | 3 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -835,14 +863,16 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.distinct()?; /// let expected = vec![ /// "+---+---+---+", /// "| a | b | c |", /// "+---+---+---+", /// "| 1 | 2 | 3 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -869,15 +899,17 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await? - /// // Return a single row (a, b) for each distinct value of a - /// .distinct_on(vec![col("a")], vec![col("a"), col("b")], None)?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await? 
+ /// // Return a single row (a, b) for each distinct value of a + /// .distinct_on(vec![col("a")], vec![col("a"), col("b")], None)?; /// let expected = vec![ /// "+---+---+", /// "| a | b |", /// "+---+---+", /// "| 1 | 2 |", - /// "+---+---+" + /// "+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -1123,11 +1155,13 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example_long.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.sort(vec![ - /// col("a").sort(false, true), // a DESC, nulls first - /// col("b").sort(true, false), // b ASC, nulls last - /// ])?; + /// col("a").sort(false, true), // a DESC, nulls first + /// col("b").sort(true, false), // b ASC, nulls last + /// ])?; /// let expected = vec![ /// "+---+---+---+", /// "| a | b | c |", @@ -1174,12 +1208,17 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let left = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; - /// let right = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await? - /// .select(vec![ - /// col("a").alias("a2"), - /// col("b").alias("b2"), - /// col("c").alias("c2")])?; + /// let left = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; + /// let right = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await? + /// .select(vec![ + /// col("a").alias("a2"), + /// col("b").alias("b2"), + /// col("c").alias("c2"), + /// ])?; /// // Perform the equivalent of `left INNER JOIN right ON (a = a2 AND b = b2)` /// // finding all pairs of rows from `left` and `right` where `a = a2` and `b = b2`. 
/// let join = left.join(right, JoinType::Inner, &["a", "b"], &["a2", "b2"], None)?; @@ -1188,13 +1227,12 @@ impl DataFrame { /// "| a | b | c | a2 | b2 | c2 |", /// "+---+---+---+----+----+----+", /// "| 1 | 2 | 3 | 1 | 2 | 3 |", - /// "+---+---+---+----+----+----+" + /// "+---+---+---+----+----+----+", /// ]; /// assert_batches_sorted_eq!(expected, &join.collect().await?); /// # Ok(()) /// # } /// ``` - /// pub fn join( self, right: DataFrame, @@ -1256,7 +1294,7 @@ impl DataFrame { /// "+---+---+---+----+----+----+", /// "| a | b | c | a2 | b2 | c2 |", /// "+---+---+---+----+----+----+", - /// "+---+---+---+----+----+----+" + /// "+---+---+---+----+----+----+", /// ]; /// # assert_batches_sorted_eq!(expected, &join_on.collect().await?); /// # Ok(()) @@ -1288,7 +1326,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example_long.csv", CsvReadOptions::new()) + /// .await?; /// let df1 = df.repartition(Partitioning::RoundRobinBatch(4))?; /// let expected = vec![ /// "+---+---+---+", @@ -1297,7 +1337,7 @@ impl DataFrame { /// "| 1 | 2 | 3 |", /// "| 4 | 5 | 6 |", /// "| 7 | 8 | 9 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df1.collect().await?); /// # Ok(()) @@ -1326,7 +1366,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let count = df.count().await?; // 1 /// # assert_eq!(count, 1); /// # Ok(()) @@ -1365,7 +1407,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let batches = df.collect().await?; /// # Ok(()) /// # } @@ -1385,7 +1429,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// df.show().await?; /// # Ok(()) /// # } @@ -1444,7 +1490,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// df.show_limit(10).await?; /// # Ok(()) /// # } @@ -1470,7 +1518,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let stream = df.execute_stream().await?; /// # Ok(()) /// # } @@ -1496,7 +1546,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + 
/// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let batches = df.collect_partitioned().await?; /// # Ok(()) /// # } @@ -1516,7 +1568,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let batches = df.execute_stream_partitioned().await?; /// # Ok(()) /// # } @@ -1545,7 +1599,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let schema = df.schema(); /// # Ok(()) /// # } @@ -1611,8 +1667,14 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; - /// let batches = df.limit(0, Some(100))?.explain(false, false)?.collect().await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; + /// let batches = df + /// .limit(0, Some(100))? + /// .explain(false, false)? + /// .collect() + /// .await?; /// # Ok(()) /// # } /// ``` @@ -1635,8 +1697,18 @@ impl DataFrame { /// # async fn main() -> Result<()> { /// use datafusion_expr::{Explain, ExplainOption}; /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; - /// let batches = df.limit(0, Some(100))?.explain_with_options(ExplainOption::default().with_verbose(false).with_analyze(false))?.collect().await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; + /// let batches = df + /// .limit(0, Some(100))? + /// .explain_with_options( + /// ExplainOption::default() + /// .with_verbose(false) + /// .with_analyze(false), + /// )? 
+ /// .collect() + /// .await?; /// # Ok(()) /// # } /// ``` @@ -1666,7 +1738,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let f = df.registry(); /// // use f.udf("name", vec![...]) to use the udf /// # Ok(()) @@ -1685,15 +1759,19 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; - /// let d2 = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; + /// let d2 = ctx + /// .read_csv("tests/data/example_long.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.intersect(d2)?; /// let expected = vec![ /// "+---+---+---+", /// "| a | b | c |", /// "+---+---+---+", /// "| 1 | 2 | 3 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -1719,15 +1797,19 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; - /// let d2 = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; + /// let d2 = ctx + /// .read_csv("tests/data/example_long.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.intersect_distinct(d2)?; /// let expected = vec![ /// "+---+---+---+", /// "| a | b | c |", /// "+---+---+---+", /// "| 1 | 2 | 3 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -1753,8 +1835,12 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; - /// let d2 = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example_long.csv", CsvReadOptions::new()) + /// .await?; + /// let d2 = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let result = df.except(d2)?; /// // those columns are not in example.csv, but in example_long.csv /// let expected = vec![ @@ -1763,7 +1849,7 @@ impl DataFrame { /// "+---+---+---+", /// "| 4 | 5 | 6 |", /// "| 7 | 8 | 9 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &result.collect().await?); /// # Ok(()) @@ -1789,8 +1875,12 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; - /// let d2 = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example_long.csv", CsvReadOptions::new()) + /// .await?; + /// let d2 = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let result = df.except_distinct(d2)?; /// // those columns are not in example.csv, but in example_long.csv 
/// let expected = vec![ @@ -1799,7 +1889,7 @@ impl DataFrame { /// "+---+---+---+", /// "| 4 | 5 | 6 |", /// "| 7 | 8 | 9 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &result.collect().await?); /// # Ok(()) @@ -1876,13 +1966,15 @@ impl DataFrame { /// use datafusion::dataframe::DataFrameWriteOptions; /// let ctx = SessionContext::new(); /// // Sort the data by column "b" and write it to a new location - /// ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await? - /// .sort(vec![col("b").sort(true, true)])? // sort by b asc, nulls first - /// .write_csv( - /// "output.csv", - /// DataFrameWriteOptions::new(), - /// None, // can also specify CSV writing options here - /// ).await?; + /// ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await? + /// .sort(vec![col("b").sort(true, true)])? // sort by b asc, nulls first + /// .write_csv( + /// "output.csv", + /// DataFrameWriteOptions::new(), + /// None, // can also specify CSV writing options here + /// ) + /// .await?; /// # fs::remove_file("output.csv")?; /// # Ok(()) /// # } @@ -1946,13 +2038,11 @@ impl DataFrame { /// use datafusion::dataframe::DataFrameWriteOptions; /// let ctx = SessionContext::new(); /// // Sort the data by column "b" and write it to a new location - /// ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await? - /// .sort(vec![col("b").sort(true, true)])? // sort by b asc, nulls first - /// .write_json( - /// "output.json", - /// DataFrameWriteOptions::new(), - /// None - /// ).await?; + /// ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await? + /// .sort(vec![col("b").sort(true, true)])? // sort by b asc, nulls first + /// .write_json("output.json", DataFrameWriteOptions::new(), None) + /// .await?; /// # fs::remove_file("output.json")?; /// # Ok(()) /// # } @@ -2013,7 +2103,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.with_column("ab_sum", col("a") + col("b"))?; /// # Ok(()) /// # } @@ -2080,7 +2172,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.with_column_renamed("ab_sum", "total")?; /// /// # Ok(()) @@ -2213,7 +2307,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.cache().await?; /// # Ok(()) /// # } @@ -2257,7 +2353,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// // Fill nulls in only columns "a" and "c": /// let df = df.fill_null(ScalarValue::from(0), vec!["a".to_owned(), "c".to_owned()])?; /// // Fill nulls across all columns: @@ -2328,9 
+2426,9 @@ impl DataFrame { /// Helper for creating DataFrame. /// # Example /// ``` - /// use std::sync::Arc; /// use arrow::array::{ArrayRef, Int32Array, StringArray}; /// use datafusion::prelude::DataFrame; + /// use std::sync::Arc; /// let id: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3])); /// let name: ArrayRef = Arc::new(StringArray::from(vec!["foo", "bar", "baz"])); /// let df = DataFrame::from_columns(vec![("id", id), ("name", name)]).unwrap(); diff --git a/datafusion/core/src/dataframe/parquet.rs b/datafusion/core/src/dataframe/parquet.rs index 83bb60184fb9..da760e452a99 100644 --- a/datafusion/core/src/dataframe/parquet.rs +++ b/datafusion/core/src/dataframe/parquet.rs @@ -42,13 +42,15 @@ impl DataFrame { /// use datafusion::dataframe::DataFrameWriteOptions; /// let ctx = SessionContext::new(); /// // Sort the data by column "b" and write it to a new location - /// ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await? - /// .sort(vec![col("b").sort(true, true)])? // sort by b asc, nulls first - /// .write_parquet( - /// "output.parquet", - /// DataFrameWriteOptions::new(), - /// None, // can also specify parquet writing options here - /// ).await?; + /// ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await? + /// .sort(vec![col("b").sort(true, true)])? // sort by b asc, nulls first + /// .write_parquet( + /// "output.parquet", + /// DataFrameWriteOptions::new(), + /// None, // can also specify parquet writing options here + /// ) + /// .await?; /// # fs::remove_file("output.parquet")?; /// # Ok(()) /// # } diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 121ab46730b5..e1699c4a6c0b 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -82,7 +82,6 @@ pub enum SchemaSource { /// /// If not specified, a [`DefaultSchemaAdapterFactory`] will be used, which handles /// basic schema compatibility cases. -/// #[derive(Debug, Clone, Default)] pub struct ListingTableConfig { /// Paths on the `ObjectStore` for creating `ListingTable`. 
@@ -894,17 +893,17 @@ impl ListingOptions { /// /// // Create default parquet options /// let file_format = ParquetFormat::new(); -/// let listing_options = ListingOptions::new(Arc::new(file_format)) -/// .with_file_extension(".parquet"); +/// let listing_options = +/// ListingOptions::new(Arc::new(file_format)).with_file_extension(".parquet"); /// /// // Resolve the schema /// let resolved_schema = listing_options -/// .infer_schema(&session_state, &table_path) -/// .await?; +/// .infer_schema(&session_state, &table_path) +/// .await?; /// /// let config = ListingTableConfig::new(table_path) -/// .with_listing_options(listing_options) -/// .with_schema(resolved_schema); +/// .with_listing_options(listing_options) +/// .with_schema(resolved_schema); /// /// // Create a new TableProvider /// let provider = Arc::new(ListingTable::try_new(config)?); diff --git a/datafusion/core/src/execution/context/csv.rs b/datafusion/core/src/execution/context/csv.rs index 15d6d21f038a..e6f95886e91d 100644 --- a/datafusion/core/src/execution/context/csv.rs +++ b/datafusion/core/src/execution/context/csv.rs @@ -37,9 +37,16 @@ impl SessionContext { /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); /// // You can read a single file using `read_csv` - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// // you can also read multiple files: - /// let df = ctx.read_csv(vec!["tests/data/example.csv", "tests/data/example.csv"], CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv( + /// vec!["tests/data/example.csv", "tests/data/example.csv"], + /// CsvReadOptions::new(), + /// ) + /// .await?; /// # Ok(()) /// # } /// ``` diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index ea8850d3b66c..1f17398ad82c 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -164,22 +164,23 @@ where /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); -/// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; -/// let df = df.filter(col("a").lt_eq(col("b")))? -/// .aggregate(vec![col("a")], vec![min(col("b"))])? -/// .limit(0, Some(100))?; -/// let results = df -/// .collect() -/// .await?; +/// let df = ctx +/// .read_csv("tests/data/example.csv", CsvReadOptions::new()) +/// .await?; +/// let df = df +/// .filter(col("a").lt_eq(col("b")))? +/// .aggregate(vec![col("a")], vec![min(col("b"))])? +/// .limit(0, Some(100))?; +/// let results = df.collect().await?; /// assert_batches_eq!( -/// &[ -/// "+---+----------------+", -/// "| a | min(?table?.b) |", -/// "+---+----------------+", -/// "| 1 | 2 |", -/// "+---+----------------+", -/// ], -/// &results +/// &[ +/// "+---+----------------+", +/// "| a | min(?table?.b) |", +/// "+---+----------------+", +/// "| 1 | 2 |", +/// "+---+----------------+", +/// ], +/// &results /// ); /// # Ok(()) /// # } @@ -195,21 +196,22 @@ where /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); -/// ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?; +/// ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()) +/// .await?; /// let results = ctx -/// .sql("SELECT a, min(b) FROM example GROUP BY a LIMIT 100") -/// .await? 
-/// .collect() -/// .await?; +/// .sql("SELECT a, min(b) FROM example GROUP BY a LIMIT 100") +/// .await? +/// .collect() +/// .await?; /// assert_batches_eq!( -/// &[ -/// "+---+----------------+", -/// "| a | min(example.b) |", -/// "+---+----------------+", -/// "| 1 | 2 |", -/// "+---+----------------+", -/// ], -/// &results +/// &[ +/// "+---+----------------+", +/// "| a | min(example.b) |", +/// "+---+----------------+", +/// "| 1 | 2 |", +/// "+---+----------------+", +/// ], +/// &results /// ); /// # Ok(()) /// # } @@ -229,18 +231,18 @@ where /// let config = SessionConfig::new().with_batch_size(4 * 1024); /// /// // configure a memory limit of 1GB with 20% slop -/// let runtime_env = RuntimeEnvBuilder::new() +/// let runtime_env = RuntimeEnvBuilder::new() /// .with_memory_limit(1024 * 1024 * 1024, 0.80) /// .build_arc() /// .unwrap(); /// /// // Create a SessionState using the config and runtime_env /// let state = SessionStateBuilder::new() -/// .with_config(config) -/// .with_runtime_env(runtime_env) -/// // include support for built in functions and configurations -/// .with_default_features() -/// .build(); +/// .with_config(config) +/// .with_runtime_env(runtime_env) +/// // include support for built in functions and configurations +/// .with_default_features() +/// .build(); /// /// // Create a SessionContext /// let ctx = SessionContext::from(state); @@ -426,16 +428,14 @@ impl SessionContext { /// # use datafusion::prelude::*; /// # use datafusion::execution::SessionStateBuilder; /// # use datafusion_optimizer::push_down_filter::PushDownFilter; - /// let my_rule = PushDownFilter{}; // pretend it is a new rule - /// // Create a new builder with a custom optimizer rule + /// let my_rule = PushDownFilter {}; // pretend it is a new rule + /// // Create a new builder with a custom optimizer rule /// let context: SessionContext = SessionStateBuilder::new() - /// .with_optimizer_rule(Arc::new(my_rule)) - /// .build() - /// .into(); + /// .with_optimizer_rule(Arc::new(my_rule)) + /// .build() + /// .into(); /// // Enable local file access and convert context back to a builder - /// let builder = context - /// .enable_url_table() - /// .into_state_builder(); + /// let builder = context.enable_url_table().into_state_builder(); /// ``` pub fn into_state_builder(self) -> SessionStateBuilder { let SessionContext { @@ -576,11 +576,10 @@ impl SessionContext { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// ctx - /// .sql("CREATE TABLE foo (x INTEGER)") - /// .await? - /// .collect() - /// .await?; + /// ctx.sql("CREATE TABLE foo (x INTEGER)") + /// .await? 
+ /// .collect() + /// .await?; /// assert!(ctx.table_exist("foo").unwrap()); /// # Ok(()) /// # } /// ``` @@ -604,14 +603,14 @@ impl SessionContext { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let options = SQLOptions::new() - /// .with_allow_ddl(false); - /// let err = ctx.sql_with_options("CREATE TABLE foo (x INTEGER)", options) - /// .await - /// .unwrap_err(); - /// assert!( - /// err.to_string().starts_with("Error during planning: DDL not supported: CreateMemoryTable") - /// ); + /// let options = SQLOptions::new().with_allow_ddl(false); + /// let err = ctx + /// .sql_with_options("CREATE TABLE foo (x INTEGER)", options) + /// .await + /// .unwrap_err(); + /// assert!(err + /// .to_string() + /// .starts_with("Error during planning: DDL not supported: CreateMemoryTable")); /// # Ok(()) /// # } /// ``` @@ -642,8 +641,7 @@ impl SessionContext { /// // provide type information that `a` is an Int32 /// let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]); /// let df_schema = DFSchema::try_from(schema).unwrap(); - /// let expr = SessionContext::new() - /// .parse_sql_expr(sql, &df_schema)?; + /// let expr = SessionContext::new().parse_sql_expr(sql, &df_schema)?; /// assert_eq!(expected, expr); /// # Ok(()) /// # } /// ``` @@ -1083,8 +1081,14 @@ impl SessionContext { /// ``` /// use datafusion::execution::context::SessionContext; /// - /// assert_eq!(SessionContext::parse_memory_limit("1M").unwrap(), 1024 * 1024); - /// assert_eq!(SessionContext::parse_memory_limit("1.5G").unwrap(), (1.5 * 1024.0 * 1024.0 * 1024.0) as usize); + /// assert_eq!( + /// SessionContext::parse_memory_limit("1M").unwrap(), + /// 1024 * 1024 + /// ); + /// assert_eq!( + /// SessionContext::parse_memory_limit("1.5G").unwrap(), + /// (1.5 * 1024.0 * 1024.0 * 1024.0) as usize + /// ); /// ``` pub fn parse_memory_limit(limit: &str) -> Result<usize> { let (number, unit) = limit.split_at(limit.len() - 1); diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 364ad75b0869..efe4ab13ab74 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -108,12 +108,12 @@ use uuid::Uuid; /// # use std::sync::Arc; /// # #[tokio::main] /// # async fn main() -> Result<()> { -/// let state = SessionStateBuilder::new() -/// .with_config(SessionConfig::new()) -/// .with_runtime_env(Arc::new(RuntimeEnv::default())) -/// .with_default_features() -/// .build(); -/// Ok(()) +/// let state = SessionStateBuilder::new() +/// .with_config(SessionConfig::new()) +/// .with_runtime_env(Arc::new(RuntimeEnv::default())) +/// .with_default_features() +/// .build(); +/// Ok(()) /// # } /// ``` /// @@ -1300,7 +1300,7 @@ impl SessionStateBuilder { /// let url = Url::try_from("file://").unwrap(); /// let object_store = object_store::local::LocalFileSystem::new(); /// let state = SessionStateBuilder::new() - /// .with_config(SessionConfig::new()) + /// .with_config(SessionConfig::new()) /// .with_object_store(&url, Arc::new(object_store)) /// .with_default_features() /// .build(); diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index dc9f7cf1cc18..0effa519519c 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -86,26 +86,29 @@ //! let ctx = SessionContext::new(); //! //! // create the dataframe -//! let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; +//! let df = ctx +//!
.read_csv("tests/data/example.csv", CsvReadOptions::new()) +//! .await?; //! //! // create a plan -//! let df = df.filter(col("a").lt_eq(col("b")))? -//! .aggregate(vec![col("a")], vec![min(col("b"))])? -//! .limit(0, Some(100))?; +//! let df = df +//! .filter(col("a").lt_eq(col("b")))? +//! .aggregate(vec![col("a")], vec![min(col("b"))])? +//! .limit(0, Some(100))?; //! //! // execute the plan //! let results: Vec<RecordBatch> = df.collect().await?; //! //! // format the results -//! let pretty_results = arrow::util::pretty::pretty_format_batches(&results)? -//! .to_string(); +//! let pretty_results = +//! arrow::util::pretty::pretty_format_batches(&results)?.to_string(); //! //! let expected = vec![ //! "+---+----------------+", //! "| a | min(?table?.b) |", //! "+---+----------------+", //! "| 1 | 2 |", -//! "+---+----------------+" +//! "+---+----------------+", //! ]; //! //! assert_eq!(pretty_results.trim().lines().collect::<Vec<&str>>(), expected); @@ -126,24 +129,27 @@ //! # async fn main() -> Result<()> { //! let ctx = SessionContext::new(); //! -//! ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?; +//! ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()) +//! .await?; //! //! // create a plan -//! let df = ctx.sql("SELECT a, MIN(b) FROM example WHERE a <= b GROUP BY a LIMIT 100").await?; +//! let df = ctx +//! .sql("SELECT a, MIN(b) FROM example WHERE a <= b GROUP BY a LIMIT 100") +//! .await?; //! //! // execute the plan //! let results: Vec<RecordBatch> = df.collect().await?; //! //! // format the results -//! let pretty_results = arrow::util::pretty::pretty_format_batches(&results)? -//! .to_string(); +//! let pretty_results = +//! arrow::util::pretty::pretty_format_batches(&results)?.to_string(); //! //! let expected = vec![ //! "+---+----------------+", //! "| a | min(example.b) |", //! "+---+----------------+", //! "| 1 | 2 |", -//! "+---+----------------+" +//! "+---+----------------+", //! ]; //! //! assert_eq!(pretty_results.trim().lines().collect::<Vec<&str>>(), expected); @@ -458,7 +464,7 @@ //! consumes it immediately as well. //! //! ```text -//! +//! //! Step 3: FilterExec calls next() Step 2: ProjectionExec calls //! on input Stream next() on input Stream //! ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ @@ -607,7 +613,7 @@ //! └─────────────┘ ┗━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━┛ //! ─────────────────────────────────────────────────────────────▶ //! time -//!``` +//! ``` //! //! Note that DataFusion does not use [`tokio::task::spawn_blocking`] for //!
CPU-bounded work, because `spawn_blocking` is designed for blocking **IO**, diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs index 2abfcd8417cb..fa8ea0b31c02 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs @@ -44,7 +44,6 @@ use crate::fuzz_cases::aggregation_fuzzer::data_generator::Dataset; /// - hint `sorted` or not /// - `spilling` or not (TODO, I think a special `MemoryPool` may be needed /// to support this) -/// pub struct SessionContextGenerator { /// Current testing dataset dataset: Arc<Dataset>, diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs index 753a74995d8f..aaf2d1b9bad4 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs @@ -39,7 +39,6 @@ use crate::fuzz_cases::record_batch_generator::{ColumnDescr, RecordBatchGenerato /// will generate one `base dataset` firstly. Then the `base dataset` will be sorted /// based on each `sort_key` respectively. And finally `len(sort_keys) + 1` datasets /// will be returned -/// #[derive(Debug, Clone)] pub struct DatasetGeneratorConfig { /// Descriptions of columns in datasets, it's `required` @@ -115,7 +114,6 @@ impl DatasetGeneratorConfig { /// /// - Split each batch to multiple batches which each sub-batch in has the randomly `rows num`, /// and this multiple batches will be used to create the `Dataset`. -/// pub struct DatasetGenerator { batch_generator: RecordBatchGenerator, sort_keys_set: Vec<Vec<String>>, diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs index 564232651424..2801acccad4a 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs @@ -253,7 +253,6 @@ impl AggregationFuzzer { /// /// - `dataset_ref`, the input dataset, store it for error reported when found /// the inconsistency between the one for `ctx` and `expected results`. -/// struct AggregationFuzzTestTask { /// Generated session context in current test case ctx_with_params: SessionContextWithParams, diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/query_builder.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/query_builder.rs index 209278385b7b..766e2bedd74c 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/query_builder.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/query_builder.rs @@ -24,7 +24,7 @@ use rand::{rng, seq::SliceRandom, Rng}; /// Creates queries like /// ```sql /// SELECT AGG(..) FROM table_name GROUP BY -///``` +/// ``` #[derive(Debug, Default, Clone)] pub struct QueryBuilder { // =================================== @@ -95,7 +95,6 @@ pub struct QueryBuilder { /// More details can see [`GroupOrdering`].
/// /// [`GroupOrdering`]: datafusion_physical_plan::aggregates::order::GroupOrdering - /// dataset_sort_keys: Vec<Vec<String>>, /// If we will also test the no grouping case like: @@ -103,7 +102,6 @@ pub struct QueryBuilder { /// ```text /// SELECT aggr FROM t; /// ``` - /// no_grouping: bool, // ==================================== diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs index e31a30cc0883..72ecb260878b 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs @@ -90,7 +90,6 @@ fn csv_exec_sorted( /// `$EXPECTED_OPTIMIZED_PLAN_LINES`: optimized plan /// `$PLAN`: the plan to optimized /// `REPARTITION_SORTS`: Flag to set `config.options.optimizer.repartition_sorts` option. -/// macro_rules! assert_optimized { ($EXPECTED_PLAN_LINES: expr, $EXPECTED_OPTIMIZED_PLAN_LINES: expr, $PLAN: expr, $REPARTITION_SORTS: expr) => { let mut config = ConfigOptions::new(); diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index e212ee269b15..743c8750b521 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -43,7 +43,6 @@ use tempfile::TempDir; /// A macro to assert that some particular line contains two substrings /// /// Usage: `assert_metrics!(actual, operator_name, metrics)` -/// macro_rules! assert_metrics { ($ACTUAL: expr, $OPERATOR_NAME: expr, $METRICS: expr) => { let found = $ACTUAL diff --git a/datafusion/core/tests/user_defined/user_defined_plan.rs b/datafusion/core/tests/user_defined/user_defined_plan.rs index 4d3916c1760e..c1026bb5e202 100644 --- a/datafusion/core/tests/user_defined/user_defined_plan.rs +++ b/datafusion/core/tests/user_defined/user_defined_plan.rs @@ -56,7 +56,6 @@ //! //! The same answer can be produced by simply keeping track of the top //! N elements, reducing the total amount of required buffer memory. -//!
use std::fmt::Debug; use std::hash::Hash; diff --git a/datafusion/datasource-avro/src/avro_to_arrow/reader.rs b/datafusion/datasource-avro/src/avro_to_arrow/reader.rs index 7f5900605a06..e4c25ee144f5 100644 --- a/datafusion/datasource-avro/src/avro_to_arrow/reader.rs +++ b/datafusion/datasource-avro/src/avro_to_arrow/reader.rs @@ -64,13 +64,9 @@ impl ReaderBuilder { /// let file = File::open("test/data/basic.avro").unwrap(); /// /// // create a builder, inferring the schema with the first 100 records - /// let builder = ReaderBuilder::new() - /// .read_schema() - /// .with_batch_size(100); + /// let builder = ReaderBuilder::new().read_schema().with_batch_size(100); /// - /// let reader = builder - /// .build::<File>(file) - /// .unwrap(); + /// let reader = builder.build::<File>(file).unwrap(); /// /// reader /// } diff --git a/datafusion/datasource-parquet/src/page_filter.rs b/datafusion/datasource-parquet/src/page_filter.rs index 5f3e05747d40..613e947e8063 100644 --- a/datafusion/datasource-parquet/src/page_filter.rs +++ b/datafusion/datasource-parquet/src/page_filter.rs @@ -90,7 +90,6 @@ use parquet::{ /// ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━━ ━━┛ /// /// Total rows: 300 -/// /// ``` /// /// Given the predicate `A > 35 AND B = 'F'`: diff --git a/datafusion/datasource-parquet/src/source.rs b/datafusion/datasource-parquet/src/source.rs index 76a1a8c08acb..0e16a96aaa64 100644 --- a/datafusion/datasource-parquet/src/source.rs +++ b/datafusion/datasource-parquet/src/source.rs @@ -80,7 +80,6 @@ use object_store::ObjectStore; /// │.───────────────────.│ /// │ ) /// `───────────────────' -/// /// ``` /// /// # Example: Create a `DataSourceExec` @@ -338,7 +337,6 @@ impl ParquetSource { } /// Optional user defined parquet file reader factory. - /// pub fn with_parquet_file_reader_factory( mut self, parquet_file_reader_factory: Arc<dyn ParquetFileReaderFactory>, diff --git a/datafusion/datasource/src/file_scan_config.rs b/datafusion/datasource/src/file_scan_config.rs index 95cc9e24b645..33c7faa010b7 100644 --- a/datafusion/datasource/src/file_scan_config.rs +++ b/datafusion/datasource/src/file_scan_config.rs @@ -1274,25 +1274,25 @@ fn create_output_array( /// correctly sorted on `(A, B, C)` /// /// ```text -///┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ┓ +/// ┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ┓ /// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ┐ -///┃ ┌───────────────┐ ┌──────────────┐ │ ┌──────────────┐ │ ┌─────────────┐ ┃ +/// ┃ ┌───────────────┐ ┌──────────────┐ │ ┌──────────────┐ │ ┌─────────────┐ ┃ /// │ │ 1.parquet │ │ │ │ 2.parquet │ │ │ 3.parquet │ │ │ 4.parquet │ │ -///┃ │ Sort: A, B, C │ │Sort: A, B, C │ │ │Sort: A, B, C │ │ │Sort: A, B, C│ ┃ +/// ┃ │ Sort: A, B, C │ │Sort: A, B, C │ │ │Sort: A, B, C │ │ │Sort: A, B, C│ ┃ /// │ └───────────────┘ │ │ └──────────────┘ │ └──────────────┘ │ └─────────────┘ │ -///┃ │ │ ┃ +/// ┃ │ │ ┃ /// │ │ │ │ │ │ -///┃ │ │ ┃ +/// ┃ │ │ ┃ /// │ │ │ │ │ │ -///┃ │ │ ┃ +/// ┃ │ │ ┃ /// │ │ │ │ │ │ -///┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┃ +/// ┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┃ /// DataFusion DataFusion DataFusion DataFusion -///┃ Partition 1 Partition 2 Partition 3 Partition 4 ┃ +/// ┃ Partition 1 Partition 2 Partition 3 Partition 4 ┃ /// ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ /// /// DataSourceExec -///``` +/// ``` /// /// 
However, when more than 1 file is assigned to each partition, each /// partition is NOT correctly sorted on `(A, B, C)`. Once the second /// file is scanned, the same values for A, B and C can appear in /// the same sorted stream /// ///```text -///┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ +/// ┏ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ /// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐ ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┃ -///┃ ┌───────────────┐ ┌──────────────┐ │ +/// ┃ ┌───────────────┐ ┌──────────────┐ │ /// │ │ 1.parquet │ │ │ │ 2.parquet │ ┃ -///┃ │ Sort: A, B, C │ │Sort: A, B, C │ │ +/// ┃ │ Sort: A, B, C │ │Sort: A, B, C │ │ /// │ └───────────────┘ │ │ └──────────────┘ ┃ -///┃ ┌───────────────┐ ┌──────────────┐ │ +/// ┃ ┌───────────────┐ ┌──────────────┐ │ /// │ │ 3.parquet │ │ │ │ 4.parquet │ ┃ -///┃ │ Sort: A, B, C │ │Sort: A, B, C │ │ +/// ┃ │ Sort: A, B, C │ │Sort: A, B, C │ │ /// │ └───────────────┘ │ │ └──────────────┘ ┃ -///┃ │ +/// ┃ │ /// │ │ │ ┃ -///┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ +/// ┃ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘ /// DataFusion DataFusion ┃ -///┃ Partition 1 Partition 2 +/// ┃ Partition 1 Partition 2 /// ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ━ ┛ /// /// DataSourceExec -///``` +/// ``` fn get_projected_output_ordering( base_config: &FileScanConfig, projected_schema: &SchemaRef, diff --git a/datafusion/datasource/src/mod.rs b/datafusion/datasource/src/mod.rs index 3cd4a1a6c1c9..e26a7d50fd0a 100644 --- a/datafusion/datasource/src/mod.rs +++ b/datafusion/datasource/src/mod.rs @@ -310,7 +310,6 @@ pub async fn calculate_range( /// Returns a `Result` wrapping a `usize` that represents the position of the first newline character found within the specified range. If no newline is found, it returns the length of the scanned data, effectively indicating the end of the range. /// /// The function returns an `Error` if any issues arise while reading from the object store or processing the data stream. -/// async fn find_first_newline( object_store: &Arc<dyn ObjectStore>, location: &Path, diff --git a/datafusion/datasource/src/url.rs b/datafusion/datasource/src/url.rs index 1dc12f7d1d06..68c026967b9e 100644 --- a/datafusion/datasource/src/url.rs +++ b/datafusion/datasource/src/url.rs @@ -361,7 +361,6 @@ const GLOB_START_CHARS: [char; 3] = ['?', '*', '[']; /// /// Path delimiters are determined using [`std::path::is_separator`] which /// permits `/` as a path delimiter even on Windows platforms. 
-/// #[cfg(not(target_arch = "wasm32"))] fn split_glob_expression(path: &str) -> Option<(&str, &str)> { let mut last_separator = 0; diff --git a/datafusion/datasource/src/write/mod.rs b/datafusion/datasource/src/write/mod.rs index 3694568682a5..85832f81bc18 100644 --- a/datafusion/datasource/src/write/mod.rs +++ b/datafusion/datasource/src/write/mod.rs @@ -162,7 +162,11 @@ impl ObjectWriterBuilder { /// # let object_store = Arc::new(InMemory::new()); /// let mut builder = ObjectWriterBuilder::new(compression_type, &location, object_store); /// builder.set_buffer_size(Some(20 * 1024 * 1024)); //20 MiB - /// assert_eq!(builder.get_buffer_size(), Some(20 * 1024 * 1024), "Internal error: Builder buffer size doesn't match"); + /// assert_eq!( + /// builder.get_buffer_size(), + /// Some(20 * 1024 * 1024), + /// "Internal error: Builder buffer size doesn't match" + /// ); /// ``` pub fn set_buffer_size(&mut self, buffer_size: Option<usize>) { self.buffer_size = buffer_size; @@ -182,7 +186,11 @@ impl ObjectWriterBuilder { /// # let object_store = Arc::new(InMemory::new()); /// let builder = ObjectWriterBuilder::new(compression_type, &location, object_store) /// .with_buffer_size(Some(20 * 1024 * 1024)); //20 MiB - /// assert_eq!(builder.get_buffer_size(), Some(20 * 1024 * 1024), "Internal error: Builder buffer size doesn't match"); + /// assert_eq!( + /// builder.get_buffer_size(), + /// Some(20 * 1024 * 1024), + /// "Internal error: Builder buffer size doesn't match" + /// ); /// ``` pub fn with_buffer_size(mut self, buffer_size: Option<usize>) -> Self { self.buffer_size = buffer_size; diff --git a/datafusion/doc/src/lib.rs b/datafusion/doc/src/lib.rs index ca74c3b06d6d..bf97a97e5f11 100644 --- a/datafusion/doc/src/lib.rs +++ b/datafusion/doc/src/lib.rs @@ -188,7 +188,7 @@ impl Default for DocSection { /// Example: /// /// ```rust -/// +/// /// # fn main() { /// use datafusion_doc::{DocSection, Documentation}; /// let doc_section = DocSection { diff --git a/datafusion/execution/src/config.rs b/datafusion/execution/src/config.rs index c1ee2820c0b4..5ece7e839ba2 100644 --- a/datafusion/execution/src/config.rs +++ b/datafusion/execution/src/config.rs @@ -44,12 +44,15 @@ use datafusion_common::{ /// shorthand for setting `datafusion.execution.batch_size`. 
/// /// ``` -/// use datafusion_execution::config::SessionConfig; /// use datafusion_common::ScalarValue; +/// use datafusion_execution::config::SessionConfig; /// /// let config = SessionConfig::new() -/// .set("datafusion.execution.batch_size", &ScalarValue::UInt64(Some(1234))) -/// .set_bool("datafusion.execution.parquet.pushdown_filters", true); +/// .set( +/// "datafusion.execution.batch_size", +/// &ScalarValue::UInt64(Some(1234)), +/// ) +/// .set_bool("datafusion.execution.parquet.pushdown_filters", true); /// /// assert_eq!(config.batch_size(), 1234); /// assert_eq!(config.options().execution.batch_size, 1234); @@ -493,8 +496,8 @@ impl SessionConfig { /// /// # Example /// ``` - /// use std::sync::Arc; /// use datafusion_execution::config::SessionConfig; + /// use std::sync::Arc; /// /// // application-specific extension types /// struct Ext1(u8); @@ -536,8 +539,8 @@ impl SessionConfig { /// /// # Example /// ``` - /// use std::sync::Arc; /// use datafusion_execution::config::SessionConfig; + /// use std::sync::Arc; /// /// // application-specific extension types /// struct Ext1(u8); diff --git a/datafusion/execution/src/runtime_env.rs b/datafusion/execution/src/runtime_env.rs index b086430a4ef7..b018cf030952 100644 --- a/datafusion/execution/src/runtime_env.rs +++ b/datafusion/execution/src/runtime_env.rs @@ -65,9 +65,9 @@ use url::Url; /// // restrict to using at most 100MB of memory /// let pool_size = 100 * 1024 * 1024; /// let runtime_env = RuntimeEnvBuilder::new() -/// .with_memory_pool(Arc::new(GreedyMemoryPool::new(pool_size))) -/// .build() -/// .unwrap(); +/// .with_memory_pool(Arc::new(GreedyMemoryPool::new(pool_size))) +/// .build() +/// .unwrap(); /// ``` pub struct RuntimeEnv { /// Runtime memory management diff --git a/datafusion/expr-common/src/interval_arithmetic.rs b/datafusion/expr-common/src/interval_arithmetic.rs index d656c676bd01..65ebec33c602 100644 --- a/datafusion/expr-common/src/interval_arithmetic.rs +++ b/datafusion/expr-common/src/interval_arithmetic.rs @@ -1659,22 +1659,23 @@ fn cast_scalar_value( /// /// // [1, 2) U {NULL} /// let maybe_null = NullableInterval::MaybeNull { -/// values: Interval::try_new( -/// ScalarValue::Int32(Some(1)), -/// ScalarValue::Int32(Some(2)), -/// ).unwrap(), +/// values: Interval::try_new( +/// ScalarValue::Int32(Some(1)), +/// ScalarValue::Int32(Some(2)), +/// ) +/// .unwrap(), /// }; /// /// // (0, ∞) /// let not_null = NullableInterval::NotNull { -/// values: Interval::try_new( -/// ScalarValue::Int32(Some(0)), -/// ScalarValue::Int32(None), -/// ).unwrap(), +/// values: Interval::try_new(ScalarValue::Int32(Some(0)), ScalarValue::Int32(None)) +/// .unwrap(), /// }; /// /// // {NULL} -/// let null_interval = NullableInterval::Null { datatype: DataType::Int32 }; +/// let null_interval = NullableInterval::Null { +/// datatype: DataType::Int32, +/// }; /// /// // {4} /// let single_value = NullableInterval::from(ScalarValue::Int32(Some(4))); @@ -1776,22 +1777,26 @@ impl NullableInterval { /// /// ``` /// use datafusion_common::ScalarValue; - /// use datafusion_expr_common::operator::Operator; /// use datafusion_expr_common::interval_arithmetic::Interval; /// use datafusion_expr_common::interval_arithmetic::NullableInterval; + /// use datafusion_expr_common::operator::Operator; /// /// // 4 > 3 -> true /// let lhs = NullableInterval::from(ScalarValue::Int32(Some(4))); /// let rhs = NullableInterval::from(ScalarValue::Int32(Some(3))); /// let result = lhs.apply_operator(&Operator::Gt, &rhs).unwrap(); - /// 
assert_eq!(result, NullableInterval::from(ScalarValue::Boolean(Some(true)))); + /// assert_eq!( + /// result, + /// NullableInterval::from(ScalarValue::Boolean(Some(true))) + /// ); /// /// // [1, 3) > NULL -> NULL /// let lhs = NullableInterval::NotNull { /// values: Interval::try_new( - /// ScalarValue::Int32(Some(1)), - /// ScalarValue::Int32(Some(3)), - /// ).unwrap(), + /// ScalarValue::Int32(Some(1)), + /// ScalarValue::Int32(Some(3)), + /// ) + /// .unwrap(), /// }; /// let rhs = NullableInterval::from(ScalarValue::Int32(None)); /// let result = lhs.apply_operator(&Operator::Gt, &rhs).unwrap(); @@ -1800,22 +1805,27 @@ impl NullableInterval { /// // [1, 3] > [2, 4] -> [false, true] /// let lhs = NullableInterval::NotNull { /// values: Interval::try_new( - /// ScalarValue::Int32(Some(1)), - /// ScalarValue::Int32(Some(3)), - /// ).unwrap(), + /// ScalarValue::Int32(Some(1)), + /// ScalarValue::Int32(Some(3)), + /// ) + /// .unwrap(), /// }; /// let rhs = NullableInterval::NotNull { - /// values: Interval::try_new( - /// ScalarValue::Int32(Some(2)), - /// ScalarValue::Int32(Some(4)), - /// ).unwrap(), + /// values: Interval::try_new( + /// ScalarValue::Int32(Some(2)), + /// ScalarValue::Int32(Some(4)), + /// ) + /// .unwrap(), /// }; /// let result = lhs.apply_operator(&Operator::Gt, &rhs).unwrap(); /// // Both inputs are valid (non-null), so result must be non-null - /// assert_eq!(result, NullableInterval::NotNull { - /// // Uncertain whether inequality is true or false - /// values: Interval::UNCERTAIN, - /// }); + /// assert_eq!( + /// result, + /// NullableInterval::NotNull { + /// // Uncertain whether inequality is true or false + /// values: Interval::UNCERTAIN, + /// } + /// ); /// ``` pub fn apply_operator(&self, op: &Operator, rhs: &Self) -> Result<Self> { match op { @@ -1913,7 +1923,8 @@ impl NullableInterval { /// values: Interval::try_new( /// ScalarValue::Int32(Some(1)), /// ScalarValue::Int32(Some(4)), - /// ).unwrap(), + /// ) + /// .unwrap(), /// }; /// assert_eq!(interval.single_value(), None); /// ``` diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 5e1705d8ff61..9d22ed86dad2 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -89,11 +89,11 @@ pub enum Volatility { /// # use arrow::datatypes::{DataType, TimeUnit}; /// # use datafusion_expr_common::signature::{TIMEZONE_WILDCARD, TypeSignature}; /// let type_signature = TypeSignature::Exact(vec![ -/// // A nanosecond precision timestamp with ANY timezone -/// // matches Timestamp(Nanosecond, Some("+0:00")) -/// // matches Timestamp(Nanosecond, Some("+5:00")) -/// // does not match Timestamp(Nanosecond, None) -/// DataType::Timestamp(TimeUnit::Nanosecond, Some(TIMEZONE_WILDCARD.into())), +/// // A nanosecond precision timestamp with ANY timezone +/// // matches Timestamp(Nanosecond, Some("+0:00")) +/// // matches Timestamp(Nanosecond, Some("+5:00")) +/// // does not match Timestamp(Nanosecond, None) +/// DataType::Timestamp(TimeUnit::Nanosecond, Some(TIMEZONE_WILDCARD.into())), /// ]); /// ``` #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] @@ -562,8 +562,8 @@ fn get_data_types(native_type: &NativeType) -> Vec<DataType> { /// # Examples /// /// ``` +/// use datafusion_common::types::{logical_binary, logical_string, NativeType}; /// use datafusion_expr_common::signature::{Coercion, TypeSignatureClass}; -/// use datafusion_common::types::{NativeType, logical_binary, logical_string}; /// /// // Exact coercion that only accepts 
timestamp types /// let exact = Coercion::new_exact(TypeSignatureClass::Timestamp); /// /// // Implicit coercion that accepts string types /// let implicit = Coercion::new_implicit( /// TypeSignatureClass::Native(logical_string()), /// vec![TypeSignatureClass::Native(logical_binary())], -/// NativeType::String +/// NativeType::String, /// ); /// ``` /// diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 0749ff0e98b7..37b4cae2a5ec 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -132,11 +132,11 @@ use sqlparser::ast::{ /// # use datafusion_expr::{lit, col, Operator, Expr}; /// // Use the `+` operator to add two columns together /// let expr = col("c1") + col("c2"); -/// assert!(matches!(expr, Expr::BinaryExpr { ..} )); +/// assert!(matches!(expr, Expr::BinaryExpr { .. })); /// if let Expr::BinaryExpr(binary_expr) = expr { -/// assert_eq!(*binary_expr.left, col("c1")); -/// assert_eq!(*binary_expr.right, col("c2")); -/// assert_eq!(binary_expr.op, Operator::Plus); +/// assert_eq!(*binary_expr.left, col("c1")); +/// assert_eq!(*binary_expr.right, col("c2")); +/// assert_eq!(binary_expr.op, Operator::Plus); /// } /// ``` /// @@ -147,12 +147,12 @@ use sqlparser::ast::{ /// # use datafusion_common::ScalarValue; /// # use datafusion_expr::{lit, col, Operator, Expr}; /// let expr = col("c1").eq(lit(42_i32)); -/// assert!(matches!(expr, Expr::BinaryExpr { .. } )); +/// assert!(matches!(expr, Expr::BinaryExpr { .. })); /// if let Expr::BinaryExpr(binary_expr) = expr { -/// assert_eq!(*binary_expr.left, col("c1")); -/// let scalar = ScalarValue::Int32(Some(42)); -/// assert_eq!(*binary_expr.right, Expr::Literal(scalar, None)); -/// assert_eq!(binary_expr.op, Operator::Eq); +/// assert_eq!(*binary_expr.left, col("c1")); +/// let scalar = ScalarValue::Int32(Some(42)); +/// assert_eq!(*binary_expr.right, Expr::Literal(scalar, None)); +/// assert_eq!(binary_expr.op, Operator::Eq); /// } /// ``` /// @@ -165,22 +165,22 @@ use sqlparser::ast::{ /// # use datafusion_expr::Expr; /// // Create a schema c1(int, c2 float) /// let arrow_schema = Schema::new(vec![ -/// Field::new("c1", DataType::Int32, false), -/// Field::new("c2", DataType::Float64, false), +/// Field::new("c1", DataType::Int32, false), +/// Field::new("c2", DataType::Float64, false), /// ]); /// // DFSchema is a an Arrow schema with optional relation name -/// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema) -/// .unwrap(); +/// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema).unwrap(); /// /// // Form Vec with an expression for each column in the schema -/// let exprs: Vec<_> = df_schema.iter() -/// .map(Expr::from) -/// .collect(); -/// -/// assert_eq!(exprs, vec![ -/// Expr::from(Column::from_qualified_name("t1.c1")), -/// Expr::from(Column::from_qualified_name("t1.c2")), -/// ]); +/// let exprs: Vec<_> = df_schema.iter().map(Expr::from).collect(); +/// +/// assert_eq!( +/// exprs, +/// vec![ +/// Expr::from(Column::from_qualified_name("t1.c1")), +/// Expr::from(Column::from_qualified_name("t1.c2")), +/// ] +/// ); /// ``` /// /// # Examples: Displaying `Exprs` @@ -241,12 +241,13 @@ use sqlparser::ast::{ /// let mut scalars = HashSet::new(); /// // apply recursively visits all nodes in the expression tree /// expr.apply(|e| { -/// if let Expr::Literal(scalar, _) = e { -/// scalars.insert(scalar); -/// } -/// // The return value controls whether to continue visiting the tree -/// Ok(TreeNodeRecursion::Continue) -/// 
}).unwrap(); +/// if let Expr::Literal(scalar, _) = e { +/// scalars.insert(scalar); +/// } +/// // The return value controls whether to continue visiting the tree +/// Ok(TreeNodeRecursion::Continue) +/// }) +/// .unwrap(); /// // All subtrees have been visited and literals found /// assert_eq!(scalars.len(), 2); /// assert!(scalars.contains(&ScalarValue::Int32(Some(5)))); @@ -443,7 +444,6 @@ impl<'a> TreeNodeContainer<'a, Self> for Expr { /// // Add any metadata from `FieldMetadata` to `Field` /// let updated_field = metadata.add_to_field(field); /// ``` -/// #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] pub struct FieldMetadata { /// The inner metadata of a literal expression, which is a map of string @@ -1586,7 +1586,6 @@ impl Expr { /// let metadata = FieldMetadata::from(metadata); /// let expr = col("foo").alias_with_metadata("bar", Some(metadata)); /// ``` - /// pub fn alias_with_metadata( self, name: impl Into<String>, @@ -1616,9 +1615,9 @@ impl Expr { /// # use datafusion_expr::expr::FieldMetadata; /// let metadata = HashMap::from([("key".to_string(), "value".to_string())]); /// let metadata = FieldMetadata::from(metadata); - /// let expr = col("foo").alias_qualified_with_metadata(Some("tbl"), "bar", Some(metadata)); + /// let expr = + /// col("foo").alias_qualified_with_metadata(Some("tbl"), "bar", Some(metadata)); /// ``` - /// pub fn alias_qualified_with_metadata( self, relation: Option<impl Into<TableReference>>, diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 8ca479bb6f9b..ad568d962858 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -81,15 +81,17 @@ impl ExprSchemable for Expr { /// # use std::collections::HashMap; /// /// fn main() { - /// let expr = col("c1") + col("c2"); - /// let schema = DFSchema::from_unqualified_fields( - /// vec![ - /// Field::new("c1", DataType::Int32, true), - /// Field::new("c2", DataType::Float32, true), - /// ].into(), - /// HashMap::new(), - /// ).unwrap(); - /// assert_eq!("Float32", format!("{}", expr.get_type(&schema).unwrap())); + /// let expr = col("c1") + col("c2"); + /// let schema = DFSchema::from_unqualified_fields( + /// vec![ + /// Field::new("c1", DataType::Int32, true), + /// Field::new("c2", DataType::Float32, true), + /// ] + /// .into(), + /// HashMap::new(), + /// ) + /// .unwrap(); + /// assert_eq!("Float32", format!("{}", expr.get_type(&schema).unwrap())); /// } /// ``` /// @@ -695,7 +697,6 @@ impl Expr { /// new projection with the casted expression. /// 2. **Non-projection plan**: If the subquery isn't a projection, it adds a projection to the plan /// with the casted first column. -/// pub fn cast_subquery(subquery: Subquery, cast_to_type: &DataType) -> Result<Subquery> { if subquery.subquery.schema().field(0).data_type() == cast_to_type { return Ok(subquery); diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index 1ab5ffa75842..55752dcc5686 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -436,14 +436,13 @@ impl LogicalPlanBuilder { /// # ])) as _; /// # let table_source = Arc::new(LogicalTableSource::new(employee_schema)); /// // VALUES (1), (2) - /// let input = LogicalPlanBuilder::values(vec![vec![lit(1)], vec![lit(2)]])? 
- /// .build()?; + /// let input = LogicalPlanBuilder::values(vec![vec![lit(1)], vec![lit(2)]])?.build()?; /// // INSERT INTO MyTable VALUES (1), (2) /// let insert_plan = LogicalPlanBuilder::insert_into( - /// input, - /// "MyTable", - /// table_source, - /// InsertOp::Append, + /// input, + /// "MyTable", + /// table_source, + /// InsertOp::Append, /// )?; /// # Ok(()) /// # } @@ -939,8 +938,8 @@ impl LogicalPlanBuilder { /// // Form the expression `(left.a != right.a)` AND `(left.b != right.b)` /// let exprs = vec![ /// col("left.a").eq(col("right.a")), - /// col("left.b").not_eq(col("right.b")) - /// ]; + /// col("left.b").not_eq(col("right.b")), + /// ]; /// /// // Perform the equivalent of `left INNER JOIN right ON (a != a2 AND b != b2)` /// // finding all pairs of rows from `left` and `right` where diff --git a/datafusion/expr/src/logical_plan/display.rs b/datafusion/expr/src/logical_plan/display.rs index cc3fbad7b0c2..2e3c98e9acc4 100644 --- a/datafusion/expr/src/logical_plan/display.rs +++ b/datafusion/expr/src/logical_plan/display.rs @@ -98,17 +98,17 @@ impl<'n> TreeNodeVisitor<'n> for IndentVisitor<'_, '_> { /// `foo:Utf8;N` if `foo` is nullable. /// /// ``` -/// use arrow::datatypes::{Field, Schema, DataType}; +/// use arrow::datatypes::{DataType, Field, Schema}; /// # use datafusion_expr::logical_plan::display_schema; /// let schema = Schema::new(vec![ /// Field::new("id", DataType::Int32, false), /// Field::new("first_name", DataType::Utf8, true), -/// ]); +/// ]); /// -/// assert_eq!( -/// "[id:Int32, first_name:Utf8;N]", -/// format!("{}", display_schema(&schema)) -/// ); +/// assert_eq!( +/// "[id:Int32, first_name:Utf8;N]", +/// format!("{}", display_schema(&schema)) +/// ); /// ``` pub fn display_schema(schema: &Schema) -> impl fmt::Display + '_ { struct Wrapper<'a>(&'a Schema); diff --git a/datafusion/expr/src/logical_plan/extension.rs b/datafusion/expr/src/logical_plan/extension.rs index 5bf64a36a654..fc662c7c5eb4 100644 --- a/datafusion/expr/src/logical_plan/extension.rs +++ b/datafusion/expr/src/logical_plan/extension.rs @@ -39,10 +39,10 @@ pub trait UserDefinedLogicalNode: fmt::Debug + Send + Sync { /// # struct Dummy { } /// /// # impl Dummy { - /// // canonical boiler plate - /// fn as_any(&self) -> &dyn Any { - /// self - /// } + /// // canonical boiler plate + /// fn as_any(&self) -> &dyn Any { + /// self + /// } /// # } /// ``` fn as_any(&self) -> &dyn Any; @@ -131,18 +131,18 @@ pub trait UserDefinedLogicalNode: fmt::Debug + Send + Sync { /// // User defined node that derives Hash /// #[derive(Hash, Debug, PartialEq, Eq)] /// struct MyNode { - /// val: u64 + /// val: u64, /// } /// /// // impl UserDefinedLogicalNode { /// // ... /// # impl MyNode { - /// // Boiler plate to call the derived Hash impl - /// fn dyn_hash(&self, state: &mut dyn std::hash::Hasher) { + /// // Boiler plate to call the derived Hash impl + /// fn dyn_hash(&self, state: &mut dyn std::hash::Hasher) { /// use std::hash::Hash; /// let mut s = state; /// self.hash(&mut s); - /// } + /// } /// // } /// # } /// ``` @@ -169,19 +169,19 @@ pub trait UserDefinedLogicalNode: fmt::Debug + Send + Sync { /// // User defined node that derives Eq /// #[derive(Hash, Debug, PartialEq, Eq)] /// struct MyNode { - /// val: u64 + /// val: u64, /// } /// /// // impl UserDefinedLogicalNode { /// // ... 
/// # impl MyNode { - /// // Boiler plate to call the derived Eq impl - /// fn dyn_eq(&self, other: &dyn UserDefinedLogicalNode) -> bool { + /// // Boiler plate to call the derived Eq impl + /// fn dyn_eq(&self, other: &dyn UserDefinedLogicalNode) -> bool { /// match other.as_any().downcast_ref::<Self>() { - /// Some(o) => self == o, - /// None => false, + /// Some(o) => self == o, + /// None => false, /// } - /// } + /// } /// // } /// # } /// ``` diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index d68e6cd81272..be062a70fb17 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -199,7 +199,6 @@ pub use datafusion_common::{JoinConstraint, JoinType}; /// # Ok(()) /// # } /// ``` -/// #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] pub enum LogicalPlan { /// Evaluates an arbitrary list of expressions (essentially a @@ -1265,7 +1264,6 @@ impl LogicalPlan { /// \n TableScan: t1", /// plan.display_indent().to_string() /// ); - /// /// ``` pub fn with_param_values( self, @@ -1538,20 +1536,20 @@ impl LogicalPlan { /// ``` /// /// ``` - /// use arrow::datatypes::{Field, Schema, DataType}; - /// use datafusion_expr::{lit, col, LogicalPlanBuilder, logical_plan::table_scan}; - /// let schema = Schema::new(vec![ - /// Field::new("id", DataType::Int32, false), - /// ]); - /// let plan = table_scan(Some("t1"), &schema, None).unwrap() - /// .filter(col("id").eq(lit(5))).unwrap() - /// .build().unwrap(); + /// use arrow::datatypes::{DataType, Field, Schema}; + /// use datafusion_expr::{col, lit, logical_plan::table_scan, LogicalPlanBuilder}; + /// let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]); + /// let plan = table_scan(Some("t1"), &schema, None) + /// .unwrap() + /// .filter(col("id").eq(lit(5))) + /// .unwrap() + /// .build() + /// .unwrap(); /// /// // Format using display_indent /// let display_string = format!("{}", plan.display_indent()); /// - /// assert_eq!("Filter: t1.id = Int32(5)\n TableScan: t1", - /// display_string); + /// assert_eq!("Filter: t1.id = Int32(5)\n TableScan: t1", display_string); /// ``` pub fn display_indent(&self) -> impl Display + '_ { // Boilerplate structure to wrap LogicalPlan with something @@ -1580,21 +1578,24 @@ impl LogicalPlan { /// ``` /// /// ``` - /// use arrow::datatypes::{Field, Schema, DataType}; - /// use datafusion_expr::{lit, col, LogicalPlanBuilder, logical_plan::table_scan}; - /// let schema = Schema::new(vec![ - /// Field::new("id", DataType::Int32, false), - /// ]); - /// let plan = table_scan(Some("t1"), &schema, None).unwrap() - /// .filter(col("id").eq(lit(5))).unwrap() - /// .build().unwrap(); + /// use arrow::datatypes::{DataType, Field, Schema}; + /// use datafusion_expr::{col, lit, logical_plan::table_scan, LogicalPlanBuilder}; + /// let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]); + /// let plan = table_scan(Some("t1"), &schema, None) + /// .unwrap() + /// .filter(col("id").eq(lit(5))) + /// .unwrap() + /// .build() + /// .unwrap(); /// /// // Format using display_indent_schema /// let display_string = format!("{}", plan.display_indent_schema()); /// - /// assert_eq!("Filter: t1.id = Int32(5) [id:Int32]\ /// \n TableScan: t1 [id:Int32]", - /// display_string); + /// assert_eq!( + /// "Filter: t1.id = Int32(5) [id:Int32]\ /// \n TableScan: t1 [id:Int32]", + /// display_string + /// ); /// ``` pub fn display_indent_schema(&self) -> impl Display + '_ { // Boilerplate structure to wrap LogicalPlan with something @@ -1642,14
+1643,15 @@ impl LogicalPlan { /// structure, and one with additional details such as schema. /// /// ``` - /// use arrow::datatypes::{Field, Schema, DataType}; - /// use datafusion_expr::{lit, col, LogicalPlanBuilder, logical_plan::table_scan}; - /// let schema = Schema::new(vec![ - /// Field::new("id", DataType::Int32, false), - /// ]); - /// let plan = table_scan(Some("t1"), &schema, None).unwrap() - /// .filter(col("id").eq(lit(5))).unwrap() - /// .build().unwrap(); + /// use arrow::datatypes::{DataType, Field, Schema}; + /// use datafusion_expr::{col, lit, logical_plan::table_scan, LogicalPlanBuilder}; + /// let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]); + /// let plan = table_scan(Some("t1"), &schema, None) + /// .unwrap() + /// .filter(col("id").eq(lit(5))) + /// .unwrap() + /// .build() + /// .unwrap(); /// /// // Format using display_graphviz /// let graphviz_string = format!("{}", plan.display_graphviz()); @@ -1661,7 +1663,6 @@ impl LogicalPlan { /// ```bash /// dot -Tpdf < /tmp/example.dot > /tmp/example.pdf /// ``` - /// pub fn display_graphviz(&self) -> impl Display + '_ { // Boilerplate structure to wrap LogicalPlan with something // that that can be formatted @@ -1700,13 +1701,13 @@ impl LogicalPlan { /// Projection: id /// ``` /// ``` - /// use arrow::datatypes::{Field, Schema, DataType}; - /// use datafusion_expr::{lit, col, LogicalPlanBuilder, logical_plan::table_scan}; - /// let schema = Schema::new(vec![ - /// Field::new("id", DataType::Int32, false), - /// ]); - /// let plan = table_scan(Some("t1"), &schema, None).unwrap() - /// .build().unwrap(); + /// use arrow::datatypes::{DataType, Field, Schema}; + /// use datafusion_expr::{col, lit, logical_plan::table_scan, LogicalPlanBuilder}; + /// let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]); + /// let plan = table_scan(Some("t1"), &schema, None) + /// .unwrap() + /// .build() + /// .unwrap(); /// /// // Format using display /// let display_string = format!("{}", plan.display()); diff --git a/datafusion/expr/src/select_expr.rs b/datafusion/expr/src/select_expr.rs index 039df20f397b..bfec4c5844d0 100644 --- a/datafusion/expr/src/select_expr.rs +++ b/datafusion/expr/src/select_expr.rs @@ -44,10 +44,8 @@ use crate::{expr::WildcardOptions, Expr}; /// let wildcard = SelectExpr::Wildcard(WildcardOptions::default()); /// /// // SELECT mytable.* -/// let qualified = SelectExpr::QualifiedWildcard( -/// "mytable".into(), -/// WildcardOptions::default() -/// ); +/// let qualified = +/// SelectExpr::QualifiedWildcard("mytable".into(), WildcardOptions::default()); /// /// // SELECT col1 /// let expr = SelectExpr::Expression(col("col1").into()); diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index b6c8eb627c77..e607917dcdb9 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -863,7 +863,6 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { /// * 'info': [crate::simplify::SimplifyInfo] /// /// closure returns simplified [Expr] or an error. 
- /// fn simplify(&self) -> Option<AggregateFunctionSimplification> { None } diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 3a94981ae406..cb127c2b0bb1 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -503,10 +503,10 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// # struct Example{} /// # impl Example { /// fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> { - /// // report output is only nullable if any one of the arguments are nullable - /// let nullable = args.arg_fields.iter().any(|f| f.is_nullable()); - /// let field = Arc::new(Field::new("ignored_name", DataType::Int32, true)); - /// Ok(field) + /// // report output is only nullable if any one of the arguments are nullable + /// let nullable = args.arg_fields.iter().any(|f| f.is_nullable()); + /// let field = Arc::new(Field::new("ignored_name", DataType::Int32, true)); + /// Ok(field) /// } /// # } /// ``` diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 8950f5e450e0..e2b41f00237b 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -869,7 +869,6 @@ pub fn check_all_columns_from_schema( /// all referenced column of the right side is from the right schema. /// 2. Or opposite. All referenced column of the left side is from the right schema, /// and the right side is from the left schema. -/// pub fn find_valid_equijoin_key_pair( left_key: &Expr, right_key: &Expr, @@ -1013,10 +1012,7 @@ pub fn iter_conjunction_owned(expr: Expr) -> impl Iterator<Item = Expr> { /// let expr = col("a").eq(lit(1)).and(col("b").eq(lit(2))); /// /// // [a=1, b=2] -/// let split = vec![ -/// col("a").eq(lit(1)), -/// col("b").eq(lit(2)), -/// ]; +/// let split = vec![col("a").eq(lit(1)), col("b").eq(lit(2))]; /// /// // use split_conjunction_owned to split them /// assert_eq!(split_conjunction_owned(expr), split); @@ -1039,10 +1035,7 @@ pub fn split_conjunction_owned(expr: Expr) -> Vec<Expr> { /// let expr = col("a").eq(lit(1)).add(col("b").eq(lit(2))); /// /// // [a=1, b=2] -/// let split = vec![ -/// col("a").eq(lit(1)), -/// col("b").eq(lit(2)), -/// ]; +/// let split = vec![col("a").eq(lit(1)), col("b").eq(lit(2))]; /// /// // use split_binary_owned to split them /// assert_eq!(split_binary_owned(expr, Operator::Plus), split); @@ -1110,10 +1103,7 @@ fn split_binary_impl<'a>( /// let expr = col("a").eq(lit(1)).and(col("b").eq(lit(2))); /// /// // [a=1, b=2] -/// let split = vec![ -/// col("a").eq(lit(1)), -/// col("b").eq(lit(2)), -/// ]; +/// let split = vec![col("a").eq(lit(1)), col("b").eq(lit(2))]; /// /// // use conjunction to join them together with `AND` /// assert_eq!(conjunction(split), Some(expr)); @@ -1136,10 +1126,7 @@ pub fn conjunction(filters: impl IntoIterator<Item = Expr>) -> Option<Expr> { /// let expr = col("a").eq(lit(1)).or(col("b").eq(lit(2))); /// /// // [a=1, b=2] -/// let split = vec![ -/// col("a").eq(lit(1)), -/// col("b").eq(lit(2)), -/// ]; +/// let split = vec![col("a").eq(lit(1)), col("b").eq(lit(2))]; /// /// // use disjunction to join them together with `OR` /// assert_eq!(disjunction(split), Some(expr)); diff --git a/datafusion/expr/src/window_frame.rs b/datafusion/expr/src/window_frame.rs index b91bbddd8bac..eb94f72768d8 100644 --- a/datafusion/expr/src/window_frame.rs +++ b/datafusion/expr/src/window_frame.rs @@ -307,7 +307,6 @@ impl WindowFrame { /// 3. CURRENT ROW /// 4. `<expr>` FOLLOWING /// 5. UNBOUNDED FOLLOWING -/// #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] pub enum WindowFrameBound { /// 1. 
UNBOUNDED PRECEDING diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs index aa2f5a586e87..c807591dabec 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator.rs @@ -80,15 +80,13 @@ use datafusion_expr_common::groups_accumulator::{EmitTo, GroupsAccumulator}; /// Logical group Current Min/Max value for that group stored /// number as a ScalarValue which points to an /// individually allocated String -/// -///``` +/// ``` /// /// # Optimizations /// /// The adapter minimizes the number of calls to [`Accumulator::update_batch`] /// by first collecting the input rows for each group into a contiguous array /// using [`compute::take`] -/// pub struct GroupsAccumulatorAdapter { factory: Box<dyn Fn() -> Result<Box<dyn Accumulator>> + Send>, @@ -184,7 +182,6 @@ impl GroupsAccumulatorAdapter { /// └─────────┘ └─────────┘ └ ─ ─ ─ ─ ┘ └─────────┘ └ ─ ─ ─ ─ ┘ /// /// logical group values opt_filter logical group values opt_filter - /// /// ``` fn invoke_per_accumulator( &mut self, diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs index 987ba57f7719..736345874c27 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/accumulate.rs @@ -760,7 +760,6 @@ mod test { /// Calls `NullState::accumulate` and `accumulate_indices` to /// ensure it generates the correct values. - /// fn accumulate_test( group_indices: &[usize], values: &UInt32Array, diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/prim_op.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/prim_op.rs index 078982c983fc..fe920927f39b 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/prim_op.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/prim_op.rs @@ -142,7 +142,6 @@ where /// The state is: /// - self.prim_fn for all non null, non filtered values /// - null otherwise - /// fn convert_to_state( &self, values: &[ArrayRef], diff --git a/datafusion/functions-aggregate-common/src/tdigest.rs b/datafusion/functions-aggregate-common/src/tdigest.rs index 378fc8c42bc6..d6a17ff32497 100644 --- a/datafusion/functions-aggregate-common/src/tdigest.rs +++ b/datafusion/functions-aggregate-common/src/tdigest.rs @@ -564,7 +564,7 @@ impl TDigest { /// [`ScalarValue::Float64`]: /// /// ```text - /// + /// /// ┌────────┬────────┬────────┬───────┬────────┬────────┐ /// │max_size│ sum │ count │ max │ min │centroid│ /// └────────┴────────┴────────┴───────┴────────┴────────┘ @@ -585,7 +585,6 @@ impl TDigest { /// │└ ─ ─ ─ ┘│ /// │ │ /// ... - /// /// ``` /// /// The [`TDigest::from_scalar_state()`] method reverses this process, diff --git a/datafusion/functions-aggregate/src/count.rs b/datafusion/functions-aggregate/src/count.rs index 09904bbad6ec..85b0530372b7 100644 --- a/datafusion/functions-aggregate/src/count.rs +++ b/datafusion/functions-aggregate/src/count.rs @@ -112,8 +112,8 @@ pub fn count_all() -> Expr { /// // create `count(*)` OVER ...
window function expression /// let expr = count_all_window(); /// assert_eq!( -/// expr.schema_name().to_string(), -/// "count(Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" +/// expr.schema_name().to_string(), +/// "count(Int64(1)) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING" /// ); /// // if you need to refer to this column, use the `schema_name` function /// let expr = col(expr.schema_name().to_string()); diff --git a/datafusion/functions-aggregate/src/median.rs b/datafusion/functions-aggregate/src/median.rs index 5c3d265d1d6b..efdd7dd973bd 100644 --- a/datafusion/functions-aggregate/src/median.rs +++ b/datafusion/functions-aggregate/src/median.rs @@ -295,7 +295,6 @@ impl Accumulator for MedianAccumulator { /// of groups before final evaluation. /// So values in each group will be stored in a `Vec<T>`, and the total group values /// will actually be organized as a `Vec<Vec<T>>`. -/// #[derive(Debug)] struct MedianGroupsAccumulator { data_type: DataType, diff --git a/datafusion/functions-nested/src/expr_ext.rs b/datafusion/functions-nested/src/expr_ext.rs index 4da4a3f583b7..18c4c5fb59c3 100644 --- a/datafusion/functions-nested/src/expr_ext.rs +++ b/datafusion/functions-nested/src/expr_ext.rs @@ -36,8 +36,7 @@ use crate::extract::{array_element, array_slice}; /// ``` /// # use datafusion_expr::{lit, col, Expr}; /// # use datafusion_functions_nested::expr_ext::IndexAccessor; -/// let expr = col("c1") -/// .index(lit(3)); +/// let expr = col("c1").index(lit(3)); /// assert_eq!(expr.schema_name().to_string(), "c1[Int32(3)]"); /// ``` pub trait IndexAccessor { @@ -66,8 +65,7 @@ impl IndexAccessor for Expr { /// ``` /// # use datafusion_expr::{lit, col}; /// # use datafusion_functions_nested::expr_ext::SliceAccessor; -/// let expr = col("c1") -/// .range(lit(2), lit(4)); +/// let expr = col("c1").range(lit(2), lit(4)); /// assert_eq!(expr.schema_name().to_string(), "c1[Int32(2):Int32(4)]"); /// ``` pub trait SliceAccessor { diff --git a/datafusion/functions-nested/src/lib.rs b/datafusion/functions-nested/src/lib.rs index 1d3f11b50c61..510c44c68672 100644 --- a/datafusion/functions-nested/src/lib.rs +++ b/datafusion/functions-nested/src/lib.rs @@ -32,7 +32,6 @@ //! [DataFusion]: https://crates.io/crates/datafusion //! //! You can register the functions in this crate using the [`register_all`] function. -//! #[macro_use] pub mod macros; diff --git a/datafusion/functions-window-common/src/expr.rs b/datafusion/functions-window-common/src/expr.rs index 774cd5182b30..71e6f0be2199 100644 --- a/datafusion/functions-window-common/src/expr.rs +++ b/datafusion/functions-window-common/src/expr.rs @@ -39,7 +39,6 @@ impl<'a> ExpressionArgs<'a> { /// to the user-defined window function. /// * `input_types` - The data types corresponding to the /// arguments to the user-defined window function. - /// pub fn new( input_exprs: &'a [Arc<dyn PhysicalExpr>], input_fields: &'a [FieldRef], diff --git a/datafusion/functions-window-common/src/field.rs b/datafusion/functions-window-common/src/field.rs index 8d22efa3bcf4..8e33930ff760 100644 --- a/datafusion/functions-window-common/src/field.rs +++ b/datafusion/functions-window-common/src/field.rs @@ -36,7 +36,6 @@ impl<'a> WindowUDFFieldArgs<'a> { /// arguments to the user-defined window function. /// * `function_name` - The qualified schema name of the /// user-defined window function expression.
- /// pub fn new(input_fields: &'a [FieldRef], display_name: &'a str) -> Self { WindowUDFFieldArgs { input_fields, diff --git a/datafusion/functions-window-common/src/partition.rs b/datafusion/functions-window-common/src/partition.rs index 61125e596130..66bfd3e2aa23 100644 --- a/datafusion/functions-window-common/src/partition.rs +++ b/datafusion/functions-window-common/src/partition.rs @@ -48,7 +48,6 @@ impl<'a> PartitionEvaluatorArgs<'a> { /// window function is reversible and is reversed. /// * `ignore_nulls` - Set to `true` when `IGNORE NULLS` is /// specified. - /// pub fn new( input_exprs: &'a [Arc<dyn PhysicalExpr>], input_fields: &'a [FieldRef], diff --git a/datafusion/functions-window/src/lib.rs b/datafusion/functions-window/src/lib.rs index 10e09542d7c5..9870d1bdc2d9 100644 --- a/datafusion/functions-window/src/lib.rs +++ b/datafusion/functions-window/src/lib.rs @@ -30,7 +30,6 @@ //! implemented using the extension API. //! //! [DataFusion]: https://crates.io/crates/datafusion -//! use std::sync::Arc; diff --git a/datafusion/functions-window/src/nth_value.rs b/datafusion/functions-window/src/nth_value.rs index 4c7dd995ec8c..b94955f0b7ba 100644 --- a/datafusion/functions-window/src/nth_value.rs +++ b/datafusion/functions-window/src/nth_value.rs @@ -59,19 +59,16 @@ get_or_init_udwf!( ); /// Create an expression to represent the `first_value` window function -/// pub fn first_value(arg: datafusion_expr::Expr) -> datafusion_expr::Expr { first_value_udwf().call(vec![arg]) } /// Create an expression to represent the `last_value` window function -/// pub fn last_value(arg: datafusion_expr::Expr) -> datafusion_expr::Expr { last_value_udwf().call(vec![arg]) } /// Create an expression to represent the `nth_value` window function -/// pub fn nth_value(arg: datafusion_expr::Expr, n: i64) -> datafusion_expr::Expr { nth_value_udwf().call(vec![arg, n.lit()]) } diff --git a/datafusion/functions/benches/ltrim.rs b/datafusion/functions/benches/ltrim.rs index 7a44f40a689a..865945bfe133 100644 --- a/datafusion/functions/benches/ltrim.rs +++ b/datafusion/functions/benches/ltrim.rs @@ -99,7 +99,6 @@ pub fn create_string_array_and_characters( /// Outputs: /// - testing string array /// - trimmed characters -/// fn create_args( size: usize, characters: &str, diff --git a/datafusion/functions/src/core/expr_ext.rs b/datafusion/functions/src/core/expr_ext.rs index af05f447f1c1..3b8581995ab3 100644 --- a/datafusion/functions/src/core/expr_ext.rs +++ b/datafusion/functions/src/core/expr_ext.rs @@ -39,8 +39,7 @@ use super::expr_fn::get_field; /// ``` /// # use datafusion_expr::{col}; /// # use datafusion_functions::core::expr_ext::FieldAccessor; -/// let expr = col("c1") -/// .field("my_field"); +/// let expr = col("c1").field("my_field"); /// assert_eq!(expr.schema_name().to_string(), "c1[my_field]"); /// ``` pub trait FieldAccessor { diff --git a/datafusion/functions/src/core/nullif.rs b/datafusion/functions/src/core/nullif.rs index ee29714da16b..7f2d9d204479 100644 --- a/datafusion/functions/src/core/nullif.rs +++ b/datafusion/functions/src/core/nullif.rs @@ -113,7 +113,6 @@ impl ScalarUDFImpl for NullIfFunc { /// Implements NULLIF(expr1, expr2) /// Args: 0 - left expr is any array /// 1 - if the left is equal to this expr2, then the result is NULL, otherwise the left value is passed.
-/// fn nullif_func(args: &[ColumnarValue]) -> Result<ColumnarValue> { let [lhs, rhs] = take_function_args("nullif", args)?; diff --git a/datafusion/functions/src/datetime/common.rs b/datafusion/functions/src/datetime/common.rs index fd9f37d8052c..bfae526cf145 100644 --- a/datafusion/functions/src/datetime/common.rs +++ b/datafusion/functions/src/datetime/common.rs @@ -139,7 +139,6 @@ pub(crate) fn string_to_datetime_formatted( /// defined by `chrono`. /// /// [`chrono::format::strftime`]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html -/// #[inline] pub(crate) fn string_to_timestamp_nanos_formatted( s: &str, @@ -170,7 +169,6 @@ pub(crate) fn string_to_timestamp_nanos_formatted( /// defined by `chrono`. /// /// [`chrono::format::strftime`]: https://docs.rs/chrono/latest/chrono/format/strftime/index.html -/// #[inline] pub(crate) fn string_to_timestamp_millis_formatted(s: &str, format: &str) -> Result<i64> { Ok(string_to_datetime_formatted(&Utc, s, format)? diff --git a/datafusion/macros/src/user_doc.rs b/datafusion/macros/src/user_doc.rs index 31cf9bb1b750..21c4cc6104f5 100644 --- a/datafusion/macros/src/user_doc.rs +++ b/datafusion/macros/src/user_doc.rs @@ -61,7 +61,6 @@ use syn::{parse_macro_input, DeriveInput, LitStr}; /// } /// ``` /// will generate the following code -/// /// ```ignore /// pub struct ToDateFunc { /// signature: Signature, diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index bcb867f6e7fa..4b95946cf53d 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -561,7 +561,6 @@ fn push_down_join( /// /// * `on_filters` filters from the join ON clause that have not already been /// identified as join predicates -/// fn infer_join_predicates( join: &Join, predicates: &[Expr], @@ -648,7 +647,6 @@ impl InferredPredicates { /// * `predicates` the pushed down predicates /// /// * `inferred_predicates` the inferred results -/// fn infer_join_predicates_from_predicates( join_col_keys: &[(&Column, &Column)], predicates: &[Expr], @@ -672,7 +670,6 @@ fn infer_join_predicates_from_predicates( /// identified as join predicates /// /// * `inferred_predicates` the inferred results -/// fn infer_join_predicates_from_on_filters( join_col_keys: &[(&Column, &Column)], join_type: JoinType, @@ -718,7 +715,6 @@ fn infer_join_predicates_from_on_filters( /// /// * `ENABLE_RIGHT_TO_LEFT` indicates that the left table related predicate can /// be inferred from the right table related predicate -/// fn infer_join_predicates_impl< const ENABLE_LEFT_TO_RIGHT: bool, const ENABLE_RIGHT_TO_LEFT: bool, diff --git a/datafusion/optimizer/src/push_down_limit.rs b/datafusion/optimizer/src/push_down_limit.rs index ec042dd350ca..a58ac532ccf8 100644 --- a/datafusion/optimizer/src/push_down_limit.rs +++ b/datafusion/optimizer/src/push_down_limit.rs @@ -30,7 +30,6 @@ use datafusion_expr::logical_plan::{Join, JoinType, Limit, LogicalPlan}; use datafusion_expr::{lit, FetchType, SkipType}; /// Optimization rule that tries to push down `LIMIT`. -/// //.
It will push down through projection, limits (taking the smaller limit) #[derive(Default, Debug)] pub struct PushDownLimit {} diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 9a3a8bcd23a7..82f36df6b1a1 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -67,23 +67,21 @@ use regex::Regex; /// /// For example: /// ``` -/// use arrow::datatypes::{Schema, Field, DataType}; -/// use datafusion_expr::{col, lit}; +/// use arrow::datatypes::{DataType, Field, Schema}; /// use datafusion_common::{DataFusionError, ToDFSchema}; /// use datafusion_expr::execution_props::ExecutionProps; /// use datafusion_expr::simplify::SimplifyContext; +/// use datafusion_expr::{col, lit}; /// use datafusion_optimizer::simplify_expressions::ExprSimplifier; /// /// // Create the schema -/// let schema = Schema::new(vec![ -/// Field::new("i", DataType::Int64, false), -/// ]) -/// .to_dfschema_ref().unwrap(); +/// let schema = Schema::new(vec![Field::new("i", DataType::Int64, false)]) +/// .to_dfschema_ref() +/// .unwrap(); /// /// // Create the simplifier /// let props = ExecutionProps::new(); -/// let context = SimplifyContext::new(&props) -/// .with_schema(schema); +/// let context = SimplifyContext::new(&props).with_schema(schema); /// let simplifier = ExprSimplifier::new(context); /// /// // Use the simplifier @@ -142,35 +140,35 @@ impl<S: SimplifyInfo> ExprSimplifier<S> { /// /// ``` /// use arrow::datatypes::DataType; - /// use datafusion_expr::{col, lit, Expr}; + /// use datafusion_common::DFSchema; /// use datafusion_common::Result; /// use datafusion_expr::execution_props::ExecutionProps; /// use datafusion_expr::simplify::SimplifyContext; /// use datafusion_expr::simplify::SimplifyInfo; + /// use datafusion_expr::{col, lit, Expr}; /// use datafusion_optimizer::simplify_expressions::ExprSimplifier; - /// use datafusion_common::DFSchema; /// use std::sync::Arc; /// /// /// Simple implementation that provides `Simplifier` the information it needs /// /// See SimplifyContext for a structure that does this. /// #[derive(Default)] /// struct Info { - /// execution_props: ExecutionProps, + /// execution_props: ExecutionProps, /// }; /// /// impl SimplifyInfo for Info { - /// fn is_boolean_type(&self, expr: &Expr) -> Result<bool> { - /// Ok(false) - /// } - /// fn nullable(&self, expr: &Expr) -> Result<bool> { - /// Ok(true) - /// } - /// fn execution_props(&self) -> &ExecutionProps { - /// &self.execution_props - /// } - /// fn get_data_type(&self, expr: &Expr) -> Result<DataType> { - /// Ok(DataType::Int32) - /// } + /// fn is_boolean_type(&self, expr: &Expr) -> Result<bool> { + /// Ok(false) + /// } + /// fn nullable(&self, expr: &Expr) -> Result<bool> { + /// Ok(true) + /// } + /// fn execution_props(&self) -> &ExecutionProps { + /// &self.execution_props + /// } + /// fn get_data_type(&self, expr: &Expr) -> Result<DataType> { + /// Ok(DataType::Int32) + /// } /// } /// /// // Create the simplifier @@ -196,7 +194,6 @@ impl<S: SimplifyInfo> ExprSimplifier<S> { /// optimizations. /// /// See [Self::simplify] for details and usage examples. - /// #[deprecated( since = "48.0.0", note = "Use `simplify_with_cycle_count_transformed` instead" )] @@ -220,7 +217,6 @@ impl<S: SimplifyInfo> ExprSimplifier<S> { /// - The number of simplification cycles that were performed /// /// See [Self::simplify] for details and usage examples.
- /// pub fn simplify_with_cycle_count_transformed( &self, mut expr: Expr, @@ -284,24 +280,24 @@ impl ExprSimplifier { /// /// ```rust /// use arrow::datatypes::{DataType, Field, Schema}; - /// use datafusion_expr::{col, lit, Expr}; - /// use datafusion_expr::interval_arithmetic::{Interval, NullableInterval}; /// use datafusion_common::{Result, ScalarValue, ToDFSchema}; /// use datafusion_expr::execution_props::ExecutionProps; + /// use datafusion_expr::interval_arithmetic::{Interval, NullableInterval}; /// use datafusion_expr::simplify::SimplifyContext; + /// use datafusion_expr::{col, lit, Expr}; /// use datafusion_optimizer::simplify_expressions::ExprSimplifier; /// /// let schema = Schema::new(vec![ - /// Field::new("x", DataType::Int64, false), - /// Field::new("y", DataType::UInt32, false), - /// Field::new("z", DataType::Int64, false), - /// ]) - /// .to_dfschema_ref().unwrap(); + /// Field::new("x", DataType::Int64, false), + /// Field::new("y", DataType::UInt32, false), + /// Field::new("z", DataType::Int64, false), + /// ]) + /// .to_dfschema_ref() + /// .unwrap(); /// /// // Create the simplifier /// let props = ExecutionProps::new(); - /// let context = SimplifyContext::new(&props) - /// .with_schema(schema); + /// let context = SimplifyContext::new(&props).with_schema(schema); /// /// // Expression: (x >= 3) AND (y + 2 < 10) AND (z > 5) /// let expr_x = col("x").gt_eq(lit(3_i64)); @@ -310,15 +306,18 @@ impl ExprSimplifier { /// let expr = expr_x.and(expr_y).and(expr_z.clone()); /// /// let guarantees = vec![ - /// // x ∈ [3, 5] - /// ( - /// col("x"), - /// NullableInterval::NotNull { - /// values: Interval::make(Some(3_i64), Some(5_i64)).unwrap() - /// } - /// ), - /// // y = 3 - /// (col("y"), NullableInterval::from(ScalarValue::UInt32(Some(3)))), + /// // x ∈ [3, 5] + /// ( + /// col("x"), + /// NullableInterval::NotNull { + /// values: Interval::make(Some(3_i64), Some(5_i64)).unwrap(), + /// }, + /// ), + /// // y = 3 + /// ( + /// col("y"), + /// NullableInterval::from(ScalarValue::UInt32(Some(3))), + /// ), /// ]; /// let simplifier = ExprSimplifier::new(context).with_guarantees(guarantees); /// let output = simplifier.simplify(expr).unwrap(); @@ -343,24 +342,24 @@ impl ExprSimplifier { /// /// ```rust /// use arrow::datatypes::{DataType, Field, Schema}; - /// use datafusion_expr::{col, lit, Expr}; - /// use datafusion_expr::interval_arithmetic::{Interval, NullableInterval}; /// use datafusion_common::{Result, ScalarValue, ToDFSchema}; /// use datafusion_expr::execution_props::ExecutionProps; + /// use datafusion_expr::interval_arithmetic::{Interval, NullableInterval}; /// use datafusion_expr::simplify::SimplifyContext; + /// use datafusion_expr::{col, lit, Expr}; /// use datafusion_optimizer::simplify_expressions::ExprSimplifier; /// /// let schema = Schema::new(vec![ - /// Field::new("a", DataType::Int64, false), - /// Field::new("b", DataType::Int64, false), - /// Field::new("c", DataType::Int64, false), - /// ]) - /// .to_dfschema_ref().unwrap(); + /// Field::new("a", DataType::Int64, false), + /// Field::new("b", DataType::Int64, false), + /// Field::new("c", DataType::Int64, false), + /// ]) + /// .to_dfschema_ref() + /// .unwrap(); /// /// // Create the simplifier /// let props = ExecutionProps::new(); - /// let context = SimplifyContext::new(&props) - /// .with_schema(schema); + /// let context = SimplifyContext::new(&props).with_schema(schema); /// let simplifier = ExprSimplifier::new(context); /// /// // Expression: a = c AND 1 = b @@ -374,9 +373,9 @@ impl 
ExprSimplifier { /// /// // If canonicalization is disabled, the expression is not changed /// let non_canonicalized = simplifier - /// .with_canonicalize(false) - /// .simplify(expr.clone()) - /// .unwrap(); + /// .with_canonicalize(false) + /// .simplify(expr.clone()) + /// .unwrap(); /// /// assert_eq!(non_canonicalized, expr); /// ``` @@ -435,7 +434,6 @@ impl ExprSimplifier { /// assert_eq!(simplified_expr.data, lit(true)); /// // Only 1 cycle was executed /// assert_eq!(count, 1); - /// /// ``` pub fn with_max_cycles(mut self, max_simplifier_cycles: u32) -> Self { self.max_simplifier_cycles = max_simplifier_cycles; diff --git a/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs b/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs index 6e66e467a89d..dfe6205263f2 100644 --- a/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs +++ b/datafusion/optimizer/src/simplify_expressions/unwrap_cast.rs @@ -53,7 +53,6 @@ //! ```text //! c1 > INT32(10) //! ``` -//! use arrow::datatypes::DataType; use datafusion_common::{internal_err, tree_node::Transformed}; diff --git a/datafusion/physical-expr-common/src/physical_expr.rs b/datafusion/physical-expr-common/src/physical_expr.rs index b4cb08715f53..474a5426bbc6 100644 --- a/datafusion/physical-expr-common/src/physical_expr.rs +++ b/datafusion/physical-expr-common/src/physical_expr.rs @@ -296,7 +296,6 @@ pub trait PhysicalExpr: Send + Sync + Display + Debug + DynEq + DynHash { /// representation. /// /// See the [`fmt_sql`] function for an example of printing `PhysicalExpr`s as SQL. - /// fn fmt_sql(&self, f: &mut Formatter<'_>) -> fmt::Result; /// Take a snapshot of this `PhysicalExpr`, if it is dynamic. diff --git a/datafusion/physical-expr/src/equivalence/properties/mod.rs b/datafusion/physical-expr/src/equivalence/properties/mod.rs index 6d18d34ca4de..a57cf1a782e0 100644 --- a/datafusion/physical-expr/src/equivalence/properties/mod.rs +++ b/datafusion/physical-expr/src/equivalence/properties/mod.rs @@ -123,11 +123,14 @@ use itertools::Itertools; /// let mut eq_properties = EquivalenceProperties::new(schema); /// eq_properties.add_constants(vec![ConstExpr::from(col_b)]); /// eq_properties.add_ordering([ -/// PhysicalSortExpr::new_default(col_a).asc(), -/// PhysicalSortExpr::new_default(col_c).desc(), +/// PhysicalSortExpr::new_default(col_a).asc(), +/// PhysicalSortExpr::new_default(col_c).desc(), /// ]); /// -/// assert_eq!(eq_properties.to_string(), "order: [[a@0 ASC, c@2 DESC]], eq: [{members: [b@1], constant: (heterogeneous)}]"); +/// assert_eq!( +/// eq_properties.to_string(), +/// "order: [[a@0 ASC, c@2 DESC]], eq: [{members: [b@1], constant: (heterogeneous)}]" +/// ); /// ``` #[derive(Clone, Debug)] pub struct EquivalenceProperties { diff --git a/datafusion/physical-expr/src/expressions/column.rs b/datafusion/physical-expr/src/expressions/column.rs index c9f3fb00f019..9ca464b30430 100644 --- a/datafusion/physical-expr/src/expressions/column.rs +++ b/datafusion/physical-expr/src/expressions/column.rs @@ -49,9 +49,9 @@ use datafusion_expr::ColumnarValue; /// # use arrow::datatypes::{DataType, Field, Schema}; /// // Schema with columns a, b, c /// let schema = Schema::new(vec![ -/// Field::new("a", DataType::Int32, false), -/// Field::new("b", DataType::Int32, false), -/// Field::new("c", DataType::Int32, false), +/// Field::new("a", DataType::Int32, false), +/// Field::new("b", DataType::Int32, false), +/// Field::new("c", DataType::Int32, false), /// ]); /// /// // reference to column b is index 1 diff --git 
a/datafusion/physical-expr/src/intervals/cp_solver.rs b/datafusion/physical-expr/src/intervals/cp_solver.rs index c44197bbbe6f..be0e5e1fa6e0 100644 --- a/datafusion/physical-expr/src/intervals/cp_solver.rs +++ b/datafusion/physical-expr/src/intervals/cp_solver.rs @@ -579,15 +579,11 @@ impl ExprIntervalGraph { /// /// let mut graph = ExprIntervalGraph::try_new(expr, &schema).unwrap(); /// // Do it once, while constructing. - /// let node_indices = graph - /// .gather_node_indices(&[Arc::new(Column::new("gnz", 0))]); + /// let node_indices = graph.gather_node_indices(&[Arc::new(Column::new("gnz", 0))]); /// let left_index = node_indices.get(0).unwrap().1; /// /// // Provide intervals for leaf variables (here, there is only one). - /// let intervals = vec![( - /// left_index, - /// Interval::make(Some(10), Some(20)).unwrap(), - /// )]; + /// let intervals = vec![(left_index, Interval::make(Some(10), Some(20)).unwrap())]; /// /// // Evaluate bounds for the composite expression: /// graph.assign_intervals(&intervals); diff --git a/datafusion/physical-expr/src/physical_expr.rs b/datafusion/physical-expr/src/physical_expr.rs index 80dd8ce069b7..467ef866e65a 100644 --- a/datafusion/physical-expr/src/physical_expr.rs +++ b/datafusion/physical-expr/src/physical_expr.rs @@ -119,12 +119,16 @@ pub fn physical_exprs_bag_equal( /// ]); /// /// let sort_exprs = vec![ -/// vec![ -/// SortExpr { expr: Expr::Column(Column::new(Some("t"), "id")), asc: true, nulls_first: false } -/// ], -/// vec![ -/// SortExpr { expr: Expr::Column(Column::new(Some("t"), "name")), asc: false, nulls_first: true } -/// ] +/// vec![SortExpr { +/// expr: Expr::Column(Column::new(Some("t"), "id")), +/// asc: true, +/// nulls_first: false, +/// }], +/// vec![SortExpr { +/// expr: Expr::Column(Column::new(Some("t"), "name")), +/// asc: false, +/// nulls_first: true, +/// }], /// ]; /// let result = create_ordering(&schema, &sort_exprs).unwrap(); /// ``` diff --git a/datafusion/physical-optimizer/src/combine_partial_final_agg.rs b/datafusion/physical-optimizer/src/combine_partial_final_agg.rs index 86f7e73e9e35..bffb2c9df98e 100644 --- a/datafusion/physical-optimizer/src/combine_partial_final_agg.rs +++ b/datafusion/physical-optimizer/src/combine_partial_final_agg.rs @@ -36,7 +36,6 @@ use datafusion_physical_expr::{physical_exprs_equal, PhysicalExpr}; /// into a Single AggregateExec if their grouping exprs and aggregate exprs equal. /// /// This rule should be applied after the EnforceDistribution and EnforceSorting rules -/// #[derive(Default, Debug)] pub struct CombinePartialFinalAggregate {} diff --git a/datafusion/physical-optimizer/src/enforce_distribution.rs b/datafusion/physical-optimizer/src/enforce_distribution.rs index 39eb557ea601..7ef7a6d8d855 100644 --- a/datafusion/physical-optimizer/src/enforce_distribution.rs +++ b/datafusion/physical-optimizer/src/enforce_distribution.rs @@ -281,7 +281,6 @@ pub type PlanWithKeyRequirements = PlanContext>>; /// 3) If the current plan is RepartitionExec, CoalescePartitionsExec or WindowAggExec, clear all the requirements, return the unchanged plan /// 4) If the current plan is Projection, transform the requirements to the columns before the Projection and push down requirements /// 5) For other types of operators, by default, pushdown the parent requirements to children. 
-/// pub fn adjust_input_keys_ordering( mut requirements: PlanWithKeyRequirements, ) -> Result> { diff --git a/datafusion/physical-optimizer/src/join_selection.rs b/datafusion/physical-optimizer/src/join_selection.rs index dc220332141b..f6277a585eaa 100644 --- a/datafusion/physical-optimizer/src/join_selection.rs +++ b/datafusion/physical-optimizer/src/join_selection.rs @@ -524,7 +524,6 @@ fn hash_join_convert_symmetric_subrule( /// | Data Source |--------------| Repartition | /// | | | | /// +--------------+ +--------------+ -/// /// ``` pub fn hash_join_swap_subrule( mut input: Arc, diff --git a/datafusion/physical-plan/src/aggregates/group_values/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/mod.rs index f2f489b7223c..4243fe4fefd5 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/mod.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/mod.rs @@ -128,7 +128,6 @@ pub trait GroupValues: Send { /// `GroupColumn`: crate::aggregates::group_values::multi_group_by::GroupColumn /// `GroupValuesColumn`: crate::aggregates::group_values::multi_group_by::GroupValuesColumn /// `GroupValuesRows`: crate::aggregates::group_values::row::GroupValuesRows -/// pub fn new_group_values( schema: SchemaRef, group_ordering: &GroupOrdering, diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs index 722bc6049c80..76aab8adb8e8 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs @@ -76,7 +76,6 @@ pub trait GroupColumn: Send + Sync { /// /// And if found nth result in `equal_to_results` is already /// `false`, the check for nth row will be skipped. - /// fn vectorized_equal_to( &self, lhs_rows: &[usize], @@ -136,7 +135,6 @@ pub fn nulls_equal_to(lhs_null: bool, rhs_null: bool) -> Option { /// +---------------------+---------------------------------------------+ /// /// `inlined flag`: 1 represents `non-inlined`, and 0 represents `inlined` -/// #[derive(Debug, Clone, Copy, PartialEq, Eq)] struct GroupIndexView(u64); @@ -165,7 +163,6 @@ impl GroupIndexView { /// A [`GroupValues`] that stores multiple columns of group values, /// and supports vectorized operators for them -/// pub struct GroupValuesColumn { /// The output schema schema: SchemaRef, @@ -182,8 +179,6 @@ pub struct GroupValuesColumn { /// We don't really store the actual `group values` in `hashtable`, /// instead we store the `group indices` pointing to values in `GroupValues`. /// And we use [`GroupIndexView`] to represent such `group indices` in table. - /// - /// map: HashTable<(u64, GroupIndexView)>, /// The size of `map` in bytes @@ -196,7 +191,6 @@ pub struct GroupValuesColumn { /// /// The chained indices is like: /// `latest group index -> older group index -> even older group index -> ...` - /// group_index_lists: Vec>, /// When emitting first n, we need to decrease/erase group indices in @@ -322,7 +316,6 @@ impl GroupValuesColumn { /// /// `Group indices` order are against with their input order, and this will lead to error /// in `streaming aggregation`. - /// fn scalarized_intern( &mut self, cols: &[ArrayRef], @@ -424,7 +417,6 @@ impl GroupValuesColumn { /// /// The vectorized approach can offer higher performance for avoiding row by row /// downcast for `cols` and being able to implement even more optimizations(like simd). 
- /// fn vectorized_intern( &mut self, cols: &[ArrayRef], @@ -492,7 +484,6 @@ impl GroupValuesColumn { /// - Check if the `group index view` is `inlined` or `non_inlined`: /// If it is inlined, add to `vectorized_equal_to_group_indices` directly. /// Otherwise get all group indices from `group_index_lists`, and add them. - /// fn collect_vectorized_process_context( &mut self, batch_hashes: &[u64], @@ -720,7 +711,6 @@ impl GroupValuesColumn { /// The hash collision may be not frequent, so the fallback will indeed hardly happen. /// In most situations, `scalarized_indices` will found to be empty after finishing to /// preform `vectorized_equal_to`. - /// fn scalarized_intern_remaining( &mut self, cols: &[ArrayRef], @@ -885,7 +875,6 @@ impl GroupValuesColumn { /// `$v`: the vector to push the new builder into /// `$nullable`: whether the input can contains nulls /// `$t`: the primitive type of the builder -/// macro_rules! instantiate_primitive { ($v:expr, $nullable:expr, $t:ty, $data_type:ident) => { if $nullable { @@ -1447,7 +1436,6 @@ mod tests { /// - Group not exist + bucket not found in `map` /// - Group not exist + not equal to inlined group view(tested in hash collision) /// - Group not exist + not equal to non-inlined group view(tested in hash collision) - /// struct VectorizedTestDataSet { test_batches: Vec>, expected_batch: RecordBatch, diff --git a/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs index 8b1905e54041..f35c580b0e63 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs @@ -87,7 +87,6 @@ pub struct GroupValuesPrimitive { /// is obvious in high cardinality group by situation. /// More details can see: /// - /// map: HashTable<(usize, u64)>, /// The group index of the null value if any null_group: Option, diff --git a/datafusion/physical-plan/src/aggregates/order/partial.rs b/datafusion/physical-plan/src/aggregates/order/partial.rs index 3e495900f77a..476551a7ca21 100644 --- a/datafusion/physical-plan/src/aggregates/order/partial.rs +++ b/datafusion/physical-plan/src/aggregates/order/partial.rs @@ -61,7 +61,7 @@ use datafusion_expr::EmitTo; /// group indices /// (in group value group_values current tracks the most /// order) recent group index -///``` +/// ``` #[derive(Debug)] pub struct GroupOrderingPartial { /// State machine diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index 1d659d728084..c4c80aba48ee 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -219,7 +219,7 @@ impl SkipAggregationProbe { /// # Architecture /// /// ```text -/// +/// /// Assigns a consecutive group internally stores aggregate values /// index for each unique set for all groups /// of group values @@ -298,7 +298,6 @@ impl SkipAggregationProbe { /// later stream-merge sort on reading back the spilled data does re-grouping. Note the rows cannot /// be grouped once spilled onto disk, the read back data needs to be re-grouped again. In addition, /// re-grouping may cause out of memory again. Thus, re-grouping has to be a sort based aggregation. 
-/// /// ```text /// Partial Aggregation [batch_size = 2] (max memory = 3 rows) /// diff --git a/datafusion/physical-plan/src/coalesce/mod.rs b/datafusion/physical-plan/src/coalesce/mod.rs index 0eca27f8e40e..979c72d3a73e 100644 --- a/datafusion/physical-plan/src/coalesce/mod.rs +++ b/datafusion/physical-plan/src/coalesce/mod.rs @@ -67,7 +67,6 @@ use std::sync::Arc; /// /// 3. Eventually this may also be able to handle other optimizations such as a /// combined filter/coalesce operation. -/// #[derive(Debug)] pub struct BatchCoalescer { /// The input schema diff --git a/datafusion/physical-plan/src/execution_plan.rs b/datafusion/physical-plan/src/execution_plan.rs index 6d51bf195dc6..0a9a0297512b 100644 --- a/datafusion/physical-plan/src/execution_plan.rs +++ b/datafusion/physical-plan/src/execution_plan.rs @@ -320,12 +320,15 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { /// fn execute( /// &self, /// partition: usize, - /// context: Arc + /// context: Arc, /// ) -> Result { /// // use functions from futures crate convert the batch into a stream /// let fut = futures::future::ready(Ok(self.batch.clone())); /// let stream = futures::stream::once(fut); - /// Ok(Box::pin(RecordBatchStreamAdapter::new(self.batch.schema(), stream))) + /// Ok(Box::pin(RecordBatchStreamAdapter::new( + /// self.batch.schema(), + /// stream, + /// ))) /// } /// } /// ``` @@ -355,11 +358,14 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { /// fn execute( /// &self, /// partition: usize, - /// context: Arc + /// context: Arc, /// ) -> Result { /// let fut = get_batch(); /// let stream = futures::stream::once(fut); - /// Ok(Box::pin(RecordBatchStreamAdapter::new(self.schema.clone(), stream))) + /// Ok(Box::pin(RecordBatchStreamAdapter::new( + /// self.schema.clone(), + /// stream, + /// ))) /// } /// } /// ``` @@ -391,13 +397,16 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { /// fn execute( /// &self, /// partition: usize, - /// context: Arc + /// context: Arc, /// ) -> Result { /// // A future that yields a stream /// let fut = get_batch_stream(); /// // Use TryStreamExt::try_flatten to flatten the stream of streams /// let stream = futures::stream::once(fut).try_flatten(); - /// Ok(Box::pin(RecordBatchStreamAdapter::new(self.schema.clone(), stream))) + /// Ok(Box::pin(RecordBatchStreamAdapter::new( + /// self.schema.clone(), + /// stream, + /// ))) /// } /// } /// ``` diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index a7f28ede4408..b1e9ddb32a3d 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -216,7 +216,7 @@ impl JoinLeftData { /// /// /// ```text -/// +/// /// Original build-side data Inserting build-side values into hashmap Concatenated build-side batch /// ┌───────────────────────────┐ /// hashmap.insert(row-hash, row-idx + offset) │ idx │ @@ -241,7 +241,6 @@ impl JoinLeftData { /// └───────┘ │ └───────┘ │ /// │ │ /// └───────────────────────────┘ -/// /// ``` /// /// 2. 
the **probe phase** where the tuples of the probe side are streamed @@ -276,7 +275,6 @@ impl JoinLeftData { /// └────────────┘ └────────────┘ /// /// build side probe side -/// /// ``` /// /// # Example "Optimal" Plans @@ -1155,7 +1153,7 @@ impl BuildSide { /// Expected state transitions performed by HashJoinStream are: /// /// ```text -/// +/// /// WaitBuildSide /// │ /// ▼ @@ -1163,7 +1161,6 @@ impl BuildSide { /// │ │ /// │ ▼ /// └─ ProcessProbeBatch -/// /// ``` #[derive(Debug, Clone)] enum HashJoinStreamState { diff --git a/datafusion/physical-plan/src/joins/stream_join_utils.rs b/datafusion/physical-plan/src/joins/stream_join_utils.rs index 9f5485ee93bd..3e4cbc5d33cd 100644 --- a/datafusion/physical-plan/src/joins/stream_join_utils.rs +++ b/datafusion/physical-plan/src/joins/stream_join_utils.rs @@ -655,7 +655,6 @@ pub fn combine_two_batches( /// * `visited` - A hash set to store the visited indices. /// * `offset` - An offset to the indices in the `PrimitiveArray`. /// * `indices` - The input `PrimitiveArray` of type `T` which stores the indices to be recorded. -/// pub fn record_visited_indices( visited: &mut HashSet, offset: usize, diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 9a8d4cbb6605..4998218656ca 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -797,7 +797,6 @@ fn need_to_produce_result_in_final(build_side: JoinSide, join_type: JoinType) -> /// # Returns /// /// A tuple of two arrays of primitive types representing the build and probe indices. -/// fn calculate_indices_by_join_type( build_side: JoinSide, prune_length: usize, diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index 35827d4fcd72..540ef212ae0e 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -207,7 +207,6 @@ pub struct ColumnIndex { /// Returns the output field given the input field. 
Outer joins may /// insert nulls even if the input was not null -/// fn output_join_field(old_field: &Field, join_type: &JoinType, is_left: bool) -> Field { let force_nullable = match join_type { JoinType::Inner => false, diff --git a/datafusion/physical-plan/src/metrics/builder.rs b/datafusion/physical-plan/src/metrics/builder.rs index dbda0a310ce5..69a38dbfe3ba 100644 --- a/datafusion/physical-plan/src/metrics/builder.rs +++ b/datafusion/physical-plan/src/metrics/builder.rs @@ -29,19 +29,18 @@ use super::{ /// case of constant strings /// /// ```rust -/// use datafusion_physical_plan::metrics::*; +/// use datafusion_physical_plan::metrics::*; /// -/// let metrics = ExecutionPlanMetricsSet::new(); -/// let partition = 1; +/// let metrics = ExecutionPlanMetricsSet::new(); +/// let partition = 1; /// -/// // Create the standard output_rows metric -/// let output_rows = MetricBuilder::new(&metrics).output_rows(partition); -/// -/// // Create a operator specific counter with some labels -/// let num_bytes = MetricBuilder::new(&metrics) -/// .with_new_label("filename", "my_awesome_file.parquet") -/// .counter("num_bytes", partition); +/// // Create the standard output_rows metric +/// let output_rows = MetricBuilder::new(&metrics).output_rows(partition); /// +/// // Create an operator specific counter with some labels +/// let num_bytes = MetricBuilder::new(&metrics) +/// .with_new_label("filename", "my_awesome_file.parquet") +/// .counter("num_bytes", partition); /// ``` pub struct MetricBuilder<'a> { /// Location that the metric created by this builder will be added to diff --git a/datafusion/physical-plan/src/metrics/custom.rs b/datafusion/physical-plan/src/metrics/custom.rs index 546af6f3335e..4421db94dc17 100644 --- a/datafusion/physical-plan/src/metrics/custom.rs +++ b/datafusion/physical-plan/src/metrics/custom.rs @@ -64,7 +64,8 @@ use std::{any::Any, fmt::Debug, fmt::Display, sync::Arc}; /// /// fn aggregate(&self, other: Arc<dyn CustomMetricValue>) { /// let other = other.as_any().downcast_ref::().unwrap(); -/// self.count.fetch_add(other.count.load(Ordering::Relaxed), Ordering::Relaxed); +/// self.count +/// .fetch_add(other.count.load(Ordering::Relaxed), Ordering::Relaxed); /// } /// /// fn as_any(&self) -> &dyn Any { diff --git a/datafusion/physical-plan/src/metrics/mod.rs b/datafusion/physical-plan/src/metrics/mod.rs index 0b9b4bed856b..7bf53bee3a9e 100644 --- a/datafusion/physical-plan/src/metrics/mod.rs +++ b/datafusion/physical-plan/src/metrics/mod.rs @@ -45,24 +45,23 @@ pub use value::{Count, Gauge, MetricValue, ScopedTimerGuard, Time, Timestamp}; /// [`ExecutionPlanMetricsSet`].
/// /// ``` -/// use datafusion_physical_plan::metrics::*; +/// use datafusion_physical_plan::metrics::*; /// -/// let metrics = ExecutionPlanMetricsSet::new(); -/// assert!(metrics.clone_inner().output_rows().is_none()); +/// let metrics = ExecutionPlanMetricsSet::new(); +/// assert!(metrics.clone_inner().output_rows().is_none()); /// -/// // Create a counter to increment using the MetricBuilder -/// let partition = 1; -/// let output_rows = MetricBuilder::new(&metrics) -/// .output_rows(partition); +/// // Create a counter to increment using the MetricBuilder +/// let partition = 1; +/// let output_rows = MetricBuilder::new(&metrics).output_rows(partition); /// -/// // Counter can be incremented -/// output_rows.add(13); +/// // Counter can be incremented +/// output_rows.add(13); /// -/// // The value can be retrieved directly: -/// assert_eq!(output_rows.value(), 13); +/// // The value can be retrieved directly: +/// assert_eq!(output_rows.value(), 13); /// -/// // As well as from the metrics set -/// assert_eq!(metrics.clone_inner().output_rows(), Some(13)); +/// // As well as from the metrics set +/// assert_eq!(metrics.clone_inner().output_rows(), Some(13)); /// ``` /// /// [`ExecutionPlan`]: super::ExecutionPlan diff --git a/datafusion/physical-plan/src/recursive_query.rs b/datafusion/physical-plan/src/recursive_query.rs index 99b460dfcfdc..8051dc5bb4f1 100644 --- a/datafusion/physical-plan/src/recursive_query.rs +++ b/datafusion/physical-plan/src/recursive_query.rs @@ -247,7 +247,6 @@ impl DisplayAs for RecursiveQueryExec { /// while batch := recursive_stream.next(): /// buffer.append(batch) /// yield buffer -/// struct RecursiveQueryStream { /// The context to be used for managing handlers & executing new tasks task_context: Arc, diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index 754a208126ee..2cf34cbb7336 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -429,10 +429,10 @@ impl BatchPartitioner { /// │ │ │ /// │ │ │ /// │ │ │ -///┌───────────────┐ ┌───────────────┐ ┌───────────────┐ -///│ GroupBy │ │ GroupBy │ │ GroupBy │ -///│ (Partial) │ │ (Partial) │ │ (Partial) │ -///└───────────────┘ └───────────────┘ └───────────────┘ +/// ┌───────────────┐ ┌───────────────┐ ┌───────────────┐ +/// │ GroupBy │ │ GroupBy │ │ GroupBy │ +/// │ (Partial) │ │ (Partial) │ │ (Partial) │ +/// └───────────────┘ └───────────────┘ └───────────────┘ /// ▲ ▲ ▲ /// └──────────────────┼──────────────────┘ /// │ @@ -451,7 +451,7 @@ impl BatchPartitioner { /// ╲ ╱ ╲ ╱ /// '─. ,─' '─. ,─' /// `───────' `───────' -///``` +/// ``` /// /// # Error Handling /// @@ -1724,7 +1724,6 @@ mod test { /// /// `$EXPECTED_PLAN_LINES`: input plan /// `$PLAN`: the plan to optimized - /// macro_rules! 
assert_plan { ($EXPECTED_PLAN_LINES: expr, $PLAN: expr) => { let physical_plan = $PLAN; diff --git a/datafusion/physical-plan/src/sorts/cursor.rs b/datafusion/physical-plan/src/sorts/cursor.rs index 8ab603e04961..2b064c82b061 100644 --- a/datafusion/physical-plan/src/sorts/cursor.rs +++ b/datafusion/physical-plan/src/sorts/cursor.rs @@ -52,7 +52,7 @@ pub trait CursorValues { /// [`CursorValues`] /// /// ```text -/// +/// /// ┌───────────────────────┐ /// │ │ ┌──────────────────────┐ /// │ ┌─────────┐ ┌─────┐ │ ─ ─ ─ ─│ Cursor │ diff --git a/datafusion/physical-plan/src/sorts/merge.rs b/datafusion/physical-plan/src/sorts/merge.rs index ca2d5f2105f2..5295c261c36b 100644 --- a/datafusion/physical-plan/src/sorts/merge.rs +++ b/datafusion/physical-plan/src/sorts/merge.rs @@ -389,7 +389,6 @@ impl SortPreservingMergeStream { /// /// Zooming in at node 2 in the loser tree as an example, we can see that /// it takes as input the next item at (S0) and the loser of (S3, S4). - /// #[inline] fn lt_leaf_node_index(&self, cursor_index: usize) -> usize { (self.cursors.len() + cursor_index) / 2 diff --git a/datafusion/physical-plan/src/sorts/partial_sort.rs b/datafusion/physical-plan/src/sorts/partial_sort.rs index 32b34a75cc76..46fe19c8acb6 100644 --- a/datafusion/physical-plan/src/sorts/partial_sort.rs +++ b/datafusion/physical-plan/src/sorts/partial_sort.rs @@ -32,7 +32,7 @@ //! | 0 | 1 | 1 | //! | 0 | 2 | 0 | //! +---+---+---+ -//!``` +//! ``` //! //! and required ordering for the plan is `a ASC, b ASC, d ASC`. //! The first 3 rows(segment) can be sorted as the segment already @@ -46,7 +46,7 @@ //! +---+---+---+ //! | 0 | 2 | 4 | //! +---+---+---+ -//!``` +//! ``` //! //! The plan concats incoming data with such last rows of previous input //! and continues partial sorting of the segments. 
diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index bb572c4315fb..b6e6ed43ff35 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -124,7 +124,6 @@ impl ExternalSorterMetrics { /// └─────┘ /// /// in_mem_batches -/// /// ``` /// /// # When data does not fit in available memory diff --git a/datafusion/physical-plan/src/stream.rs b/datafusion/physical-plan/src/stream.rs index 773a0986693a..9edd38eaac15 100644 --- a/datafusion/physical-plan/src/stream.rs +++ b/datafusion/physical-plan/src/stream.rs @@ -188,7 +188,9 @@ impl ReceiverStreamBuilder { /// let schema_1 = Arc::clone(&schema); /// builder.spawn(async move { /// // Your task needs to send batches to the tx -/// tx_1.send(Ok(RecordBatch::new_empty(schema_1))).await.unwrap(); +/// tx_1.send(Ok(RecordBatch::new_empty(schema_1))) +/// .await +/// .unwrap(); /// /// Ok(()) /// }); @@ -198,7 +200,9 @@ impl ReceiverStreamBuilder { /// let schema_2 = Arc::clone(&schema); /// builder.spawn(async move { /// // Your task needs to send batches to the tx -/// tx_2.send(Ok(RecordBatch::new_empty(schema_2))).await.unwrap(); +/// tx_2.send(Ok(RecordBatch::new_empty(schema_2))) +/// .await +/// .unwrap(); /// /// Ok(()) /// }); @@ -380,9 +384,10 @@ impl RecordBatchStreamAdapter { /// # use datafusion_physical_plan::stream::RecordBatchStreamAdapter; /// // Create stream of Result /// let batch = record_batch!( - /// ("a", Int32, [1, 2, 3]), - /// ("b", Float64, [Some(4.0), None, Some(5.0)]) - /// ).expect("created batch"); + /// ("a", Int32, [1, 2, 3]), + /// ("b", Float64, [Some(4.0), None, Some(5.0)]) + /// ) + /// .expect("created batch"); /// let schema = batch.schema(); /// let stream = futures::stream::iter(vec![Ok(batch)]); /// // Convert the stream to a SendableRecordBatchStream diff --git a/datafusion/physical-plan/src/test/exec.rs b/datafusion/physical-plan/src/test/exec.rs index 12ffca871f07..b720181b27fe 100644 --- a/datafusion/physical-plan/src/test/exec.rs +++ b/datafusion/physical-plan/src/test/exec.rs @@ -291,7 +291,6 @@ fn clone_error(e: &DataFusionError) -> DataFusionError { /// A Mock ExecutionPlan that does not start producing input until a /// barrier is called -/// #[derive(Debug)] pub struct BarrierExec { /// partitions to send back diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 73d7933e7c05..c237c9a8837e 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -68,7 +68,7 @@ use tokio::macros::support::thread_rng_n; /// │ │ │ │ /// Output │ ... 
│ │ │ /// Partitions │0 │N-1 │ N │N+M-1 -///(passes through ┌────┴───────┴───────────┴─────────┴───┐ +/// (passes through ┌────┴───────┴───────────┴─────────┴───┐ /// the N+M input │ UnionExec │ /// partitions) │ │ /// └──────────────────────────────────────┘ diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index e36cd2b6c242..39e0a2c85481 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -532,7 +532,7 @@ struct UnnestingResult { /// ``` /// Result: /// ```text -/// +/// /// ┌────────────────────────────────────────────────┬────────────────────────────────────────────────┐ /// │ unnest(i, "recursive" := CAST('t' AS BOOLEAN)) │ unnest(j, "recursive" := CAST('t' AS BOOLEAN)) │ /// │ int32 │ int32 │ @@ -694,7 +694,6 @@ fn build_batch( /// ```ignore /// longest_length: [3, 1, 1, 2] /// ``` -/// fn find_longest_length( list_arrays: &[ArrayRef], options: &UnnestOptions, @@ -815,7 +814,6 @@ fn unnest_list_arrays( /// ```ignore /// [1, null, 2, 3, 4, null, null, 5, null, null] /// ``` -/// fn unnest_list_array( list_array: &dyn ListArrayType, length_array: &PrimitiveArray, @@ -863,7 +861,6 @@ fn unnest_list_array( /// ```ignore /// [0, 0, 1, 1, 1, 2] /// ``` -/// fn create_take_indices( length_array: &PrimitiveArray, capacity: usize, @@ -928,7 +925,6 @@ fn create_take_indices( /// ```ignore /// c1: 1, null, 2, 3, 4, null, 5, 6 // Repeated using `indices` /// c2: null, null, null, null, null, null, null, null // Replaced with nulls -/// fn repeat_arrs_from_indices( batch: &[ArrayRef], indices: &PrimitiveArray, diff --git a/datafusion/proto-common/src/lib.rs b/datafusion/proto-common/src/lib.rs index 6400e4bdc66d..ba170b12717f 100644 --- a/datafusion/proto-common/src/lib.rs +++ b/datafusion/proto-common/src/lib.rs @@ -62,28 +62,33 @@ //! # use datafusion_proto_common::protobuf_common; //! # use prost::Message; //! # fn main() -> Result<()>{ -//! // Create a new ScalarValue -//! let val = ScalarValue::UInt64(Some(3)); -//! let mut buffer = BytesMut::new(); -//! let protobuf: protobuf_common::ScalarValue = match val { -//! ScalarValue::UInt64(Some(val)) => { -//! protobuf_common::ScalarValue{value: Some(protobuf_common::scalar_value::Value::Uint64Value(val))} -//! } -//! _ => unreachable!(), -//! }; +//! // Create a new ScalarValue +//! let val = ScalarValue::UInt64(Some(3)); +//! let mut buffer = BytesMut::new(); +//! let protobuf: protobuf_common::ScalarValue = match val { +//! ScalarValue::UInt64(Some(val)) => protobuf_common::ScalarValue { +//! value: Some(protobuf_common::scalar_value::Value::Uint64Value(val)), +//! }, +//! _ => unreachable!(), +//! }; //! -//! protobuf.encode(&mut buffer) +//! protobuf +//! .encode(&mut buffer) //! .map_err(|e| plan_datafusion_err!("Error encoding protobuf as bytes: {e}"))?; -//! // Convert it to bytes (for sending over the network, etc.) -//! let bytes: Bytes = buffer.into(); +//! // Convert it to bytes (for sending over the network, etc.) +//! let bytes: Bytes = buffer.into(); //! -//! let protobuf = protobuf_common::ScalarValue::decode(bytes).map_err(|e| plan_datafusion_err!("Error decoding ScalarValue as protobuf: {e}"))?; -//! // Decode bytes from somewhere (over network, etc.) back to ScalarValue -//! let decoded_val: ScalarValue = match protobuf.value { -//! Some(protobuf_common::scalar_value::Value::Uint64Value(val)) => ScalarValue::UInt64(Some(val)), -//! _ => unreachable!(), -//! }; -//! assert_eq!(val, decoded_val); +//! 
let protobuf = protobuf_common::ScalarValue::decode(bytes).map_err(|e| { +//! plan_datafusion_err!("Error decoding ScalarValue as protobuf: {e}") +//! })?; +//! // Decode bytes from somewhere (over network, etc.) back to ScalarValue +//! let decoded_val: ScalarValue = match protobuf.value { +//! Some(protobuf_common::scalar_value::Value::Uint64Value(val)) => { +//! ScalarValue::UInt64(Some(val)) +//! } +//! _ => unreachable!(), +//! }; +//! assert_eq!(val, decoded_val); //! # Ok(()) //! # } //! ``` diff --git a/datafusion/proto/src/lib.rs b/datafusion/proto/src/lib.rs index 2df162f21e3a..9744f302e4c6 100644 --- a/datafusion/proto/src/lib.rs +++ b/datafusion/proto/src/lib.rs @@ -64,15 +64,15 @@ //! # use datafusion_expr::{col, lit, Expr}; //! # use datafusion_proto::bytes::Serializeable; //! # fn main() -> Result<()>{ -//! // Create a new `Expr` a < 32 -//! let expr = col("a").lt(lit(5i32)); +//! // Create a new `Expr` a < 5 +//! let expr = col("a").lt(lit(5i32)); //! -//! // Convert it to bytes (for sending over the network, etc.) -//! let bytes = expr.to_bytes()?; +//! // Convert it to bytes (for sending over the network, etc.) +//! let bytes = expr.to_bytes()?; //! -//! // Decode bytes from somewhere (over network, etc.) back to Expr -//! let decoded_expr = Expr::from_bytes(&bytes)?; -//! assert_eq!(expr, decoded_expr); +//! // Decode bytes from somewhere (over network, etc.) back to Expr +//! let decoded_expr = Expr::from_bytes(&bytes)?; +//! assert_eq!(expr, decoded_expr); //! # Ok(()) //! # } //! ``` diff --git a/datafusion/pruning/src/pruning_predicate.rs b/datafusion/pruning/src/pruning_predicate.rs index e863f57f8d16..712cf4e9cfdd 100644 --- a/datafusion/pruning/src/pruning_predicate.rs +++ b/datafusion/pruning/src/pruning_predicate.rs @@ -882,7 +882,7 @@ impl From<Vec<(phys_expr::Column, StatisticsType, Field)>> for RequiredColumns { /// ```text /// ("s1", Min, Field:s1_min) /// ("s2", Max, field:s2_max) -///``` +/// ``` /// /// And the input statistics had /// ```text @@ -5108,7 +5108,6 @@ mod tests { /// /// `expected` is a vector of bools, where true means the row group should /// be kept, and false means it should be pruned. - /// // TODO refactor other tests to use this to reduce boiler plate fn prune_with_expr( expr: Expr, diff --git a/datafusion/session/src/session.rs b/datafusion/session/src/session.rs index de23dba491fd..fd033172f224 100644 --- a/datafusion/session/src/session.rs +++ b/datafusion/session/src/session.rs @@ -57,9 +57,12 @@ use std::sync::{Arc, Weak}; /// // Given a `Session` reference, get the concrete `SessionState` reference /// // Note: this may stop working in future versions, /// fn session_state_from_session(session: &dyn Session) -> Result<&SessionState> { -/// session.as_any() -/// .downcast_ref::<SessionState>() -/// .ok_or_else(|| exec_datafusion_err!("Failed to downcast Session to SessionState")) +/// session +/// .as_any() +/// .downcast_ref::<SessionState>() +/// .ok_or_else(|| { +/// exec_datafusion_err!("Failed to downcast Session to SessionState") +/// }) /// } /// ``` /// diff --git a/datafusion/spark/src/lib.rs b/datafusion/spark/src/lib.rs index 4ce9be1263ef..e8e25a2fcd05 100644 --- a/datafusion/spark/src/lib.rs +++ b/datafusion/spark/src/lib.rs @@ -86,7 +86,7 @@ //! use datafusion_spark::expr_fn::sha2; //! // Create the expression `sha2(my_data, 256)` //! let expr = sha2(col("my_data"), lit(256)); -//!``` +//! ``` //!
//![`Expr`]: datafusion_expr::Expr diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 2c673162ec9c..73658546dadb 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -58,7 +58,7 @@ fn parse_file_type(s: &str) -> Result { /// Syntax: /// ```sql /// EXPLAIN [FORMAT format] statement -///``` +/// ``` #[derive(Debug, Clone, PartialEq, Eq)] pub struct ExplainStatement { /// `EXPLAIN ANALYZE ..` @@ -304,8 +304,7 @@ const DEFAULT_DIALECT: GenericDialect = GenericDialect {}; /// # use datafusion_sql::parser::DFParserBuilder; /// # use datafusion_common::Result; /// # fn test() -> Result<()> { -/// let mut parser = DFParserBuilder::new("SELECT * FROM foo; SELECT 1 + 2") -/// .build()?; +/// let mut parser = DFParserBuilder::new("SELECT * FROM foo; SELECT 1 + 2").build()?; /// // parse the SQL into DFStatements /// let statements = parser.parse_statements()?; /// assert_eq!(statements.len(), 2); @@ -320,13 +319,13 @@ const DEFAULT_DIALECT: GenericDialect = GenericDialect {}; /// # use datafusion_sql::sqlparser::dialect::MySqlDialect; /// # use datafusion_sql::sqlparser::ast::Expr; /// # fn test() -> Result<()> { -/// let dialect = MySqlDialect{}; // Parse using MySQL dialect +/// let dialect = MySqlDialect {}; // Parse using MySQL dialect /// let mut parser = DFParserBuilder::new("1 + 2") -/// .with_dialect(&dialect) -/// .build()?; +/// .with_dialect(&dialect) +/// .build()?; /// // parse 1+2 into an sqlparser::ast::Expr /// let res = parser.parse_expr()?; -/// assert!(matches!(res.expr, Expr::BinaryOp {..})); +/// assert!(matches!(res.expr, Expr::BinaryOp { .. })); /// # Ok(()) /// # } /// ``` diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 26c982690115..9063e237bf26 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -189,7 +189,6 @@ impl IdentNormalizer { /// This helps resolve scoping issues of CTEs. /// By using cloning, a subquery can inherit CTEs from the outer query /// and can also define its own private CTEs without affecting the outer query. 
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index 26c982690115..9063e237bf26 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -189,7 +189,6 @@ impl IdentNormalizer {
 /// This helps resolve scoping issues of CTEs.
 /// By using cloning, a subquery can inherit CTEs from the outer query
 /// and can also define its own private CTEs without affecting the outer query.
-///
 #[derive(Debug, Clone)]
 pub struct PlannerContext {
     /// Data types for numbered parameters ($1, $2, etc), if supplied
diff --git a/datafusion/sql/src/resolve.rs b/datafusion/sql/src/resolve.rs
index 9e909f66fa97..db5ddd511519 100644
--- a/datafusion/sql/src/resolve.rs
+++ b/datafusion/sql/src/resolve.rs
@@ -175,14 +175,14 @@ fn visit_statement(statement: &DFStatement, visitor: &mut RelationVisitor) {
 /// ## Example with CTEs
 ///
 /// ```
-/// # use datafusion_sql::parser::DFParser;
+/// # use datafusion_sql::parser::DFParser;
 /// # use datafusion_sql::resolve::resolve_table_references;
-/// let query = "with my_cte as (values (1), (2)) SELECT * from my_cte;";
-/// let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap();
-/// let (table_refs, ctes) = resolve_table_references(&statement, true).unwrap();
+/// let query = "with my_cte as (values (1), (2)) SELECT * from my_cte;";
+/// let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap();
+/// let (table_refs, ctes) = resolve_table_references(&statement, true).unwrap();
 /// assert_eq!(table_refs.len(), 0);
-/// assert_eq!(ctes.len(), 1);
-/// assert_eq!(ctes[0].to_string(), "my_cte");
+/// assert_eq!(ctes.len(), 1);
+/// assert_eq!(ctes[0].to_string(), "my_cte");
 /// ```
 pub fn resolve_table_references(
     statement: &crate::parser::Statement,
diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs
index 4ddd5ccccbbd..83aafb52e3e6 100644
--- a/datafusion/sql/src/unparser/expr.rs
+++ b/datafusion/sql/src/unparser/expr.rs
@@ -68,7 +68,7 @@ use sqlparser::tokenizer::Span;
 /// use datafusion_sql::unparser::expr_to_sql;
 /// let expr = col("a").gt(lit(4)); // form an expression `a > 4`
 /// let sql = expr_to_sql(&expr).unwrap(); // convert to ast::Expr
-/// // use the Display impl to convert to SQL text
+/// // use the Display impl to convert to SQL text
 /// assert_eq!(sql.to_string(), "(a > 4)")
 /// ```
 ///
diff --git a/datafusion/sql/src/unparser/plan.rs b/datafusion/sql/src/unparser/plan.rs
index 4fb1e42d6028..432437f6fea7 100644
--- a/datafusion/sql/src/unparser/plan.rs
+++ b/datafusion/sql/src/unparser/plan.rs
@@ -82,8 +82,11 @@ use std::{sync::Arc, vec};
 ///     .build()
 ///     .unwrap();
 /// let sql = plan_to_sql(&plan).unwrap(); // convert to AST
-/// // use the Display impl to convert to SQL text
-/// assert_eq!(sql.to_string(), "SELECT \"table\".id, \"table\".\"value\" FROM \"table\"")
+/// // use the Display impl to convert to SQL text
+/// assert_eq!(
+///     sql.to_string(),
+///     "SELECT \"table\".id, \"table\".\"value\" FROM \"table\""
+/// )
 /// ```
 ///
 /// [`SqlToRel::sql_statement_to_plan`]: crate::planner::SqlToRel::sql_statement_to_plan
diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs
index 52832e1324be..f90ee2dfe48e 100644
--- a/datafusion/sql/src/utils.rs
+++ b/datafusion/sql/src/utils.rs
@@ -524,7 +524,6 @@ impl TreeNodeRewriter for RecursiveUnnestRewriter<'_> {
     ///           /         /
     ///     column2
     /// ```
-    ///
     fn f_up(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
         if let Expr::Unnest(ref traversing_unnest) = expr {
             if traversing_unnest == self.top_most_unnest.as_ref().unwrap() {
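The `resolve_table_references` example reformatted above involves only a CTE. A hedged sketch of the same API on a query that also touches a real table, assuming (as the doc example implies) that CTE names are excluded from the returned table references:

```rust
use datafusion_sql::parser::DFParser;
use datafusion_sql::resolve::resolve_table_references;

fn main() {
    // `my_cte` is defined inline; `cars` must come from the catalog
    let sql = "with my_cte as (values (1), (2)) SELECT * from my_cte, cars";
    let statement = DFParser::parse_sql(sql).unwrap().pop_back().unwrap();
    let (table_refs, ctes) = resolve_table_references(&statement, true).unwrap();
    assert_eq!(table_refs.len(), 1); // only `cars`
    assert_eq!(ctes[0].to_string(), "my_cte");
}
```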
diff --git a/datafusion/sql/tests/cases/diagnostic.rs b/datafusion/sql/tests/cases/diagnostic.rs
index b3fc5dea9eff..d50b1223478e 100644
--- a/datafusion/sql/tests/cases/diagnostic.rs
+++ b/datafusion/sql/tests/cases/diagnostic.rs
@@ -69,7 +69,9 @@ fn do_query(sql: &'static str) -> Diagnostic {
 /// ## Example
 ///
 /// ```rust
-/// let spans = get_spans("SELECT /*whole+left*/speed/*left*/ + /*right*/10/*right+whole*/ FROM cars");
+/// let spans = get_spans(
+///     "SELECT /*whole+left*/speed/*left*/ + /*right*/10/*right+whole*/ FROM cars",
+/// );
 /// // whole is ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 /// // left is ^^^^^
 /// // right is ^^
diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
index 05e1f284c560..bb73d236684e 100644
--- a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
+++ b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs
@@ -187,7 +187,6 @@ macro_rules! get_row_value {
 /// [NULL Values and empty strings]: https://duckdb.org/dev/sqllogictest/result_verification#null-values-and-empty-strings
 ///
 /// Floating numbers are rounded to have a consistent representation with the Postgres runner.
-///
 pub fn cell_to_string(col: &ArrayRef, row: usize, is_spark_path: bool) -> Result<String> {
     if !col.is_valid(row) {
         // represent any null value with the string "NULL"
diff --git a/datafusion/substrait/src/lib.rs b/datafusion/substrait/src/lib.rs
index 0f2fbf199be3..cdde5df0ad22 100644
--- a/datafusion/substrait/src/lib.rs
+++ b/datafusion/substrait/src/lib.rs
@@ -66,19 +66,24 @@
 //! # use datafusion::arrow::array::{Int32Array, RecordBatch};
 //! # use datafusion_substrait::logical_plan;
 //! // Create a plan that scans table 't'
-//! let ctx = SessionContext::new();
-//! let batch = RecordBatch::try_from_iter(vec![("x", Arc::new(Int32Array::from(vec![42])) as _)])?;
-//! ctx.register_batch("t", batch)?;
-//! let df = ctx.sql("SELECT x from t").await?;
-//! let plan = df.into_optimized_plan()?;
+//! let ctx = SessionContext::new();
+//! let batch = RecordBatch::try_from_iter(vec![(
+//!     "x",
+//!     Arc::new(Int32Array::from(vec![42])) as _,
+//! )])?;
+//! ctx.register_batch("t", batch)?;
+//! let df = ctx.sql("SELECT x from t").await?;
+//! let plan = df.into_optimized_plan()?;
 //!
-//! // Convert the plan into a substrait (protobuf) Plan
-//! let substrait_plan = logical_plan::producer::to_substrait_plan(&plan, &ctx.state())?;
+//! // Convert the plan into a substrait (protobuf) Plan
+//! let substrait_plan = logical_plan::producer::to_substrait_plan(&plan, &ctx.state())?;
 //!
-//! // Receive a substrait protobuf from somewhere, and turn it into a LogicalPlan
-//! let logical_round_trip = logical_plan::consumer::from_substrait_plan(&ctx.state(), &substrait_plan).await?;
-//! let logical_round_trip = ctx.state().optimize(&logical_round_trip)?;
-//! assert_eq!(format!("{:?}", plan), format!("{:?}", logical_round_trip));
+//! // Receive a substrait protobuf from somewhere, and turn it into a LogicalPlan
+//! let logical_round_trip =
+//!     logical_plan::consumer::from_substrait_plan(&ctx.state(), &substrait_plan)
+//!         .await?;
+//! let logical_round_trip = ctx.state().optimize(&logical_round_trip)?;
+//! assert_eq!(format!("{:?}", plan), format!("{:?}", logical_round_trip));
 //! # Ok(())
 //! # }
 //! ```
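The Substrait doc test reflowed above also runs as a standalone program; a sketch assuming a tokio runtime and the same `datafusion` / `datafusion-substrait` APIs quoted in the hunk:

```rust
use datafusion::arrow::array::{Int32Array, RecordBatch};
use datafusion::error::Result;
use datafusion::prelude::*;
use datafusion_substrait::logical_plan;
use std::sync::Arc;

#[tokio::main]
async fn main() -> Result<()> {
    // Register a one-column table and build an optimized plan for a query
    let ctx = SessionContext::new();
    let batch = RecordBatch::try_from_iter(vec![(
        "x",
        Arc::new(Int32Array::from(vec![42])) as _,
    )])?;
    ctx.register_batch("t", batch)?;
    let plan = ctx.sql("SELECT x FROM t").await?.into_optimized_plan()?;

    // Round-trip the plan through the Substrait protobuf representation
    let proto = logical_plan::producer::to_substrait_plan(&plan, &ctx.state())?;
    let round_trip =
        logical_plan::consumer::from_substrait_plan(&ctx.state(), &proto).await?;
    let round_trip = ctx.state().optimize(&round_trip)?;
    assert_eq!(format!("{plan:?}"), format!("{round_trip:?}"));
    Ok(())
}
```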
diff --git a/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs b/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs
index 5392dd77b576..c734b9eb7a54 100644
--- a/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs
+++ b/datafusion/substrait/src/logical_plan/consumer/substrait_consumer.rs
@@ -150,7 +150,6 @@ use substrait::proto::{
 ///     }
 /// }
 /// ```
-///
 pub trait SubstraitConsumer: Send + Sync + Sized {
     async fn resolve_table_ref(
         &self,
diff --git a/rustfmt.toml b/rustfmt.toml
index 4522e520a469..37758e85367e 100644
--- a/rustfmt.toml
+++ b/rustfmt.toml
@@ -17,6 +17,7 @@
 edition = "2021"
 max_width = 90
+format_code_in_doc_comments = true # nightly-only feature
 # ignore generated files
 # ignore = [
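For illustration, this is the kind of rewrite that enabling `format_code_in_doc_comments` produces: rustfmt now treats code inside fenced doc blocks like ordinary code. A hypothetical before/after sketch (the inner blocks are marked `ignore` so they are not run as doc tests):

```rust
/// Before `cargo +nightly fmt`, the doc example is hand-wrapped:
///
/// ```ignore
/// let mut parser = DFParserBuilder::new("SELECT 1")
///     .build()?;
/// ```
///
/// Afterwards, rustfmt collapses it to fit within `max_width = 90`:
///
/// ```ignore
/// let mut parser = DFParserBuilder::new("SELECT 1").build()?;
/// ```
pub fn doc_formatting_example() {}
```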