diff --git a/Cargo.lock b/Cargo.lock index b4ad326358f8..2ca8c6c743ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -247,8 +247,7 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1bb018b6960c87fd9d025009820406f74e83281185a8bdcb44880d2aa5c9a87" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/cache_filter_result#a6f6e583f8a371f110f2cf0b50a5dcb480dfe2cb" dependencies = [ "arrow-arith", "arrow-array", @@ -271,8 +270,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44de76b51473aa888ecd6ad93ceb262fb8d40d1f1154a4df2f069b3590aa7575" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/cache_filter_result#a6f6e583f8a371f110f2cf0b50a5dcb480dfe2cb" dependencies = [ "arrow-array", "arrow-buffer", @@ -285,8 +283,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29ed77e22744475a9a53d00026cf8e166fe73cf42d89c4c4ae63607ee1cfcc3f" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/cache_filter_result#a6f6e583f8a371f110f2cf0b50a5dcb480dfe2cb" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -302,8 +299,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0391c96eb58bf7389171d1e103112d3fc3e5625ca6b372d606f2688f1ea4cce" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/cache_filter_result#a6f6e583f8a371f110f2cf0b50a5dcb480dfe2cb" dependencies = [ "bytes", "half", @@ -313,8 +309,7 @@ dependencies = [ [[package]] name = "arrow-cast" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f39e1d774ece9292697fcbe06b5584401b26bd34be1bec25c33edae65c2420ff" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/cache_filter_result#a6f6e583f8a371f110f2cf0b50a5dcb480dfe2cb" dependencies = [ "arrow-array", "arrow-buffer", @@ -334,8 +329,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9055c972a07bf12c2a827debfd34f88d3b93da1941d36e1d9fee85eebe38a12a" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/cache_filter_result#a6f6e583f8a371f110f2cf0b50a5dcb480dfe2cb" dependencies = [ "arrow-array", "arrow-cast", @@ -350,8 +344,7 @@ dependencies = [ [[package]] name = "arrow-data" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf75ac27a08c7f48b88e5c923f267e980f27070147ab74615ad85b5c5f90473d" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/cache_filter_result#a6f6e583f8a371f110f2cf0b50a5dcb480dfe2cb" dependencies = [ "arrow-buffer", "arrow-schema", @@ -362,8 +355,7 @@ dependencies = [ [[package]] name = "arrow-flight" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91efc67a4f5a438833dd76ef674745c80f6f6b9a428a3b440cbfbf74e32867e6" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/cache_filter_result#a6f6e583f8a371f110f2cf0b50a5dcb480dfe2cb" dependencies = [ "arrow-arith", "arrow-array", @@ -389,8 +381,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a222f0d93772bd058d1268f4c28ea421a603d66f7979479048c429292fac7b2e" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/cache_filter_result#a6f6e583f8a371f110f2cf0b50a5dcb480dfe2cb" dependencies = [ "arrow-array", "arrow-buffer", @@ -403,8 +394,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9085342bbca0f75e8cb70513c0807cc7351f1fbf5cb98192a67d5e3044acb033" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/cache_filter_result#a6f6e583f8a371f110f2cf0b50a5dcb480dfe2cb" dependencies = [ "arrow-array", "arrow-buffer", @@ -425,8 +415,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab2f1065a5cad7b9efa9e22ce5747ce826aa3855766755d4904535123ef431e7" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/cache_filter_result#a6f6e583f8a371f110f2cf0b50a5dcb480dfe2cb" dependencies = [ "arrow-array", "arrow-buffer", @@ -438,8 +427,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3703a0e3e92d23c3f756df73d2dc9476873f873a76ae63ef9d3de17fda83b2d8" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/cache_filter_result#a6f6e583f8a371f110f2cf0b50a5dcb480dfe2cb" dependencies = [ "arrow-array", "arrow-buffer", @@ -451,8 +439,7 @@ dependencies = [ [[package]] name = "arrow-schema" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73a47aa0c771b5381de2b7f16998d351a6f4eb839f1e13d48353e17e873d969b" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/cache_filter_result#a6f6e583f8a371f110f2cf0b50a5dcb480dfe2cb" dependencies = [ "bitflags 2.9.1", "serde", @@ -462,8 +449,7 @@ dependencies = [ [[package]] name = "arrow-select" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24b7b85575702b23b85272b01bc1c25a01c9b9852305e5d0078c79ba25d995d4" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/cache_filter_result#a6f6e583f8a371f110f2cf0b50a5dcb480dfe2cb" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -476,8 +462,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9260fddf1cdf2799ace2b4c2fc0356a9789fa7551e0953e35435536fecefebbd" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/cache_filter_result#a6f6e583f8a371f110f2cf0b50a5dcb480dfe2cb" dependencies = [ "arrow-array", "arrow-buffer", @@ -1063,7 +1048,7 @@ dependencies = [ "bitflags 2.9.1", "cexpr", "clang-sys", - "itertools 0.12.1", + "itertools 0.10.5", "lazy_static", "lazycell", "log", @@ -3732,15 +3717,6 @@ dependencies = [ "either", ] -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.13.0" @@ -4416,8 +4392,7 @@ dependencies = [ [[package]] name = "parquet" version = "55.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be7b2d778f6b841d37083ebdf32e33a524acde1266b5884a8ca29bf00dfa1231" +source = "git+https://github.com/alamb/arrow-rs.git?branch=alamb/cache_filter_result#a6f6e583f8a371f110f2cf0b50a5dcb480dfe2cb" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -4839,7 +4814,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ "heck 0.5.0", - "itertools 0.14.0", + "itertools 0.13.0", "log", "multimap", "once_cell", @@ -4859,7 +4834,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.13.0", "proc-macro2", "quote", "syn 2.0.101", diff --git a/Cargo.toml b/Cargo.toml index 79bb2f3cc602..63472bc6f189 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -218,3 +218,33 @@ uninlined_format_args = "warn" [workspace.lints.rust] unexpected_cfgs = { level = "warn", check-cfg = ["cfg(tarpaulin)"] } unused_qualifications = "deny" + + +# Patch to pull in https://github.com/apache/arrow-rs/pull/7513 +[patch.crates-io] +arrow = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/cache_filter_result" } +arrow-array = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/cache_filter_result" } +arrow-buffer = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/cache_filter_result" } +arrow-cast = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/cache_filter_result" } +arrow-data = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/cache_filter_result" } +arrow-ipc = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/cache_filter_result" } +arrow-schema = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/cache_filter_result" } +arrow-select = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/cache_filter_result" } +arrow-string = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/cache_filter_result" } +arrow-ord = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/cache_filter_result" } +arrow-flight = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/cache_filter_result" } +parquet = { git = "https://github.com/alamb/arrow-rs.git", branch = "alamb/cache_filter_result" } +# /Users/andrewlamb/Software/arrow-rs +#arrow = { path= "/Users/andrewlamb/Software/arrow-rs/arrow" } +#arrow-array = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-array" } +#arrow-buffer = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-buffer" } +#arrow-cast = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-cast" } +#arrow-data = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-data" } +#arrow-ipc = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-ipc" } +#arrow-schema = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-schema" } +#arrow-select = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-select" } +#arrow-string = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-string" } +#arrow-ord = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-ord" } +#arrow-flight = { path= "/Users/andrewlamb/Software/arrow-rs/arrow-flight" } +#parquet = { path= "/Users/andrewlamb/Software/arrow-rs/parquet" } + diff --git a/benchmarks/src/clickbench.rs b/benchmarks/src/clickbench.rs index 2e934346748e..3678f96776ed 100644 --- a/benchmarks/src/clickbench.rs +++ b/benchmarks/src/clickbench.rs @@ -122,6 +122,8 @@ impl RunOpt { // The hits_partitioned dataset specifies string columns // as binary due to how it was written. Force it to strings parquet_options.binary_as_string = true; + // TEST out impact of filter pushdown + parquet_options.pushdown_filters = true; } let rt_builder = self.common.runtime_env_builder()?;