Skip to content

Commit cb135ad

Browse files
committed
performance: eliminate unnecessary null_counts calculations
After the null_counts array has been calculated in advance, there is no need to call GetColumnDatum to keep updating null_counts; we can read the datum directly.
1 parent 2b44170 commit cb135ad

File tree

4 files changed

+19
-14
lines changed

4 files changed

+19
-14
lines changed

contrib/pax_storage/src/cpp/storage/micro_partition_row_filter_reader.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -152,15 +152,15 @@ bool MicroPartitionRowFilterReader::ApplyFiltersWithSampling(
152152
return true;
153153
}
154154

155+
ctx->sample_rows++;
155156
bool all_pass = true;
157+
// in the sampling phase, we need to evaluate all filter nodes; if any node
158+
// fails, the tuple is rejected
156159
for (auto &node : ctx->filter_nodes) {
157160
if (!EvalFilterNode(ctx, group, desc, row_index, slot, node, true)) {
158161
all_pass = false;
159-
break;
160162
}
161163
}
162-
ctx->sample_rows++;
163-
if (!all_pass) return false;
164164

165165
if (ctx->sample_rows >= ctx->sample_target) {
166166
for (auto &node : ctx->filter_nodes) {
@@ -175,7 +175,7 @@ bool MicroPartitionRowFilterReader::ApplyFiltersWithSampling(
175175
});
176176
ctx->sampling = false;
177177
}
178-
return true;
178+
return all_pass;
179179
}
180180

181181
bool MicroPartitionRowFilterReader::ReadTuple(TupleTableSlot *slot) {

contrib/pax_storage/src/cpp/storage/orc/orc_group.cc

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -293,15 +293,20 @@ std::pair<Datum, bool> OrcGroup::GetColumnValueNoMissing(size_t column_index,
293293
return {0, true};
294294
}
295295

296-
if (column->HasNull() && !nulls_shuffle_[column_index]) {
297-
CalcNullShuffle(column, column_index);
298-
}
296+
if (column->HasNull()) {
297+
const auto &bm = column->GetBitmap();
298+
Assert(bm);
299+
if (!bm->Test(row_index)) {
300+
return {0, true};
301+
}
299302

300-
if (nulls_shuffle_[column_index]) {
303+
// the value is not null: calculate the per-row null offsets array if it has not been built yet
304+
if (!nulls_shuffle_[column_index]) {
305+
CalcNullShuffle(column, column_index);
306+
}
301307
null_counts = nulls_shuffle_[column_index][row_index];
302308
}
303-
304-
return GetColumnDatum(column, row_index, &null_counts);
309+
return {column->GetDatum(row_index - null_counts), false};
305310
}
306311

307312
void OrcGroup::CalcNullShuffle(PaxColumn *column, size_t column_index) {

contrib/pax_storage/src/test/regress/expected/gp_runtime_filter.out

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -261,8 +261,8 @@ DROP TABLE IF EXISTS t1;
261261
NOTICE: table "t1" does not exist, skipping
262262
DROP TABLE IF EXISTS t2;
263263
NOTICE: table "t2" does not exist, skipping
264-
CREATE TABLE t1(c1 int, c2 int, c3 int, c4 int, c5 int) with (appendonly=true, orientation=column) distributed by (c1);
265-
CREATE TABLE t2(c1 int, c2 int, c3 int, c4 int, c5 int) with (appendonly=true, orientation=column) distributed REPLICATED;
264+
CREATE TABLE t1(c1 int, c2 int, c3 int, c4 int, c5 int) distributed by (c1);
265+
CREATE TABLE t2(c1 int, c2 int, c3 int, c4 int, c5 int) distributed REPLICATED;
266266
INSERT INTO t1 VALUES (5,5,5,5,5);
267267
INSERT INTO t2 VALUES (1,1,1,1,1), (2,2,2,2,2), (3,3,3,3,3), (4,4,4,4,4);
268268
INSERT INTO t1 SELECT * FROM t1;

contrib/pax_storage/src/test/regress/sql/gp_runtime_filter.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ SET enable_parallel TO off;
8585
-- case 1: join on distribution table and replicated table.
8686
DROP TABLE IF EXISTS t1;
8787
DROP TABLE IF EXISTS t2;
88-
CREATE TABLE t1(c1 int, c2 int, c3 int, c4 int, c5 int) with (appendonly=true, orientation=column) distributed by (c1);
89-
CREATE TABLE t2(c1 int, c2 int, c3 int, c4 int, c5 int) with (appendonly=true, orientation=column) distributed REPLICATED;
88+
CREATE TABLE t1(c1 int, c2 int, c3 int, c4 int, c5 int) distributed by (c1);
89+
CREATE TABLE t2(c1 int, c2 int, c3 int, c4 int, c5 int) distributed REPLICATED;
9090

9191
INSERT INTO t1 VALUES (5,5,5,5,5);
9292
INSERT INTO t2 VALUES (1,1,1,1,1), (2,2,2,2,2), (3,3,3,3,3), (4,4,4,4,4);

0 commit comments

Comments
 (0)