@@ -30,21 +30,31 @@ void SourceColumnsDescription::PhysicalColumnPositions::clear()
30
30
subcolumns.clear ();
31
31
}
32
32
33
- SourceColumnsDescription::SourceColumnsDescription (const Names & required_column_names, StorageSnapshotPtr storage_snapshot)
33
+ SourceColumnsDescription::SourceColumnsDescription (
34
+ const Names & required_column_names, StorageSnapshotPtr storage_snapshot, bool enable_partial_read)
34
35
: SourceColumnsDescription(
35
- storage_snapshot->getColumnsByNames (GetColumnsOptions(GetColumnsOptions::All).withSubcolumns().withVirtuals().withExtendedObjects(), required_column_names),
36
+ storage_snapshot->getColumnsByNames (
37
+ GetColumnsOptions (GetColumnsOptions::All).withSubcolumns().withVirtuals().withExtendedObjects(), required_column_names),
36
38
storage_snapshot->getMetadataForQuery()->getSampleBlock(),
37
- storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects()))
39
+ storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects()),
40
+ enable_partial_read)
38
41
{
39
42
}
40
43
41
- SourceColumnsDescription::SourceColumnsDescription (const NamesAndTypesList & columns_to_read, const Block & schema, const NamesAndTypesList & all_extended_columns)
44
+ SourceColumnsDescription::SourceColumnsDescription (
45
+ const NamesAndTypesList & columns_to_read,
46
+ const Block & schema,
47
+ const NamesAndTypesList & all_extended_columns,
48
+ bool enable_partial_read)
42
49
{
43
50
/// FIXME, when we have multi-version of schema, the header and the schema may be mismatched
44
51
auto column_size = columns_to_read.size ();
45
52
53
+ /// Just read required partial physical columns
54
+ if (enable_partial_read)
55
+ physical_column_positions_to_read.positions .reserve (column_size);
56
+
46
57
positions.reserve (column_size);
47
- physical_column_positions_to_read.positions .reserve (column_size);
48
58
subcolumns_to_read.reserve (column_size);
49
59
50
60
std::vector<uint16_t > read_all_subcolumns_positions;
@@ -112,45 +122,48 @@ SourceColumnsDescription::SourceColumnsDescription(const NamesAndTypesList & col
112
122
auto pos_in_schema = schema.getPositionByName (name_in_storage);
113
123
const auto & column_in_storage = schema.getByName (name_in_storage);
114
124
115
- /// Calculate main column pos
116
- size_t physical_pos_in_schema_to_read = 0 ;
117
- /// We don't need to read duplicate physical columns from schema
118
- auto physical_pos_iter = std::find (
119
- physical_column_positions_to_read.positions .begin (), physical_column_positions_to_read.positions .end (), pos_in_schema);
120
- if (physical_pos_iter == physical_column_positions_to_read.positions .end ())
125
+ size_t physical_pos_in_schema_to_read = pos_in_schema;
126
+ /// Specially, re-calculate pos in partially read schema
127
+ if (enable_partial_read)
121
128
{
122
- physical_pos_in_schema_to_read = physical_column_positions_to_read.positions .size ();
123
- physical_column_positions_to_read.positions .emplace_back (pos_in_schema);
124
-
125
- /// json, array(json), tuple(..., json, ...)
126
- if (column_in_storage.type ->hasDynamicSubcolumns ())
129
+ /// We don't need to read duplicate physical columns from schema
130
+ auto physical_pos_iter = std::find (
131
+ physical_column_positions_to_read.positions .begin (), physical_column_positions_to_read.positions .end (), pos_in_schema);
132
+ if (physical_pos_iter == physical_column_positions_to_read.positions .end ())
127
133
{
128
- /// We like to read parent json column once if multiple subcolumns of the same json are required
129
- /// like `select json.a, json.b from stream`
130
- auto find_iter = std::find_if (
131
- physical_object_columns_to_read.begin (),
132
- physical_object_columns_to_read.end (),
133
- [&column](const auto & col_name_type) { return col_name_type.name == column.name ; });
134
-
135
- if (find_iter == physical_object_columns_to_read.end ())
134
+ physical_pos_in_schema_to_read = physical_column_positions_to_read.positions .size ();
135
+ physical_column_positions_to_read.positions .emplace_back (pos_in_schema);
136
+
137
+ /// json, array(json), tuple(..., json, ...)
138
+ if (column_in_storage.type ->hasDynamicSubcolumns ())
136
139
{
137
- if (column.isSubcolumn ())
140
+ /// We like to read parent json column once if multiple subcolumns of the same json are required
141
+ /// like `select json.a, json.b from stream`
142
+ auto find_iter = std::find_if (
143
+ physical_object_columns_to_read.begin (),
144
+ physical_object_columns_to_read.end (),
145
+ [&column](const auto & col_name_type) { return col_name_type.name == column.name ; });
146
+
147
+ if (find_iter == physical_object_columns_to_read.end ())
138
148
{
139
- /// When reading a subcolumn of a json like `select json.a from stream`, we will need read the parent `json` column
140
- auto name_and_type = all_extended_columns.tryGetByName (name_in_storage);
141
- assert (name_and_type);
142
- physical_object_columns_to_read.emplace_back (std::move (*name_and_type));
143
- }
144
- else
145
- {
146
- /// This column is parent json column, like `select json from stream`, use the name and type directly
147
- physical_object_columns_to_read.emplace_back (column);
149
+ if (column.isSubcolumn ())
150
+ {
151
+ /// When reading a subcolumn of a json like `select json.a from stream`, we will need read the parent `json` column
152
+ auto name_and_type = all_extended_columns.tryGetByName (name_in_storage);
153
+ assert (name_and_type);
154
+ physical_object_columns_to_read.emplace_back (std::move (*name_and_type));
155
+ }
156
+ else
157
+ {
158
+ /// This column is parent json column, like `select json from stream`, use the name and type directly
159
+ physical_object_columns_to_read.emplace_back (column);
160
+ }
148
161
}
149
162
}
150
163
}
164
+ else
165
+ physical_pos_in_schema_to_read = physical_pos_iter - physical_column_positions_to_read.positions .begin ();
151
166
}
152
- else
153
- physical_pos_in_schema_to_read = physical_pos_iter - physical_column_positions_to_read.positions .begin ();
154
167
155
168
/// For subcolumn, which depends on the main column
156
169
if (column.isSubcolumn ())
@@ -181,7 +194,7 @@ SourceColumnsDescription::SourceColumnsDescription(const NamesAndTypesList & col
181
194
physical_column_positions_to_read.subcolumns .erase (pos);
182
195
183
196
/// Clients like to read virtual columns only, add `_tp_time`, then we know how many rows
184
- if (physical_column_positions_to_read.positions .empty ())
197
+ if (enable_partial_read && physical_column_positions_to_read.positions .empty ())
185
198
physical_column_positions_to_read.positions .emplace_back (schema.getPositionByName (ProtonConsts::RESERVED_EVENT_TIME));
186
199
}
187
200
}
0 commit comments