Skip to content

Commit 96ceb7f

Browse files
committed
Try fix default value filling logic
Signed-off-by: JaySon-Huang <tshent@qq.com>
1 parent e8a3c0a commit 96ceb7f

File tree

5 files changed

+91
-38
lines changed

5 files changed

+91
-38
lines changed

dbms/src/Core/Block.cpp

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -256,15 +256,14 @@ void Block::checkNumberOfRows() const
256256
{
257257
auto first_col = data.front();
258258
throw Exception(
259-
fmt::format(
260-
"Sizes of columns doesn't match: {}(id={}): {}, {}(id={}): {}",
261-
first_col.name,
262-
first_col.column_id,
263-
rows,
264-
elem.name,
265-
elem.column_id,
266-
size),
267-
ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
259+
ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH,
260+
"Sizes of columns doesn't match: {}(id={}): rows={}, {}(id={}): rows={}",
261+
first_col.name,
262+
first_col.column_id,
263+
rows,
264+
elem.name,
265+
elem.column_id,
266+
size);
268267
}
269268
}
270269
}

dbms/src/Storages/KVStore/Decode/RegionBlockReader.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ bool RegionBlockReader::readImpl(Block & block, const ReadList & data_list, bool
189189

190190
VersionColResolver<ReadList> version_col_resolver;
191191
version_col_resolver.check(block, schema_snapshot->column_defines->size());
192+
// The column_ids to read according to schema_snapshot, each elem is (column_id, block_pos)
192193
const auto & read_column_ids = schema_snapshot->getColId2BlockPosMap();
193194
const auto & pk_column_ids = schema_snapshot->pk_column_ids;
194195
const auto & pk_pos_map = schema_snapshot->pk_pos_map;
@@ -204,12 +205,12 @@ bool RegionBlockReader::readImpl(Block & block, const ReadList & data_list, bool
204205
/// extra handle, del, version column is with column id smaller than other visible column id,
205206
/// so they must exists before all other columns, and we can get them before decoding other columns
206207
ColumnUInt8 * raw_delmark_col = nullptr;
207-
const size_t invalid_column_pos = std::numeric_limits<size_t>::max();
208+
const static size_t INVALID_COLUMN_POS = std::numeric_limits<size_t>::max();
208209
// we cannot figure out extra_handle's column type now, so we just remember it's pos here
209-
size_t extra_handle_column_pos = invalid_column_pos;
210+
size_t extra_handle_column_pos = INVALID_COLUMN_POS;
210211

211212
while (raw_delmark_col == nullptr || version_col_resolver.needBuild()
212-
|| extra_handle_column_pos == invalid_column_pos)
213+
|| extra_handle_column_pos == INVALID_COLUMN_POS)
213214
{
214215
if (column_ids_iter->first == MutSup::delmark_col_id)
215216
{
@@ -279,6 +280,8 @@ bool RegionBlockReader::readImpl(Block & block, const ReadList & data_list, bool
279280
else
280281
{
281282
// Parse column value from encoded value
283+
// Decode the column_ids from `column_ids_iter` to `read_column_ids.end()`
284+
// and insert into `block` at position starting from `next_column_pos`
282285
if (!appendRowToBlock(
283286
*value_ptr,
284287
column_ids_iter,

dbms/src/Storages/KVStore/MultiRaft/RaftCommands.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,11 @@ std::pair<EngineStoreApplyRes, DM::WriteResult> Region::handleWriteRaftCmd(
368368
{
369369
auto tikv_key = TiKVKey(cmds.keys[i].data, cmds.keys[i].len);
370370
auto tikv_value = TiKVValue(cmds.vals[i].data, cmds.vals[i].len);
371-
LOG_INFO(Logger::get("dddddddddd"), "Region::handleWriteRaftCmd Put key={}, value={}", tikv_key.toDebugString(), tikv_value.toDebugString());
371+
LOG_INFO(
372+
Logger::get("dddddddddd"),
373+
"Region::handleWriteRaftCmd Put key={}, value={}",
374+
tikv_key.toDebugString(),
375+
tikv_value.toDebugString());
372376
if (cf == ColumnFamilyType::Write)
373377
{
374378
write_put_key_count++;

dbms/src/Storages/KVStore/tests/gtest_region_block_reader.cpp

Lines changed: 43 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,9 @@ TEST_F(RegionBlockReaderTest, MissingColumnRowV2)
354354
encodeColumns(table_info, fields, RowEncodeVersion::RowV2);
355355
auto new_table_info = getTableInfoWithMoreColumns({MutSup::extra_handle_id}, false);
356356
auto new_decoding_schema = getDecodingStorageSchemaSnapshot(new_table_info);
357-
ASSERT_TRUE(decodeAndCheckColumns(new_decoding_schema, false));
357+
// `decodeAndCheckColumns` will return false because "column1" not_null=true but no_default_value=false
358+
// FIXME: Re check the logic here
359+
ASSERT_FALSE(decodeAndCheckColumns(new_decoding_schema, false));
358360
}
359361

360362
TEST_F(RegionBlockReaderTest, MissingColumnRowV1)
@@ -363,7 +365,9 @@ TEST_F(RegionBlockReaderTest, MissingColumnRowV1)
363365
encodeColumns(table_info, fields, RowEncodeVersion::RowV1);
364366
auto new_table_info = getTableInfoWithMoreColumns({MutSup::extra_handle_id}, false);
365367
auto new_decoding_schema = getDecodingStorageSchemaSnapshot(new_table_info);
366-
ASSERT_TRUE(decodeAndCheckColumns(new_decoding_schema, false));
368+
// `decodeAndCheckColumns` will return false because "column1" not_null=true but no_default_value=false
369+
// FIXME: Re check the logic here
370+
ASSERT_FALSE(decodeAndCheckColumns(new_decoding_schema, false));
367371
}
368372

369373
TEST_F(RegionBlockReaderTest, ExtraColumnRowV2)
@@ -696,11 +700,6 @@ CATCH
696700
TEST_F(RegionBlockReaderTest, ReadFromRegionDefaultValue)
697701
try
698702
{
699-
// With this table_info, c1 can be decoded to NULL value
700-
// TableInfo table_info(
701-
// R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":681,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463844343842340870})",
702-
// NullspaceID);
703-
704703
// With this table_info, c1 is filled with "0" according to ori_default
705704
TableInfo table_info(
706705
R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"default":"-56083770","id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":1,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":711,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463845180343844895})",
@@ -713,16 +712,16 @@ try
713712
auto region = RegionBench::makeRegionForRange(region_id, region_start_key, region_end_key);
714713
// the hex kv dump from SSTFile
715714
std::vector<std::tuple<std::string_view, std::string_view>> kvs = {
716-
{
717-
"7480000000000002FFA95F728000000000FF0000010000000000FAF9901806DEF7FFDA",
718-
"50A380A08892FFF9B706762C8000040000000405060708000F0016001A00BFDC4011A00000000A0080000000000A008000003CA339"
719-
"1ABC85",
720-
},
721-
{
722-
"7480000000000002FFA95F728000000000FF0000010000000000FAF9901806DEF7FFD8",
723-
"50A680A08892FFF9B706762C8000040000000405060708000F0016001A00BFDC4011A00000000A008033E04D600A008000003CA339"
724-
"1ABC85",
725-
},
715+
// {
716+
// "7480000000000002FFA95F728000000000FF0000010000000000FAF9901806DEF7FFDA",
717+
// "50A380A08892FFF9B706762C8000040000000405060708000F0016001A00BFDC4011A00000000A0080000000000A008000003CA339"
718+
// "1ABC85",
719+
// },
720+
// {
721+
// "7480000000000002FFA95F728000000000FF0000010000000000FAF9901806DEF7FFD8",
722+
// "50A680A08892FFF9B706762C8000040000000405060708000F0016001A00BFDC4011A00000000A008033E04D600A008000003CA339"
723+
// "1ABC85",
724+
// },
726725
{
727726
"7480000000000002FFA95F728000000000FF0000020000000000FAF9901806DE33FFE8",
728727
"509680B08E92FFF9B706762580000300000004050608000F001600BF720CDD400000000A0080000000010A00800000393C",
@@ -739,10 +738,35 @@ try
739738
auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info);
740739
{
741740
// force_decode=false can not decode because there are
742-
// missing value for column with primary key flag.
741+
// missing value for column with not null flag.
742+
auto reader = RegionBlockReader(decoding_schema);
743+
Block res_block = createBlockSortByColumnID(decoding_schema);
744+
ASSERT_FALSE(reader.read(res_block, *data_list_read, false));
745+
}
746+
{
747+
// force_decode=true can decode the block, and filling the default value for c1
748+
auto reader = RegionBlockReader(decoding_schema);
749+
Block res_block = createBlockSortByColumnID(decoding_schema);
750+
ASSERT_TRUE(reader.read(res_block, *data_list_read, true));
751+
// TODO: verify the default value is filled correctly
752+
LOG_INFO(
753+
Logger::get(),
754+
"Decoded block:\n{}",
755+
DB::tests::getColumnsContent(res_block.getColumnsWithTypeAndName()));
756+
}
757+
758+
// With this table_info, c1 does not have the "not null" flag
759+
TableInfo table_info_c1_nullable(
760+
R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":681,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463844343842340870})",
761+
NullspaceID);
762+
763+
decoding_schema = getDecodingStorageSchemaSnapshot(table_info_c1_nullable);
764+
{
765+
// force_decode=false should be able to decode because c1 is nullable
743766
auto reader = RegionBlockReader(decoding_schema);
744767
Block res_block = createBlockSortByColumnID(decoding_schema);
745-
EXPECT_TRUE(reader.read(res_block, *data_list_read, false));
768+
ASSERT_TRUE(reader.read(res_block, *data_list_read, false));
769+
// TODO: verify the default value is filled correctly
746770
LOG_INFO(
747771
Logger::get(),
748772
"Decoded block:\n{}",

dbms/src/TiDB/Decode/RowCodec.cpp

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -414,10 +414,15 @@ inline bool addDefaultValueToColumnIfPossible(
414414
// fallthrough to fill default value when force_decode
415415
}
416416

417-
if (column_info.hasNoDefaultValueFlag() && column_info.hasNotNullFlag())
417+
if (column_info.hasNotNullFlag())
418418
{
419419
if (!force_decode)
420+
{
421+
// This is a Column that does not have encoded datum in the value, but it is defined as NOT NULL.
422+
// It could be a row encoded by newer schema after turning `NOT NULL` to `NULLABLE`.
423+
// Return false to trigger schema sync when `force_decode==false`.
420424
return false;
425+
}
421426
// Else the row does not contain this "not null" / "no default value" column,
422427
// it could be a row encoded by older schema.
423428
// fallthrough to fill default value when force_decode
@@ -468,7 +473,9 @@ bool appendRowV2ToBlockImpl(
468473
null_column_ids);
469474

470475
size_t values_start_pos = cursor;
476+
// how many not null columns have been processed
471477
size_t idx_not_null = 0;
478+
// how many null columns have been processed
472479
size_t idx_null = 0;
473480
// Merge ordered not null/null columns to keep order.
474481
while (idx_not_null < not_null_column_ids.size() || idx_null < null_column_ids.size())
@@ -487,22 +494,29 @@ bool appendRowV2ToBlockImpl(
487494

488495
auto next_datum_column_id = is_null ? null_column_ids[idx_null] : not_null_column_ids[idx_not_null];
489496
const auto next_column_id = column_ids_iter->first;
497+
LOG_INFO(
498+
Logger::get("dddddddddd"),
499+
"next_column_id={} next_datum_column_id={} force_decode={}",
500+
next_column_id,
501+
next_datum_column_id,
502+
force_decode);
490503
if (next_column_id > next_datum_column_id)
491504
{
492-
// The next column id to read is bigger than the column id of next datum in encoded row.
505+
// The next_column_id to read is bigger than the next_datum_column_id in encoded row.
493506
// It means this is the datum of extra column. May happen when reading after dropping
494507
// a column.
508+
// For `force_decode == false`, we should return false to let upper layer trigger schema sync.
495509
if (!force_decode)
496510
return false;
497-
// Ignore the extra column and continue to parse other datum
511+
// For `force_decode == true`, we just skip this extra column and continue to parse other datum.
498512
if (is_null)
499513
idx_null++;
500514
else
501515
idx_not_null++;
502516
}
503517
else if (next_column_id < next_datum_column_id)
504518
{
505-
// The next column id to read is less than the column id of next datum in encoded row.
519+
// The next_column_id to read is less than the next_datum_column_id in encoded row.
506520
// It means this is the datum of missing column. May happen when reading after adding
507521
// a column.
508522
// Fill with default value and continue to read data for next column id.
@@ -511,21 +525,24 @@ bool appendRowV2ToBlockImpl(
511525
Logger::get("dddddddddd"),
512526
"appendRowV2ToBlockImpl: fill default value for missing column,"
513527
" next_column_id={} next_datum_column_id={} block_column_pos={}"
514-
" column_info={{name={} id={} not_null={} default_value={}}}",
528+
" column_info={{name={} id={} not_null={} no_default_val={} default_value={}}}",
515529
next_column_id,
516530
next_datum_column_id,
517531
block_column_pos,
518532
column_info.name,
519533
column_info.id,
520534
column_info.hasNotNullFlag(),
535+
column_info.hasNoDefaultValueFlag(),
521536
applyVisitor(FieldVisitorToString(), column_info.defaultValueToField()));
522537
if (!addDefaultValueToColumnIfPossible(
523538
column_info,
524539
block,
525540
block_column_pos,
526541
ignore_pk_if_absent,
527542
force_decode))
543+
{
528544
return false;
545+
}
529546
column_ids_iter++;
530547
block_column_pos++;
531548
}
@@ -590,28 +607,34 @@ bool appendRowV2ToBlockImpl(
590607
block_column_pos++;
591608
}
592609
}
610+
// There are more columns to read other than the datum encoded in the row.
593611
while (column_ids_iter != column_ids_iter_end)
594612
{
613+
// Skip if the column is the same as `pk_handle_id`. The value of column
614+
// `pk_handle_id` will be filled in upper layer but not in this function.
595615
if (column_ids_iter->first != pk_handle_id)
596616
{
597617
const auto & column_info = column_infos[column_ids_iter->second];
598618
LOG_INFO(
599619
Logger::get("dddddddddd"),
600620
"appendRowV2ToBlockImpl: fill default value for missing column,"
601621
" block_column_pos={}"
602-
" column_info={{name={} id={} not_null={} default_value={}}}",
622+
" column_info={{name={} id={} not_null={} no_default_val={} default_value={}}}",
603623
block_column_pos,
604624
column_info.name,
605625
column_info.id,
606626
column_info.hasNotNullFlag(),
627+
column_info.hasNoDefaultValueFlag(),
607628
applyVisitor(FieldVisitorToString(), column_info.defaultValueToField()));
608629
if (!addDefaultValueToColumnIfPossible(
609630
column_info,
610631
block,
611632
block_column_pos,
612633
ignore_pk_if_absent,
613634
force_decode))
635+
{
614636
return false;
637+
}
615638
}
616639
column_ids_iter++;
617640
block_column_pos++;

0 commit comments

Comments
 (0)