Skip to content

Commit 62de9af

Browse files
ddl: Fix default value filling with finer granularity (#10682) (#10691)
close #10680 ddl: Fix default value filling with finer granularity - Tightens addDefaultValueToColumnIfPossible - For NOT NULL missing columns, force_decode=false now returns false unless there is an origin default. - This forces a schema sync instead of silently filling 0. - force_decode=true still fills a default value (best-effort). Signed-off-by: JaySon-Huang <tshent@qq.com> Co-authored-by: JaySon-Huang <tshent@qq.com>
1 parent 7451e2c commit 62de9af

File tree

9 files changed

+420
-22
lines changed

9 files changed

+420
-22
lines changed

dbms/src/Storages/KVStore/Decode/RegionBlockReader.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ bool RegionBlockReader::readImpl(Block & block, const ReadList & data_list, bool
179179
{
180180
VersionColResolver<ReadList> version_col_resolver;
181181
version_col_resolver.check(block, schema_snapshot->column_defines->size());
182+
// The column_ids to read according to schema_snapshot, each elem is (column_id, block_pos)
182183
const auto & read_column_ids = schema_snapshot->getColId2BlockPosMap();
183184
const auto & pk_column_ids = schema_snapshot->pk_column_ids;
184185
const auto & pk_pos_map = schema_snapshot->pk_pos_map;
@@ -269,6 +270,8 @@ bool RegionBlockReader::readImpl(Block & block, const ReadList & data_list, bool
269270
else
270271
{
271272
// Parse column value from encoded value
273+
// Decode the column_ids from `column_ids_iter` to `read_column_ids.end()`
274+
// and insert into `block` at position starting from `next_column_pos`
272275
if (!appendRowToBlock(
273276
*value_ptr,
274277
column_ids_iter,
@@ -299,12 +302,12 @@ bool RegionBlockReader::readImpl(Block & block, const ReadList & data_list, bool
299302
if constexpr (pk_type == TMTPKType::INT64)
300303
static_cast<ColumnInt64 *>(raw_pk_column)->getData().push_back(handle_value);
301304
else if constexpr (pk_type == TMTPKType::UINT64)
302-
static_cast<ColumnUInt64 *>(raw_pk_column)->getData().push_back(UInt64(handle_value));
305+
static_cast<ColumnUInt64 *>(raw_pk_column)->getData().push_back(static_cast<UInt64>(handle_value));
303306
else
304307
{
305308
// The pk_type must be Int32/UInt32 or more narrow type
306309
// so cannot tell its' exact type here, just use `insert(Field)`
307-
raw_pk_column->insert(Field(handle_value));
310+
raw_pk_column->insert(Field{handle_value});
308311
if (unlikely(raw_pk_column->getInt(index) != handle_value))
309312
{
310313
if (!force_decode)

dbms/src/Storages/KVStore/tests/gtest_region_block_reader.cpp

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -691,5 +691,178 @@ try
691691
}
692692
CATCH
693693

694+
TEST_F(RegionBlockReaderTest, ReadFromRegionDefaultValue)
695+
try
696+
{
697+
// With this table_info, column "c1" is "NOT NULL" and has no origin default
698+
TableInfo table_info_c1_not_null_no_origin_default(
699+
R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"default":"-56083770","id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":1,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":636,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463845180343844895})",
700+
NullspaceID);
701+
702+
// With this table_info, column "c1" is "NOT NULL" and has no default value
703+
TableInfo table_info_c1_not_null_no_default_value(
704+
R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":4097,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":636,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463845180343844895})",
705+
NullspaceID);
706+
707+
// With this table_info, column "c1" has the "NOT NULL" flag and has origin default "-56083770"
708+
TableInfo table_info_c1_not_null_with_origin_default(
709+
R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"origin_default":"-56083770","id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":1,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":636,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463845180343844895})",
710+
NullspaceID);
711+
712+
// With this table_info, column "c1" has the "NOT NULL" flag and has origin default "-56083770", but the state is not public
713+
TableInfo table_info_c1_not_null_with_origin_default_non_public(
714+
R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"origin_default":"-56083770","id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":3,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":1,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":636,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463845180343844895})",
715+
NullspaceID);
716+
717+
// With this table_info, column "c1" does not have the "NOT NULL" flag
718+
TableInfo table_info_c1_nullable(
719+
R"({"cols":[{"id":1,"name":{"L":"c0","O":"c0"},"offset":0,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":4,"Tp":1}},{"id":2,"name":{"L":"handle","O":"handle"},"offset":1,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":515,"Flen":11,"Tp":3}},{"id":7,"name":{"L":"c1","O":"c1"},"offset":2,"state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":20,"Tp":8}},{"id":4,"name":{"L":"c2","O":"c2"},"offset":3,"origin_default":"0.07954397","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":-1,"Flag":4097,"Flen":12,"Tp":4}},{"id":5,"name":{"L":"c5","O":"c5"},"offset":4,"origin_default":"0","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}},{"default":"247262911","id":6,"name":{"L":"c4","O":"c4"},"offset":5,"origin_default":"247262911","state":5,"type":{"Charset":"binary","Collate":"binary","Decimal":0,"Flag":0,"Flen":10,"Tp":246}}],"id":636,"index_info":[],"is_common_handle":false,"keyspace_id":4294967295,"name":{"L":"t0","O":"t0"},"pk_is_handle":true,"state":5,"tiflash_replica":{"Available":true,"Count":1},"update_timestamp":463844343842340870})",
720+
NullspaceID);
721+
722+
RegionID region_id = 4;
723+
// the start_key and end_key for table_id = 68
724+
String region_start_key(bytesFromHexString("7480000000000002FF7C5F720000000000FA"));
725+
String region_end_key(bytesFromHexString("7480000000000002FF7D00000000000000F8"));
726+
auto region = makeRegion(region_id, region_start_key, region_end_key);
727+
// the hex kv dump from RaftLog
728+
std::vector<std::tuple<std::string_view, std::string_view>> kvs = {
729+
{
730+
"7480000000000002FFA95F728000000000FF0000010000000000FAF9901806DEF7FFDA",
731+
"50A380A08892FFF9B706762C8000040000000405060708000F0016001A00BFDC4011A00000000A0080000000000A008000003CA339"
732+
"1ABC85",
733+
},
734+
{
735+
"7480000000000002FFA95F728000000000FF0000010000000000FAF9901806DEF7FFD8",
736+
"50A680A08892FFF9B706762C8000040000000405060708000F0016001A00BFDC4011A00000000A008033E04D600A008000003CA339"
737+
"1ABC85",
738+
},
739+
{
740+
"7480000000000002FFA95F728000000000FF0000020000000000FAF9901806DE33FFE8",
741+
"509680B08E92FFF9B706762580000300000004050608000F001600BF720CDD400000000A0080000000010A00800000393C",
742+
},
743+
};
744+
for (const auto & [k, v] : kvs)
745+
{
746+
region->insert("write", TiKVKey(bytesFromHexString(k)), TiKVValue(bytesFromHexString(v)));
747+
}
748+
749+
auto data_list_read = ReadRegionCommitCache(region, true);
750+
ASSERT_TRUE(data_list_read.has_value());
751+
752+
// Test with `table_info_c1_not_null_no_origin_default`
753+
auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info_c1_not_null_no_origin_default);
754+
{
755+
// force_decode=false can not decode because there are
756+
// missing value for column with not null flag.
757+
auto reader = RegionBlockReader(decoding_schema);
758+
Block res_block = createBlockSortByColumnID(decoding_schema);
759+
ASSERT_FALSE(reader.read(res_block, *data_list_read, false));
760+
}
761+
{
762+
// force_decode=true can decode the block, and filling the "zero value" for c1
763+
auto reader = RegionBlockReader(decoding_schema);
764+
Block res_block = createBlockSortByColumnID(decoding_schema);
765+
ASSERT_TRUE(reader.read(res_block, *data_list_read, true));
766+
LOG_INFO(
767+
Logger::get(),
768+
"Decoded block:\n{}",
769+
DB::tests::getColumnsContent(res_block.getColumnsWithTypeAndName()));
770+
ASSERT_EQ(res_block.getByName("c1").type->getName(), "Int64");
771+
// verify the default value is filled correctly
772+
ASSERT_COLUMN_EQ( //
773+
res_block.getByName("c1"),
774+
createColumn<Int64>({-2051270087, -2051270087, 0}));
775+
}
776+
777+
// Test with `table_info_c1_not_null_no_default_value`
778+
decoding_schema = getDecodingStorageSchemaSnapshot(table_info_c1_not_null_no_default_value);
779+
{
780+
// force_decode=false can not decode because there are
781+
// missing value for column with not null flag.
782+
auto reader = RegionBlockReader(decoding_schema);
783+
Block res_block = createBlockSortByColumnID(decoding_schema);
784+
ASSERT_FALSE(reader.read(res_block, *data_list_read, false));
785+
}
786+
{
787+
// force_decode=true can decode the block, and filling the "zero value" for c1
788+
auto reader = RegionBlockReader(decoding_schema);
789+
Block res_block = createBlockSortByColumnID(decoding_schema);
790+
ASSERT_TRUE(reader.read(res_block, *data_list_read, true));
791+
LOG_INFO(
792+
Logger::get(),
793+
"Decoded block:\n{}",
794+
DB::tests::getColumnsContent(res_block.getColumnsWithTypeAndName()));
795+
ASSERT_EQ(res_block.getByName("c1").type->getName(), "Int64");
796+
// verify the default value is filled correctly
797+
ASSERT_COLUMN_EQ( //
798+
res_block.getByName("c1"),
799+
createColumn<Int64>({-2051270087, -2051270087, 0}));
800+
}
801+
802+
// Test with `table_info_c1_not_null_with_origin_default`
803+
decoding_schema = getDecodingStorageSchemaSnapshot(table_info_c1_not_null_with_origin_default);
804+
{
805+
// force_decode=false can decode because origin_default exists and NoDefaultValue flag is not set
806+
// so RegionBlockReader can use origin_default to fill the missing value`
807+
auto reader = RegionBlockReader(decoding_schema);
808+
Block res_block = createBlockSortByColumnID(decoding_schema);
809+
ASSERT_TRUE(reader.read(res_block, *data_list_read, false));
810+
LOG_INFO(
811+
Logger::get(),
812+
"Decoded block:\n{}",
813+
DB::tests::getColumnsContent(res_block.getColumnsWithTypeAndName()));
814+
ASSERT_EQ(res_block.getByName("c1").type->getName(), "Int64");
815+
// verify the default value is filled correctly
816+
ASSERT_COLUMN_EQ( //
817+
res_block.getByName("c1"),
818+
// the third elem is filled with origin_default
819+
createColumn<Int64>({-2051270087, -2051270087, -56083770}));
820+
}
821+
822+
// Test with `table_info_c1_not_null_with_origin_default_non_public`
823+
decoding_schema = getDecodingStorageSchemaSnapshot(table_info_c1_not_null_with_origin_default_non_public);
824+
{
825+
// force_decode=false can not decode because there are
826+
// missing value for column with not null flag and the state is not public
827+
auto reader = RegionBlockReader(decoding_schema);
828+
Block res_block = createBlockSortByColumnID(decoding_schema);
829+
ASSERT_FALSE(reader.read(res_block, *data_list_read, false));
830+
}
831+
{
832+
// force_decode=true can decode the block, and filling the default value for c1
833+
auto reader = RegionBlockReader(decoding_schema);
834+
Block res_block = createBlockSortByColumnID(decoding_schema);
835+
ASSERT_TRUE(reader.read(res_block, *data_list_read, true));
836+
LOG_INFO(
837+
Logger::get(),
838+
"Decoded block:\n{}",
839+
DB::tests::getColumnsContent(res_block.getColumnsWithTypeAndName()));
840+
ASSERT_EQ(res_block.getByName("c1").type->getName(), "Int64");
841+
// verify the default value is filled correctly
842+
ASSERT_COLUMN_EQ( //
843+
res_block.getByName("c1"),
844+
// the third elem is filled with origin_default
845+
createColumn<Int64>({-2051270087, -2051270087, -56083770}));
846+
}
847+
848+
// Test with `table_info_c1_nullable`
849+
decoding_schema = getDecodingStorageSchemaSnapshot(table_info_c1_nullable);
850+
{
851+
// force_decode=false should be able to decode because c1 is nullable
852+
auto reader = RegionBlockReader(decoding_schema);
853+
Block res_block = createBlockSortByColumnID(decoding_schema);
854+
ASSERT_TRUE(reader.read(res_block, *data_list_read, false));
855+
LOG_INFO(
856+
Logger::get(),
857+
"Decoded block:\n{}",
858+
DB::tests::getColumnsContent(res_block.getColumnsWithTypeAndName()));
859+
ASSERT_EQ(res_block.getByName("c1").type->getName(), "Nullable(Int64)");
860+
// verify the default value is filled with NULL correctly at the last row
861+
ASSERT_COLUMN_EQ( //
862+
res_block.getByName("c1"),
863+
createNullableColumn<Int64>({-2051270087, -2051270087, 0}, {0, 0, 1}));
864+
}
865+
}
866+
CATCH
694867

695868
} // namespace DB::tests

0 commit comments

Comments
 (0)