Skip to content

Commit d854139

Browse files
authored
branch-3.1: [fix](inverted index) catch IO exception to avoid coredump in inverted index string reader #51844 (#52867)
cherry pick from #51844
1 parent 64fef99 commit d854139

File tree

2 files changed

+203
-38
lines changed

2 files changed

+203
-38
lines changed

be/src/olap/rowset/segment_v2/inverted_index_reader.cpp

Lines changed: 37 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -364,32 +364,31 @@ Status StringTypeInvertedIndexReader::query(const io::IOContext* io_ctx,
364364
std::string search_str(search_query->data, act_len);
365365
VLOG_DEBUG << "begin to query the inverted index from clucene"
366366
<< ", column_name: " << column_name << ", search_str: " << search_str;
367+
try {
368+
auto index_file_key = _inverted_index_file_reader->get_index_file_cache_key(&_index_meta);
369+
// try to get query bitmap result from cache and return immediately on cache hit
370+
InvertedIndexQueryCache::CacheKey cache_key {index_file_key, column_name, query_type,
371+
search_str};
372+
auto* cache = InvertedIndexQueryCache::instance();
373+
InvertedIndexQueryCacheHandle cache_handler;
374+
auto cache_status = handle_query_cache(cache, cache_key, &cache_handler, stats, bit_map);
375+
if (cache_status.ok()) {
376+
return Status::OK();
377+
}
367378

368-
auto index_file_key = _inverted_index_file_reader->get_index_file_cache_key(&_index_meta);
369-
// try to get query bitmap result from cache and return immediately on cache hit
370-
InvertedIndexQueryCache::CacheKey cache_key {index_file_key, column_name, query_type,
371-
search_str};
372-
auto* cache = InvertedIndexQueryCache::instance();
373-
InvertedIndexQueryCacheHandle cache_handler;
374-
auto cache_status = handle_query_cache(cache, cache_key, &cache_handler, stats, bit_map);
375-
if (cache_status.ok()) {
376-
return Status::OK();
377-
}
378-
379-
std::wstring column_name_ws = StringUtil::string_to_wstring(column_name);
379+
std::wstring column_name_ws = StringUtil::string_to_wstring(column_name);
380380

381-
InvertedIndexQueryInfo query_info;
382-
query_info.field_name = column_name_ws;
383-
query_info.term_infos.emplace_back(search_str, 0);
381+
InvertedIndexQueryInfo query_info;
382+
query_info.field_name = column_name_ws;
383+
query_info.term_infos.emplace_back(search_str, 0);
384384

385-
auto result = std::make_shared<roaring::Roaring>();
386-
FulltextIndexSearcherPtr* searcher_ptr = nullptr;
387-
InvertedIndexCacheHandle inverted_index_cache_handle;
388-
RETURN_IF_ERROR(handle_searcher_cache(&inverted_index_cache_handle, io_ctx, stats));
389-
auto searcher_variant = inverted_index_cache_handle.get_index_searcher();
390-
searcher_ptr = std::get_if<FulltextIndexSearcherPtr>(&searcher_variant);
391-
if (searcher_ptr != nullptr) {
392-
try {
385+
auto result = std::make_shared<roaring::Roaring>();
386+
FulltextIndexSearcherPtr* searcher_ptr = nullptr;
387+
InvertedIndexCacheHandle inverted_index_cache_handle;
388+
RETURN_IF_ERROR(handle_searcher_cache(&inverted_index_cache_handle, io_ctx, stats));
389+
auto searcher_variant = inverted_index_cache_handle.get_index_searcher();
390+
searcher_ptr = std::get_if<FulltextIndexSearcherPtr>(&searcher_variant);
391+
if (searcher_ptr != nullptr) {
393392
switch (query_type) {
394393
case InvertedIndexQueryType::MATCH_ANY_QUERY:
395394
case InvertedIndexQueryType::MATCH_ALL_QUERY:
@@ -441,27 +440,28 @@ Status StringTypeInvertedIndexReader::query(const io::IOContext* io_ctx,
441440
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
442441
"invalid query type when query untokenized inverted index");
443442
}
444-
} catch (const CLuceneError& e) {
445-
if (is_range_query(query_type) && e.number() == CL_ERR_TooManyClauses) {
446-
return Status::Error<ErrorCode::INVERTED_INDEX_BYPASS>(
447-
"range query term exceeds limits, try to downgrade from inverted index, "
448-
"column "
449-
"name:{}, search_str:{}",
450-
column_name, search_str);
451-
} else {
452-
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
453-
"CLuceneError occured, error msg: {}, column name: {}, search_str: {}",
454-
e.what(), column_name, search_str);
455-
}
456443
}
457-
458444
// add to cache
459445
result->runOptimize();
460446
cache->insert(cache_key, result, &cache_handler);
461447

462448
bit_map = result;
449+
return Status::OK();
450+
} catch (const CLuceneError& e) {
451+
if (is_range_query(query_type) && e.number() == CL_ERR_TooManyClauses) {
452+
return Status::Error<ErrorCode::INVERTED_INDEX_BYPASS>(
453+
"range query term exceeds limits, try to downgrade from inverted index, "
454+
"column "
455+
"name:{}, search_str:{}",
456+
column_name, search_str);
457+
} else {
458+
LOG(ERROR) << "CLuceneError occurred, error msg: " << e.what()
459+
<< ", column name: " << column_name << ", search_str: " << search_str;
460+
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
461+
"CLuceneError occurred, error msg: {}, column name: {}, search_str: {}",
462+
e.what(), column_name, search_str);
463+
}
463464
}
464-
return Status::OK();
465465
}
466466

467467
InvertedIndexReaderType StringTypeInvertedIndexReader::type() {

be/test/olap/rowset/segment_v2/inverted_index_reader_test.cpp

Lines changed: 166 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1531,6 +1531,161 @@ class InvertedIndexReaderTest : public testing::Test {
15311531
}
15321532
}
15331533

1534+
class MockStringTypeInvertedIndexReader final : public StringTypeInvertedIndexReader {
1535+
public:
1536+
static std::shared_ptr<MockStringTypeInvertedIndexReader> create_shared(
1537+
const TabletIndex* idx_meta,
1538+
std::shared_ptr<InvertedIndexFileReader>& file_reader) {
1539+
return std::shared_ptr<MockStringTypeInvertedIndexReader>(
1540+
new MockStringTypeInvertedIndexReader(idx_meta, file_reader));
1541+
}
1542+
1543+
protected:
1544+
Status handle_searcher_cache(InvertedIndexCacheHandle*, const io::IOContext*,
1545+
OlapReaderStatistics*) override {
1546+
CLuceneError err;
1547+
err.set(CL_ERR_IO, "mock handle_searcher_cache failure");
1548+
throw err;
1549+
}
1550+
1551+
private:
1552+
MockStringTypeInvertedIndexReader(const TabletIndex* idx_meta,
1553+
std::shared_ptr<InvertedIndexFileReader>& file_reader)
1554+
: StringTypeInvertedIndexReader(idx_meta, file_reader) {}
1555+
};
1556+
1557+
// Mock class for testing tokenized index query exceptions
1558+
class MockTokenizedStringTypeInvertedIndexReader final : public FullTextIndexReader {
1559+
public:
1560+
static std::shared_ptr<MockTokenizedStringTypeInvertedIndexReader> create_shared(
1561+
const TabletIndex* idx_meta,
1562+
std::shared_ptr<InvertedIndexFileReader>& file_reader) {
1563+
return std::shared_ptr<MockTokenizedStringTypeInvertedIndexReader>(
1564+
new MockTokenizedStringTypeInvertedIndexReader(idx_meta, file_reader));
1565+
}
1566+
1567+
protected:
1568+
Status handle_searcher_cache(InvertedIndexCacheHandle*, const io::IOContext*,
1569+
OlapReaderStatistics*) override {
1570+
CLuceneError err;
1571+
err.set(CL_ERR_IO, "mock tokenized index searcher cache failure");
1572+
throw err;
1573+
}
1574+
1575+
private:
1576+
MockTokenizedStringTypeInvertedIndexReader(
1577+
const TabletIndex* idx_meta, std::shared_ptr<InvertedIndexFileReader>& file_reader)
1578+
: FullTextIndexReader(idx_meta, file_reader) {}
1579+
};
1580+
1581+
void test_cache_error_scenarios() {
1582+
std::string_view rowset_id = "test_handle_searcher_cache_exception";
1583+
int seg_id = 0;
1584+
std::vector<Slice> values = {Slice("apple"), Slice("banana")};
1585+
1586+
TabletIndex idx_meta;
1587+
{
1588+
auto index_meta_pb = std::make_unique<TabletIndexPB>();
1589+
index_meta_pb->set_index_type(IndexType::INVERTED);
1590+
index_meta_pb->set_index_id(1);
1591+
index_meta_pb->set_index_name("test_mock_cache");
1592+
index_meta_pb->add_col_unique_id(1); // c2
1593+
idx_meta.init_from_pb(*index_meta_pb);
1594+
}
1595+
1596+
std::string index_path_prefix;
1597+
prepare_string_index(rowset_id, seg_id, values, &idx_meta, &index_path_prefix);
1598+
1599+
auto file_reader = std::make_shared<InvertedIndexFileReader>(
1600+
io::global_local_filesystem(), index_path_prefix, InvertedIndexStorageFormatPB::V2);
1601+
ASSERT_TRUE(file_reader->init().ok());
1602+
1603+
auto mock_reader = MockStringTypeInvertedIndexReader::create_shared(&idx_meta, file_reader);
1604+
ASSERT_NE(mock_reader, nullptr);
1605+
1606+
io::IOContext io_ctx;
1607+
OlapReaderStatistics stats;
1608+
RuntimeState runtime_state;
1609+
TQueryOptions opts;
1610+
runtime_state.set_query_options(opts);
1611+
1612+
std::shared_ptr<roaring::Roaring> bitmap = std::make_shared<roaring::Roaring>();
1613+
std::string field_name = "1"; // c2 unique_id
1614+
StringRef query_val(values[0].data, values[0].size);
1615+
1616+
Status st = mock_reader->query(&io_ctx, &stats, &runtime_state, field_name, &query_val,
1617+
InvertedIndexQueryType::EQUAL_QUERY, bitmap);
1618+
1619+
EXPECT_FALSE(st.ok());
1620+
EXPECT_EQ(st.code(), ErrorCode::INVERTED_INDEX_CLUCENE_ERROR);
1621+
}
1622+
1623+
void test_tokenized_index_query_error_scenarios() {
1624+
std::string_view rowset_id = "test_tokenized_index_query_exception";
1625+
int seg_id = 0;
1626+
std::vector<Slice> values = {Slice("Hello world this is a test"),
1627+
Slice("Apache Doris is a modern analytics database"),
1628+
Slice("Inverted index provides fast text search")};
1629+
1630+
TabletIndex idx_meta;
1631+
{
1632+
auto index_meta_pb = std::make_unique<TabletIndexPB>();
1633+
index_meta_pb->set_index_type(IndexType::INVERTED);
1634+
index_meta_pb->set_index_id(2);
1635+
index_meta_pb->set_index_name("test_tokenized_mock_cache");
1636+
index_meta_pb->add_col_unique_id(1); // c2
1637+
1638+
// Set tokenized index properties
1639+
auto* properties = index_meta_pb->mutable_properties();
1640+
(*properties)[INVERTED_INDEX_PARSER_KEY] = INVERTED_INDEX_PARSER_ENGLISH;
1641+
(*properties)[INVERTED_INDEX_PARSER_PHRASE_SUPPORT_KEY] =
1642+
INVERTED_INDEX_PARSER_PHRASE_SUPPORT_YES;
1643+
(*properties)[INVERTED_INDEX_PARSER_LOWERCASE_KEY] = INVERTED_INDEX_PARSER_TRUE;
1644+
1645+
idx_meta.init_from_pb(*index_meta_pb);
1646+
}
1647+
1648+
std::string index_path_prefix;
1649+
prepare_string_index(rowset_id, seg_id, values, &idx_meta, &index_path_prefix);
1650+
1651+
auto file_reader = std::make_shared<InvertedIndexFileReader>(
1652+
io::global_local_filesystem(), index_path_prefix, InvertedIndexStorageFormatPB::V2);
1653+
ASSERT_TRUE(file_reader->init().ok());
1654+
1655+
auto mock_reader =
1656+
MockTokenizedStringTypeInvertedIndexReader::create_shared(&idx_meta, file_reader);
1657+
ASSERT_NE(mock_reader, nullptr);
1658+
1659+
io::IOContext io_ctx;
1660+
OlapReaderStatistics stats;
1661+
RuntimeState runtime_state;
1662+
TQueryOptions opts;
1663+
runtime_state.set_query_options(opts);
1664+
1665+
std::shared_ptr<roaring::Roaring> bitmap = std::make_shared<roaring::Roaring>();
1666+
std::string field_name = "1"; // c2 unique_id
1667+
1668+
// Test tokenized query with "world" which should be found in "Hello world this is a test"
1669+
std::string query_term = "world";
1670+
StringRef query_val(query_term.data(), query_term.size());
1671+
1672+
Status st = mock_reader->query(&io_ctx, &stats, &runtime_state, field_name, &query_val,
1673+
InvertedIndexQueryType::MATCH_ANY_QUERY, bitmap);
1674+
1675+
EXPECT_FALSE(st.ok());
1676+
EXPECT_EQ(st.code(), ErrorCode::INVERTED_INDEX_CLUCENE_ERROR);
1677+
1678+
// Test phrase query
1679+
std::string phrase_query = "Apache Doris";
1680+
StringRef phrase_query_val(phrase_query.data(), phrase_query.size());
1681+
1682+
st = mock_reader->query(&io_ctx, &stats, &runtime_state, field_name, &phrase_query_val,
1683+
InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap);
1684+
1685+
EXPECT_FALSE(st.ok());
1686+
EXPECT_EQ(st.code(), ErrorCode::INVERTED_INDEX_CLUCENE_ERROR);
1687+
}
1688+
15341689
private:
15351690
std::unique_ptr<InvertedIndexSearcherCache> _inverted_index_searcher_cache;
15361691
std::unique_ptr<InvertedIndexQueryCache> _inverted_index_query_cache;
@@ -1561,4 +1716,14 @@ TEST_F(InvertedIndexReaderTest, CompatibleTest) {
15611716
test_compatible_read_cross_platform();
15621717
}
15631718

1564-
} // namespace doris::segment_v2
1719+
// Test cache error scenarios that could crash BE
1720+
TEST_F(InvertedIndexReaderTest, CacheErrorScenarios) {
1721+
test_cache_error_scenarios();
1722+
}
1723+
1724+
// Test tokenized index query error scenarios
1725+
TEST_F(InvertedIndexReaderTest, TokenizedIndexQueryErrorScenarios) {
1726+
test_tokenized_index_query_error_scenarios();
1727+
}
1728+
1729+
} // namespace doris::segment_v2

0 commit comments

Comments
 (0)