Skip to content

Commit cf13542

Browse files
raakella1Ravi Nagarjun Akella
andauthored
Fix bugs in Simple Cache (#263)
* Fix bugs in Simple Cache * use try_lock to delete the evicted entry from hashmap on a best effort basis * fix bugs in the do_evict logic * add commenst to explain the LRU cache functionality better --------- Co-authored-by: Ravi Nagarjun Akella <raakella1@$HOSTNAME>
1 parent 7db06b1 commit cf13542

File tree

8 files changed

+231
-42
lines changed

8 files changed

+231
-42
lines changed

conanfile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
class SISLConan(ConanFile):
1111
name = "sisl"
12-
version = "12.3.3"
12+
version = "12.4.1"
1313

1414
homepage = "https://github.com/eBay/sisl"
1515
description = "Library for fast data structures, utilities"

include/sisl/cache/evictor.hpp

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,17 @@ typedef ValueEntryBase CacheRecord;
2828

2929
class Evictor {
3030
public:
31-
typedef std::function< bool(const CacheRecord&) > can_evict_cb_t;
31+
typedef std::function< bool(const CacheRecord&) > eviction_cb_t;
32+
using can_evict_cb_t = eviction_cb_t;
33+
34+
// struct to hold the eviction callbacks for each record family
35+
// can_evict_cb: called before eviction to check if the record can be evicted.
36+
// post_eviction_cb: called after eviction to do any cleanup. If this returns false, the record is reinserted.
37+
// and we try to evict the next record.
38+
struct RecordFamily {
39+
Evictor::eviction_cb_t can_evict_cb{nullptr};
40+
Evictor::eviction_cb_t post_eviction_cb{nullptr};
41+
};
3242

3343
Evictor(const int64_t max_size, const uint32_t num_partitions) :
3444
m_max_size{max_size}, m_num_partitions{num_partitions} {}
@@ -38,12 +48,12 @@ class Evictor {
3848
Evictor& operator=(Evictor&&) noexcept = delete;
3949
virtual ~Evictor() = default;
4050

41-
uint32_t register_record_family(can_evict_cb_t can_evict_cb = nullptr) {
51+
uint32_t register_record_family(RecordFamily record_family) {
4252
uint32_t id{0};
4353
std::unique_lock lk(m_reg_mtx);
44-
while (id < m_can_evict_cbs.size()) {
45-
if (m_can_evict_cbs[id].first == false) {
46-
m_can_evict_cbs[id] = std::make_pair(true, can_evict_cb);
54+
while (id < m_eviction_cbs.size()) {
55+
if (m_eviction_cbs[id].first == false) {
56+
m_eviction_cbs[id] = std::make_pair(true, record_family);
4757
return id;
4858
}
4959
++id;
@@ -54,7 +64,7 @@ class Evictor {
5464

5565
void unregister_record_family(const uint32_t record_type_id) {
5666
std::unique_lock lk(m_reg_mtx);
57-
m_can_evict_cbs[record_type_id] = std::make_pair(false, nullptr);
67+
m_eviction_cbs[record_type_id] = std::make_pair(false, RecordFamily{});
5868
}
5969

6070
virtual bool add_record(uint64_t hash_code, CacheRecord& record) = 0;
@@ -64,13 +74,14 @@ class Evictor {
6474

6575
int64_t max_size() const { return m_max_size; }
6676
uint32_t num_partitions() const { return m_num_partitions; }
67-
const can_evict_cb_t& can_evict_cb(const uint32_t record_id) const { return m_can_evict_cbs[record_id].second; }
77+
const eviction_cb_t& can_evict_cb(const uint32_t record_id) const { return m_eviction_cbs[record_id].second.can_evict_cb; }
78+
const eviction_cb_t& post_eviction_cb(const uint32_t record_id) const { return m_eviction_cbs[record_id].second.post_eviction_cb; }
6879

6980
private:
7081
int64_t m_max_size;
7182
uint32_t m_num_partitions;
7283

7384
std::mutex m_reg_mtx;
74-
std::array< std::pair< bool, can_evict_cb_t >, CacheRecord::max_record_families() > m_can_evict_cbs;
85+
std::array< std::pair< bool /*registered*/, RecordFamily >, CacheRecord::max_record_families() > m_eviction_cbs;
7586
};
7687
} // namespace sisl

include/sisl/cache/lru_evictor.hpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,15 @@ class LRUEvictor : public Evictor {
4949

5050
void record_resized(uint64_t hash_code, const CacheRecord& record, uint32_t old_size) override;
5151

52+
// for testing purpose
53+
int64_t filled_size() {
54+
int64_t filled_size{0};
55+
for (uint32_t i{0}; i < num_partitions(); ++i) {
56+
filled_size += m_partitions[i].filled_size();
57+
}
58+
return filled_size;
59+
}
60+
5261
private:
5362
typedef list<
5463
ValueEntryBase,
@@ -82,10 +91,16 @@ class LRUEvictor : public Evictor {
8291
void record_accessed(CacheRecord& record);
8392
void record_resized(const CacheRecord& record, uint32_t old_size);
8493

94+
// for testing purpose
95+
int64_t filled_size() {
96+
std::unique_lock guard{m_list_guard};
97+
return m_filled_size;
98+
}
99+
85100
private:
86101
bool do_evict(const uint32_t record_fid, const uint32_t needed_size);
87102
bool will_fill(const uint32_t new_size) const { return ((m_filled_size + new_size) > m_max_size); }
88-
bool is_full() const { return will_fill(0); }
103+
bool is_full() const { return (m_filled_size >= m_max_size); }
89104
};
90105

91106
private:

include/sisl/cache/range_cache.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class RangeCache {
3939
m_map{RangeHashMap< K >(num_buckets, bind_this(RangeCache< K >::extract_value, 3),
4040
bind_this(RangeCache< K >::on_hash_operation, 4))},
4141
m_per_value_size{per_val_size} {
42-
m_evictor->register_record_family(std::move(evict_cb));
42+
m_evictor->register_record_family(Evictor::RecordFamily{.can_evict_cb = evict_cb});
4343
}
4444

4545
~RangeCache() { m_evictor->unregister_record_family(m_record_family_id); }

include/sisl/cache/simple_cache.hpp

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,19 +37,40 @@ class SimpleCache {
3737

3838
public:
3939
SimpleCache(const std::shared_ptr< Evictor >& evictor, uint32_t num_buckets, uint32_t per_val_size,
40-
key_extractor_cb_t< K, V >&& extract_cb, Evictor::can_evict_cb_t evict_cb = nullptr) :
40+
key_extractor_cb_t< K, V >&& extract_cb, Evictor::eviction_cb_t evict_cb = nullptr) :
4141
m_evictor{evictor},
4242
m_key_extract_cb{std::move(extract_cb)},
4343
m_map{num_buckets, m_key_extract_cb, std::bind(&SimpleCache< K, V >::on_hash_operation, this, _1, _2, _3)},
4444
m_per_value_size{per_val_size} {
45-
m_record_family_id = m_evictor->register_record_family(std::move(evict_cb));
45+
// Register the record family callbacks with the evictor:
46+
// - `can_evict_cb`: Provided by the user of the `SimpleCache`. This callback determines whether a record can be evicted.
47+
// - `post_eviction_cb`: Owned by the `SimpleCache`. This callback is used to remove the evicted record from the hashmap.
48+
// Note: We cannot directly call `erase` on the hashmap as it might result in deadlocks. Instead, we use `try_erase`,
49+
// which attempts to acquire the bucket lock using `try_lock`. If the lock cannot be acquired, the method returns `false`.
50+
// In such cases, we notify the evictor to skip evicting this record and try the next one.
51+
m_record_family_id = m_evictor->register_record_family(Evictor::RecordFamily{.can_evict_cb = evict_cb
52+
, .post_eviction_cb = [this](const CacheRecord& record) {
53+
V const value = reinterpret_cast<SingleEntryHashNode< V >*>(const_cast< CacheRecord* >(&record))->m_value;
54+
K key = m_key_extract_cb(value);
55+
return m_map.try_erase(key);
56+
}});
4657
}
4758

4859
~SimpleCache() { m_evictor->unregister_record_family(m_record_family_id); }
4960

5061
bool insert(const V& value) {
5162
K k = m_key_extract_cb(value);
52-
return m_map.insert(k, value);
63+
bool ret = m_map.insert(k, value);
64+
if (t_failed_keys.size()) {
65+
// There are some failures to add for some keys
66+
for (auto& key : t_failed_keys) {
67+
V dummy_v;
68+
m_map.erase(key, dummy_v);
69+
ret = false;
70+
}
71+
t_failed_keys.clear();
72+
}
73+
return ret;
5374
}
5475

5576
bool upsert(const V& value) {
@@ -101,4 +122,5 @@ class SimpleCache {
101122

102123
template < typename K, typename V >
103124
thread_local std::set< K > SimpleCache< K, V >::t_failed_keys;
125+
104126
} // namespace sisl

include/sisl/cache/simple_hashmap.hpp

Lines changed: 48 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ class SimpleHashMap {
7373
bool upsert(const K& key, const V& value);
7474
bool get(const K& input_key, V& out_val);
7575
bool erase(const K& key, V& out_val);
76+
bool try_erase(const K& key);
7677
bool update(const K& key, auto&& update_cb);
7778
bool upsert_or_delete(const K& key, auto&& update_or_delete_cb);
7879

@@ -184,27 +185,24 @@ class SimpleHashBucket {
184185
#ifndef GLOBAL_HASHSET_LOCK
185186
folly::SharedMutexWritePriority::WriteHolder holder(m_lock);
186187
#endif
187-
SingleEntryHashNode< V >* n = nullptr;
188-
189-
auto it = m_list.begin();
190-
for (auto itend{m_list.end()}; it != itend; ++it) {
191-
const K k = SimpleHashMap< K, V >::extractor_cb()(it->m_value);
192-
if (input_key > k) {
193-
break;
194-
} else if (input_key == k) {
195-
n = &*it;
196-
break;
197-
}
198-
}
188+
return erase_unsafe(input_key, out_val, true /* call_access_cb */);
189+
}
199190

200-
if (n) {
201-
access_cb(*n, input_key, hash_op_t::DELETE);
202-
out_val = n->m_value;
203-
m_list.erase(it);
204-
delete n;
205-
return true;
191+
bool try_erase(const K& input_key) {
192+
V dummy_val;
193+
#ifndef GLOBAL_HASHSET_LOCK
194+
if(m_lock.try_lock()) {
195+
bool ret = erase_unsafe(input_key, dummy_val, false /* call_access_cb */);
196+
m_lock.unlock();
197+
return ret;
198+
} else {
199+
return false;
206200
}
207-
return false;
201+
#else
202+
// We are in global hashset lock
203+
return erase_unsafe(input_key, dummy_val, false); #endif
204+
#endif
205+
return erase_unsafe(input_key, dummy_val, false);
208206
}
209207

210208
bool upsert_or_delete(const K& input_key, auto&& update_or_delete_cb) {
@@ -266,9 +264,33 @@ class SimpleHashBucket {
266264
static void access_cb(const SingleEntryHashNode< V >& node, const K& key, hash_op_t op) {
267265
SimpleHashMap< K, V >::call_access_cb((const ValueEntryBase&)node, key, op);
268266
}
267+
268+
bool erase_unsafe(const K& input_key, V& out_val, bool call_access_cb) {
269+
SingleEntryHashNode< V >* n = nullptr;
270+
271+
auto it = m_list.begin();
272+
for (auto itend{m_list.end()}; it != itend; ++it) {
273+
const K k = SimpleHashMap< K, V >::extractor_cb()(it->m_value);
274+
if (input_key > k) {
275+
break;
276+
} else if (input_key == k) {
277+
n = &*it;
278+
break;
279+
}
280+
}
281+
282+
if (n) {
283+
if (call_access_cb) { access_cb(*n, input_key, hash_op_t::DELETE); }
284+
out_val = n->m_value;
285+
m_list.erase(it);
286+
delete n;
287+
return true;
288+
}
289+
return false;
290+
}
269291
};
270292

271-
///////////////////////////////////////////// RangeHashMap Definitions ///////////////////////////////////
293+
///////////////////////////////////////////// SimpleHashMap Definitions ///////////////////////////////////
272294
template < typename K, typename V >
273295
SimpleHashMap< K, V >::SimpleHashMap(uint32_t nBuckets, const key_extractor_cb_t< K, V >& extract_cb,
274296
key_access_cb_t< K > access_cb) :
@@ -317,6 +339,12 @@ bool SimpleHashMap< K, V >::erase(const K& key, V& out_val) {
317339
return get_bucket(key).erase(key, out_val);
318340
}
319341

342+
template < typename K, typename V >
343+
bool SimpleHashMap< K, V >::try_erase(const K& key) {
344+
set_current_instance(this);
345+
return get_bucket(key).try_erase(key);
346+
}
347+
320348
/// This is a special atomic operation where user can insert_or_update_or_erase based on condition atomically. It
321349
/// performs differently based on certain conditions.
322350
///

src/cache/lru_evictor.cpp

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ void LRUEvictor::record_resized(uint64_t hash_code, const CacheRecord &record,
4646

4747
bool LRUEvictor::LRUPartition::add_record(CacheRecord &record) {
4848
std::unique_lock guard{m_list_guard};
49-
if (will_fill(record.size()) > m_max_size) {
49+
if (will_fill(record.size())) {
5050
if (!do_evict(record.record_family_id(), record.size())) {
5151
return false;
5252
}
@@ -58,13 +58,27 @@ bool LRUEvictor::LRUPartition::add_record(CacheRecord &record) {
5858

5959
void LRUEvictor::LRUPartition::remove_record(CacheRecord &record) {
6060
std::unique_lock guard{m_list_guard};
61+
62+
// accessing the iterator to the record crashes if the record is not present in the list.
63+
if (!record.m_member_hook.is_linked()) {
64+
LOGERROR("Not expected! Record not found in partition {}", m_partition_num);
65+
DEBUG_ASSERT(false, "Not expected! Record not found");
66+
return;
67+
}
6168
auto it = m_list.iterator_to(record);
6269
m_filled_size -= record.size();
6370
m_list.erase(it);
6471
}
6572

6673
void LRUEvictor::LRUPartition::record_accessed(CacheRecord &record) {
6774
std::unique_lock guard{m_list_guard};
75+
76+
// accessing the iterator to the record crashes if the record is not present in the list.
77+
if (!record.m_member_hook.is_linked()) {
78+
LOGERROR("Not Expected! Record not found in partition {}", m_partition_num);
79+
DEBUG_ASSERT(false, "Not expected! Record not found");
80+
return;
81+
}
6882
m_list.erase(m_list.iterator_to(record));
6983
m_list.push_back(record);
7084
}
@@ -82,14 +96,24 @@ bool LRUEvictor::LRUPartition::do_evict(const uint32_t record_fid,
8296
auto it = std::begin(m_list);
8397
while (will_fill(needed_size) && (it != std::end(m_list))) {
8498
CacheRecord &rec = *it;
85-
99+
bool eviction_failed{true};
86100
/* return the next element */
87-
if (!rec.is_pinned() && m_evictor->can_evict_cb(record_fid)(rec)) {
88-
m_filled_size -= rec.size();
101+
if (!rec.is_pinned() && (!m_evictor->can_evict_cb(record_fid) || m_evictor->can_evict_cb(record_fid)(rec))) {
102+
auto const rec_size = rec.size();
89103
it = m_list.erase(it);
90-
} else {
91-
++count;
92-
it = std::next(it);
104+
if (m_evictor->post_eviction_cb(record_fid) && !m_evictor->post_eviction_cb(record_fid)(rec)) {
105+
// If the post eviction callback fails, we need to reinsert the record
106+
// back into the list.
107+
it = m_list.insert(it, rec);
108+
} else {
109+
eviction_failed = false;
110+
m_filled_size -= rec_size;
111+
}
112+
}
113+
114+
if (eviction_failed) {
115+
++count;
116+
it = std::next(it);
93117
}
94118
}
95119

0 commit comments

Comments
 (0)