@@ -2,20 +2,8 @@ package cache
22
33import (
44 "context"
5- "fmt"
6- "runtime/debug"
7- "sync"
8- "time"
95
10- lru "github.com/hashicorp/golang-lru"
11- "github.com/prometheus/client_golang/prometheus"
12- "k8s.io/client-go/util/workqueue"
13- "k8s.io/utils/clock"
14-
15- "github.com/flyteorg/flyte/flytestdlib/contextutils"
166 "github.com/flyteorg/flyte/flytestdlib/errors"
17- "github.com/flyteorg/flyte/flytestdlib/logger"
18- "github.com/flyteorg/flyte/flytestdlib/promutils"
197)
208
// ItemID uniquely identifies an item stored in the cache.
type ItemID = string
@@ -45,16 +33,6 @@ type AutoRefresh interface {
4533 DeleteDelayed (id ItemID ) error
4634}
4735
// metrics holds the Prometheus instrumentation for one cache instance,
// all registered under a single scope by newMetrics.
type metrics struct {
	SyncErrors  prometheus.Counter  // sync callbacks that returned an error
	Evictions   prometheus.Counter  // items evicted from the underlying LRU
	SyncLatency promutils.StopWatch // latency of individual sync operations
	CacheHit    prometheus.Counter
	CacheMiss   prometheus.Counter
	Size        prometheus.Gauge // number of keys in the LRU at last enqueue cycle
	scope       promutils.Scope
}
57-
// Item is the value type stored in the cache. Items reporting IsTerminal()
// are excluded from refresh batches during sync.
type Item interface {
	IsTerminal() bool
}
@@ -91,346 +69,3 @@ type SyncFunc func(ctx context.Context, batch Batch) (
// CreateBatchesFunc is a func type. Your implementation of this function for your cache instance is responsible for
// subdividing the list of cache items into batches.
type CreateBatchesFunc func(ctx context.Context, snapshot []ItemWrapper) (batches []Batch, err error)
94-
// itemWrapper pairs an item with its cache key; it is the internal
// ItemWrapper implementation used when building refresh batches.
type itemWrapper struct {
	id   ItemID
	item Item
}
99-
100- func (i itemWrapper ) GetID () ItemID {
101- return i .id
102- }
103-
104- func (i itemWrapper ) GetItem () Item {
105- return i .item
106- }
107-
// Thread-safe general purpose auto-refresh cache that watches for updates asynchronously for the keys after they are added to
// the cache. An item can be inserted only once.
//
// Get reads from sync.map while refresh is invoked on a snapshot of keys. Cache eventually catches up on deleted items.
//
// Sync is run as a fixed-interval-scheduled-task, and is skipped if sync from previous cycle is still running.
type autoRefresh struct {
	name            string // label applied to worker goroutines
	metrics         metrics
	syncCb          SyncFunc          // fetches the latest state for a batch of items
	createBatchesCb CreateBatchesFunc // partitions a key snapshot into batches
	lruMap          *lru.Cache
	// Items that are currently being processed are in the processing set.
	// It will prevent the same item from being processed multiple times by different workers.
	processing  *sync.Map
	toDelete    *syncSet // keys queued by DeleteDelayed, removed during the next enqueue cycle
	syncPeriod  time.Duration
	workqueue   workqueue.RateLimitingInterface
	parallelism uint         // number of concurrent sync workers started by Start
	lock        sync.RWMutex // guards lruMap mutations in Update and Delete
	clock       clock.Clock
}
130-
131- func getEvictionFunction (counter prometheus.Counter ) func (key interface {}, value interface {}) {
132- return func (_ interface {}, _ interface {}) {
133- counter .Inc ()
134- }
135- }
136-
137- func SingleItemBatches (_ context.Context , snapshot []ItemWrapper ) (batches []Batch , err error ) {
138- res := make ([]Batch , 0 , len (snapshot ))
139- for _ , item := range snapshot {
140- res = append (res , Batch {item })
141- }
142-
143- return res , nil
144- }
145-
146- func newMetrics (scope promutils.Scope ) metrics {
147- return metrics {
148- SyncErrors : scope .MustNewCounter ("sync_errors" , "Counter for sync errors." ),
149- Evictions : scope .MustNewCounter ("lru_evictions" , "Counter for evictions from LRU." ),
150- SyncLatency : scope .MustNewStopWatch ("latency" , "Latency for sync operations." , time .Millisecond ),
151- CacheHit : scope .MustNewCounter ("cache_hit" , "Counter for cache hits." ),
152- CacheMiss : scope .MustNewCounter ("cache_miss" , "Counter for cache misses." ),
153- Size : scope .MustNewGauge ("size" , "Current size of the cache" ),
154- scope : scope ,
155- }
156- }
157-
158- func (w * autoRefresh ) Start (ctx context.Context ) error {
159- for i := uint (0 ); i < w .parallelism ; i ++ {
160- go func (ctx context.Context ) {
161- err := w .sync (ctx )
162- if err != nil {
163- logger .Errorf (ctx , "Failed to sync. Error: %v" , err )
164- }
165- }(contextutils .WithGoroutineLabel (ctx , fmt .Sprintf ("%v-worker-%v" , w .name , i )))
166- }
167-
168- enqueueCtx := contextutils .WithGoroutineLabel (ctx , fmt .Sprintf ("%v-enqueue" , w .name ))
169- go w .enqueueLoop (enqueueCtx )
170-
171- return nil
172- }
173-
174- func (w * autoRefresh ) enqueueLoop (ctx context.Context ) {
175- timer := w .clock .NewTimer (w .syncPeriod )
176- defer timer .Stop ()
177-
178- for {
179- select {
180- case <- ctx .Done ():
181- return
182- case <- timer .C ():
183- err := w .enqueueBatches (ctx )
184- if err != nil {
185- logger .Errorf (ctx , "Failed to enqueue. Error: %v" , err )
186- }
187- timer .Reset (w .syncPeriod )
188- }
189- }
190- }
191-
192- // Update updates the item only if it exists in the cache, return true if we updated the item.
193- func (w * autoRefresh ) Update (id ItemID , item Item ) (ok bool ) {
194- w .lock .Lock ()
195- defer w .lock .Unlock ()
196- ok = w .lruMap .Contains (id )
197- if ok {
198- w .lruMap .Add (id , item )
199- }
200- return ok
201- }
202-
203- // Delete deletes the item from the cache if it exists.
204- func (w * autoRefresh ) Delete (key interface {}) {
205- w .lock .Lock ()
206- defer w .lock .Unlock ()
207- w .toDelete .Remove (key )
208- w .lruMap .Remove (key )
209- }
210-
211- func (w * autoRefresh ) Get (id ItemID ) (Item , error ) {
212- if val , ok := w .lruMap .Get (id ); ok {
213- w .metrics .CacheHit .Inc ()
214- return val .(Item ), nil
215- }
216-
217- w .metrics .CacheMiss .Inc ()
218- return nil , errors .Errorf (ErrNotFound , "Item with id [%v] not found." , id )
219- }
220-
221- // Return the item if exists else create it.
222- // Create should be invoked only once. recreating the object is not supported.
223- func (w * autoRefresh ) GetOrCreate (id ItemID , item Item ) (Item , error ) {
224- if val , ok := w .lruMap .Get (id ); ok {
225- w .metrics .CacheHit .Inc ()
226- return val .(Item ), nil
227- }
228-
229- w .lruMap .Add (id , item )
230- w .metrics .CacheMiss .Inc ()
231-
232- // It fixes cold start issue in the AutoRefreshCache by adding the item to the workqueue when it is created.
233- // This way, the item will be processed without waiting for the next sync cycle (30s by default).
234- batch := make ([]ItemWrapper , 0 , 1 )
235- batch = append (batch , itemWrapper {id : id , item : item })
236- w .workqueue .AddRateLimited (& batch )
237- w .processing .Store (id , w .clock .Now ())
238- return item , nil
239- }
240-
241- // DeleteDelayed queues an item for deletion. It Will get deleted as part of the next Sync cycle. Until the next sync
242- // cycle runs, Get and GetOrCreate will continue to return the Item in its previous state.
243- func (w * autoRefresh ) DeleteDelayed (id ItemID ) error {
244- w .toDelete .Insert (id )
245- return nil
246- }
247-
// This function is called internally by its own timer. Roughly, it will list keys, create batches of keys based on
// createBatchesCb and, enqueue all the batches into the workqueue.
func (w *autoRefresh) enqueueBatches(ctx context.Context) error {
	keys := w.lruMap.Keys()
	w.metrics.Size.Set(float64(len(keys)))

	snapshot := make([]ItemWrapper, 0, len(keys))
	for _, k := range keys {
		// Keys queued via DeleteDelayed are removed here instead of refreshed.
		if w.toDelete.Contains(k) {
			w.Delete(k)
			continue
		}
		// If not ok, it means evicted between the item was evicted between getting the keys and this update loop
		// which is fine, we can just ignore.
		if value, ok := w.lruMap.Peek(k); ok {
			// NOTE(review): when the stored value is not an Item (!ok below),
			// the value.(Item) assertion in the append would panic — presumably
			// the LRU only ever holds Item values; confirm against callers.
			if item, ok := value.(Item); !ok || (ok && !item.IsTerminal() && !w.inProcessing(k)) {
				snapshot = append(snapshot, itemWrapper{
					id:   k.(ItemID),
					item: value.(Item),
				})
			}
		}
	}

	batches, err := w.createBatchesCb(ctx, snapshot)
	if err != nil {
		return err
	}

	for _, batch := range batches {
		b := batch
		w.workqueue.AddRateLimited(&b)
		// NOTE(review): this loop starts at index 1, so the first element of
		// each batch is never recorded in the processing set — confirm this is
		// intentional and not an off-by-one.
		for i := 1; i < len(b); i++ {
			w.processing.Store(b[i].GetID(), w.clock.Now())
		}
	}

	return nil
}
287-
// There are w.parallelism instances of this function running all the time, each one will:
// - Retrieve an item from the workqueue
// - For each batch of the keys, call syncCb, which tells us if the items have been updated
// -- If any has, then overwrite the item in the cache.
//
// What happens when the number of things that a user is trying to keep track of exceeds the size
// of the cache? Trivial case where the cache is size 1 and we're trying to keep track of two things.
// * Plugin asks for update on item 1 - cache evicts item 2, stores 1 and returns it unchanged
// * Plugin asks for update on item 2 - cache evicts item 1, stores 2 and returns it unchanged
// * Sync loop updates item 2, repeat
func (w *autoRefresh) sync(ctx context.Context) (err error) {
	// Convert a worker panic into a logged error assigned to the named return,
	// so a misbehaving sync callback shuts down only this worker.
	defer func() {
		var isErr bool
		rVal := recover()
		if rVal == nil {
			return
		}

		if err, isErr = rVal.(error); isErr {
			err = fmt.Errorf("worker panic'd and is shutting down. Error: %w with Stack: %v", err, string(debug.Stack()))
		} else {
			err = fmt.Errorf("worker panic'd and is shutting down. Panic value: %v with Stack: %v", rVal, string(debug.Stack()))
		}

		logger.Error(ctx, err)
	}()

	for {
		select {
		case <-ctx.Done():
			return nil
		default:
			batch, shutdown := w.workqueue.Get()
			if shutdown {
				logger.Debugf(ctx, "Shutting down worker")
				return nil
			}
			// Since we create batches every time we sync, we will just remove the item from the queue here
			// regardless of whether it succeeded the sync or not.
			w.workqueue.Forget(batch)
			w.workqueue.Done(batch)

			// Filter the dequeued batch: drop items that disappeared from the
			// cache or turned terminal while queued; only the rest is synced.
			newBatch := make(Batch, 0, len(*batch.(*Batch)))
			for _, b := range *batch.(*Batch) {
				itemID := b.GetID()
				w.processing.Delete(itemID)
				item, ok := w.lruMap.Get(itemID)
				if !ok {
					logger.Debugf(ctx, "item with id [%v] not found in cache", itemID)
					continue
				}
				if item.(Item).IsTerminal() {
					logger.Debugf(ctx, "item with id [%v] is terminal", itemID)
					continue
				}
				newBatch = append(newBatch, b)
			}
			if len(newBatch) == 0 {
				continue
			}

			t := w.metrics.SyncLatency.Start()
			updatedBatch, err := w.syncCb(ctx, newBatch)

			if err != nil {
				w.metrics.SyncErrors.Inc()
				logger.Errorf(ctx, "failed to get latest copy of a batch. Error: %v", err)
				t.Stop()
				continue
			}

			// Apply only Update actions reported by the sync callback.
			for _, item := range updatedBatch {
				if item.Action == Update {
					// Updates an existing item.
					w.Update(item.ID, item.Item)
				}
			}

			// Flush any deletions requested via DeleteDelayed.
			w.toDelete.Range(func(key interface{}) bool {
				w.Delete(key)
				return true
			})

			t.Stop()
		}
	}
}
375-
376- // Checks if the item is currently being processed and returns false if the item has been in processing for too long
377- func (w * autoRefresh ) inProcessing (key interface {}) bool {
378- item , found := w .processing .Load (key )
379- if found {
380- // handle potential race conditions where the item is in processing but not in the workqueue
381- if timeItem , ok := item .(time.Time ); ok && w .clock .Since (timeItem ) > (w .syncPeriod * 5 ) {
382- w .processing .Delete (key )
383- return false
384- }
385- return true
386- }
387- return false
388- }
389-
390- // Instantiates a new AutoRefresh Cache that syncs items in batches.
391- func NewAutoRefreshBatchedCache (name string , createBatches CreateBatchesFunc , syncCb SyncFunc , syncRateLimiter workqueue.RateLimiter ,
392- resyncPeriod time.Duration , parallelism , size uint , scope promutils.Scope ) (AutoRefresh , error ) {
393- return newAutoRefreshBatchedCacheWithClock (name , createBatches , syncCb , syncRateLimiter , resyncPeriod , parallelism , size , scope , clock.RealClock {})
394- }
395-
396- func newAutoRefreshBatchedCacheWithClock (name string , createBatches CreateBatchesFunc , syncCb SyncFunc , syncRateLimiter workqueue.RateLimiter ,
397- resyncPeriod time.Duration , parallelism , size uint , scope promutils.Scope , clock clock.WithTicker ) (AutoRefresh , error ) {
398-
399- metrics := newMetrics (scope )
400- // #nosec G115
401- lruCache , err := lru .NewWithEvict (int (size ), getEvictionFunction (metrics .Evictions ))
402- if err != nil {
403- return nil , err
404- }
405-
406- cache := & autoRefresh {
407- name : name ,
408- metrics : metrics ,
409- parallelism : parallelism ,
410- createBatchesCb : createBatches ,
411- syncCb : syncCb ,
412- lruMap : lruCache ,
413- processing : & sync.Map {},
414- toDelete : newSyncSet (),
415- syncPeriod : resyncPeriod ,
416- workqueue : workqueue .NewRateLimitingQueueWithConfig (syncRateLimiter , workqueue.RateLimitingQueueConfig {
417- Name : scope .CurrentScope (),
418- Clock : clock ,
419- }),
420- clock : clock ,
421- }
422-
423- return cache , nil
424- }
425-
426- // Instantiates a new AutoRefresh Cache that syncs items periodically.
427- func NewAutoRefreshCache (name string , syncCb SyncFunc , syncRateLimiter workqueue.RateLimiter , resyncPeriod time.Duration ,
428- parallelism , size uint , scope promutils.Scope ) (AutoRefresh , error ) {
429-
430- return NewAutoRefreshBatchedCache (name , SingleItemBatches , syncCb , syncRateLimiter , resyncPeriod , parallelism , size , scope )
431- }
432-
433- func newAutoRefreshCacheWithClock (name string , syncCb SyncFunc , syncRateLimiter workqueue.RateLimiter , resyncPeriod time.Duration ,
434- parallelism , size uint , scope promutils.Scope , clock clock.WithTicker ) (AutoRefresh , error ) {
435- return newAutoRefreshBatchedCacheWithClock (name , SingleItemBatches , syncCb , syncRateLimiter , resyncPeriod , parallelism , size , scope , clock )
436- }
0 commit comments