@@ -8,9 +8,18 @@ use std::collections::HashMap;
8
8
use std:: fs;
9
9
use std:: io:: { Read , Write } ;
10
10
use std:: path:: { Path , PathBuf } ;
11
+ #[ cfg( feature = "tokio" ) ]
12
+ use std:: sync:: atomic:: AtomicU64 ;
11
13
use std:: sync:: atomic:: { AtomicUsize , Ordering } ;
12
14
use std:: sync:: { Arc , Mutex , RwLock } ;
13
15
16
+ #[ cfg( feature = "tokio" ) ]
17
+ use core:: future:: Future ;
18
+ #[ cfg( feature = "tokio" ) ]
19
+ use core:: pin:: Pin ;
20
+ #[ cfg( feature = "tokio" ) ]
21
+ use lightning:: util:: persist:: KVStore ;
22
+
14
23
#[ cfg( target_os = "windows" ) ]
15
24
use { std:: ffi:: OsStr , std:: os:: windows:: ffi:: OsStrExt } ;
16
25
@@ -30,47 +39,76 @@ fn path_to_windows_str<T: AsRef<OsStr>>(path: &T) -> Vec<u16> {
30
39
path. as_ref ( ) . encode_wide ( ) . chain ( Some ( 0 ) ) . collect ( )
31
40
}
32
41
33
- // The number of read/write/remove/list operations after which we clean up our `locks` HashMap.
34
- const GC_LOCK_INTERVAL : usize = 25 ;
35
-
36
42
// The number of times we retry listing keys in `FilesystemStore::list` before we give up reaching
37
43
// a consistent view and error out.
38
44
const LIST_DIR_CONSISTENCY_RETRIES : usize = 10 ;
39
45
40
- /// A [`KVStoreSync`] implementation that writes to and reads from the file system.
41
- pub struct FilesystemStore {
46
// State shared between the blocking and the async front ends of `FilesystemStore`. It is held
// behind an `Arc` so that every async operation can move its own handle into a blocking task.
struct FilesystemStoreInner {
	// The directory handed to `FilesystemStore::new` and returned by `get_data_dir`.
	data_dir: PathBuf,
	// NOTE(review): presumably used to derive unique temporary file names during writes; the
	// code using it is outside of this view -- confirm.
	tmp_file_counter: AtomicUsize,

	// One lock per destination path, making sure the same file is never written concurrently.
	// The `u64` guarded by each lock is the latest version that was written for that key. A
	// freshly created entry starts out at `0`.
	locks: Mutex<HashMap<PathBuf, Arc<RwLock<u64>>>>,
}
54
+
55
+ /// A [`KVStore`] and [`KVStoreSync`] implementation that writes to and reads from the file system.
56
+ ///
57
+ /// [`KVStore`]: lightning::util::persist::KVStore
58
+ pub struct FilesystemStore {
59
+ inner : Arc < FilesystemStoreInner > ,
60
+
61
+ // Version counter to ensure that writes are applied in the correct order. It is assumed that read, list and remove
62
+ // operations aren't sensitive to the order of execution.
63
+ #[ cfg( feature = "tokio" ) ]
64
+ version_counter : AtomicU64 ,
46
65
}
47
66
48
67
impl FilesystemStore {
49
68
/// Constructs a new [`FilesystemStore`].
50
69
pub fn new ( data_dir : PathBuf ) -> Self {
51
70
let locks = Mutex :: new ( HashMap :: new ( ) ) ;
52
71
let tmp_file_counter = AtomicUsize :: new ( 0 ) ;
53
- let gc_counter = AtomicUsize :: new ( 1 ) ;
54
- Self { data_dir, tmp_file_counter, gc_counter, locks }
72
+ Self {
73
+ inner : Arc :: new ( FilesystemStoreInner { data_dir, tmp_file_counter, locks } ) ,
74
+ #[ cfg( feature = "tokio" ) ]
75
+ version_counter : AtomicU64 :: new ( 0 ) ,
76
+ }
55
77
}
56
78
57
79
/// Returns the data directory.
58
80
pub fn get_data_dir ( & self ) -> PathBuf {
59
- self . data_dir . clone ( )
81
+ self . inner . data_dir . clone ( )
60
82
}
83
+ }
61
84
62
- fn garbage_collect_locks ( & self ) {
63
- let gc_counter = self . gc_counter . fetch_add ( 1 , Ordering :: AcqRel ) ;
85
+ impl KVStoreSync for FilesystemStore {
86
+ fn read (
87
+ & self , primary_namespace : String , secondary_namespace : String , key : String ,
88
+ ) -> Result < Vec < u8 > , lightning:: io:: Error > {
89
+ self . inner . read ( primary_namespace, secondary_namespace, key)
90
+ }
64
91
65
- if gc_counter % GC_LOCK_INTERVAL == 0 {
66
- // Take outer lock for the cleanup.
67
- let mut outer_lock = self . locks . lock ( ) . unwrap ( ) ;
92
+ fn write (
93
+ & self , primary_namespace : String , secondary_namespace : String , key : String , buf : Vec < u8 > ,
94
+ ) -> Result < ( ) , lightning:: io:: Error > {
95
+ self . inner . write_version ( primary_namespace, secondary_namespace, key, buf, None )
96
+ }
68
97
69
- // Garbage collect all lock entries that are not referenced anymore.
70
- outer_lock. retain ( |_, v| Arc :: strong_count ( & v) > 1 ) ;
71
- }
98
+ fn remove (
99
+ & self , primary_namespace : String , secondary_namespace : String , key : String , lazy : bool ,
100
+ ) -> Result < ( ) , lightning:: io:: Error > {
101
+ self . inner . remove ( primary_namespace, secondary_namespace, key, lazy)
72
102
}
73
103
104
+ fn list (
105
+ & self , primary_namespace : String , secondary_namespace : String ,
106
+ ) -> Result < Vec < String > , lightning:: io:: Error > {
107
+ self . inner . list ( primary_namespace, secondary_namespace)
108
+ }
109
+ }
110
+
111
+ impl FilesystemStoreInner {
74
112
fn get_dest_dir_path (
75
113
& self , primary_namespace : & str , secondary_namespace : & str ,
76
114
) -> std:: io:: Result < PathBuf > {
@@ -94,9 +132,7 @@ impl FilesystemStore {
94
132
95
133
Ok ( dest_dir_path)
96
134
}
97
- }
98
135
99
- impl KVStoreSync for FilesystemStore {
100
136
fn read (
101
137
& self , primary_namespace : String , secondary_namespace : String , key : String ,
102
138
) -> lightning:: io:: Result < Vec < u8 > > {
@@ -118,13 +154,14 @@ impl KVStoreSync for FilesystemStore {
118
154
f. read_to_end ( & mut buf) ?;
119
155
}
120
156
121
- self . garbage_collect_locks ( ) ;
122
-
123
157
Ok ( buf)
124
158
}
125
159
126
- fn write (
160
+ /// Writes a specific version of a key to the filesystem. If a newer version has been written already, this function
161
+ /// returns early without writing.
162
+ fn write_version (
127
163
& self , primary_namespace : String , secondary_namespace : String , key : String , buf : Vec < u8 > ,
164
+ version : Option < u64 > ,
128
165
) -> lightning:: io:: Result < ( ) > {
129
166
check_namespace_key_validity (
130
167
& primary_namespace,
@@ -164,7 +201,18 @@ impl KVStoreSync for FilesystemStore {
164
201
let mut outer_lock = self . locks . lock ( ) . unwrap ( ) ;
165
202
Arc :: clone ( & outer_lock. entry ( dest_file_path. clone ( ) ) . or_default ( ) )
166
203
} ;
167
- let _guard = inner_lock_ref. write ( ) . unwrap ( ) ;
204
+ let mut last_written_version = inner_lock_ref. write ( ) . unwrap ( ) ;
205
+
206
+ // If a version is provided, we check if we already have a newer version written. This is used in async
207
+ // contexts to realize eventual consistency.
208
+ if let Some ( version) = version {
209
+ if version <= * last_written_version {
210
+ // If the version is not greater, we don't write the file.
211
+ return Ok ( ( ) ) ;
212
+ }
213
+
214
+ * last_written_version = version;
215
+ }
168
216
169
217
#[ cfg( not( target_os = "windows" ) ) ]
170
218
{
@@ -211,8 +259,6 @@ impl KVStoreSync for FilesystemStore {
211
259
}
212
260
} ;
213
261
214
- self . garbage_collect_locks ( ) ;
215
-
216
262
res
217
263
}
218
264
@@ -312,8 +358,6 @@ impl KVStoreSync for FilesystemStore {
312
358
}
313
359
}
314
360
315
- self . garbage_collect_locks ( ) ;
316
-
317
361
Ok ( ( ) )
318
362
}
319
363
@@ -364,12 +408,75 @@ impl KVStoreSync for FilesystemStore {
364
408
break ' retry_list;
365
409
}
366
410
367
- self . garbage_collect_locks ( ) ;
368
-
369
411
Ok ( keys)
370
412
}
371
413
}
372
414
415
// Async interface. Each operation is moved onto tokio's blocking thread pool; a failure to join
// the blocking task is reported as an `ErrorKind::Other` I/O error.
#[cfg(feature = "tokio")]
impl KVStore for FilesystemStore {
	fn read(
		&self, primary_namespace: String, secondary_namespace: String, key: String,
	) -> Pin<Box<dyn Future<Output = Result<Vec<u8>, lightning::io::Error>> + 'static + Send>> {
		let inner = Arc::clone(&self.inner);
		let job = move || inner.read(primary_namespace, secondary_namespace, key);

		Box::pin(async move {
			match tokio::task::spawn_blocking(job).await {
				Ok(result) => result,
				Err(join_err) => {
					Err(lightning::io::Error::new(lightning::io::ErrorKind::Other, join_err))
				},
			}
		})
	}

	fn write(
		&self, primary_namespace: String, secondary_namespace: String, key: String, buf: Vec<u8>,
	) -> Pin<Box<dyn Future<Output = Result<(), lightning::io::Error>> + 'static + Send>> {
		let inner = Arc::clone(&self.inner);

		// The version is drawn right here, at call time, rather than inside the future. That way
		// the call sequence is retained no matter in which order the futures end up running.
		let version = self.version_counter.fetch_add(1, Ordering::Relaxed);
		if version == u64::MAX {
			panic!("FilesystemStore version counter overflowed");
		}

		let job = move || {
			inner.write_version(primary_namespace, secondary_namespace, key, buf, Some(version))
		};

		Box::pin(async move {
			match tokio::task::spawn_blocking(job).await {
				Ok(result) => result,
				Err(join_err) => {
					Err(lightning::io::Error::new(lightning::io::ErrorKind::Other, join_err))
				},
			}
		})
	}

	fn remove(
		&self, primary_namespace: String, secondary_namespace: String, key: String, lazy: bool,
	) -> Pin<Box<dyn Future<Output = Result<(), lightning::io::Error>> + 'static + Send>> {
		let inner = Arc::clone(&self.inner);
		let job = move || inner.remove(primary_namespace, secondary_namespace, key, lazy);

		Box::pin(async move {
			match tokio::task::spawn_blocking(job).await {
				Ok(result) => result,
				Err(join_err) => {
					Err(lightning::io::Error::new(lightning::io::ErrorKind::Other, join_err))
				},
			}
		})
	}

	fn list(
		&self, primary_namespace: String, secondary_namespace: String,
	) -> Pin<Box<dyn Future<Output = Result<Vec<String>, lightning::io::Error>> + 'static + Send>> {
		let inner = Arc::clone(&self.inner);
		let job = move || inner.list(primary_namespace, secondary_namespace);

		Box::pin(async move {
			match tokio::task::spawn_blocking(job).await {
				Ok(result) => result,
				Err(join_err) => {
					Err(lightning::io::Error::new(lightning::io::ErrorKind::Other, join_err))
				},
			}
		})
	}
}
479
+
373
480
fn dir_entry_is_key ( dir_entry : & fs:: DirEntry ) -> Result < bool , lightning:: io:: Error > {
374
481
let p = dir_entry. path ( ) ;
375
482
if let Some ( ext) = p. extension ( ) {
@@ -460,7 +567,7 @@ fn get_key_from_dir_entry_path(p: &Path, base_path: &Path) -> Result<String, lig
460
567
461
568
impl MigratableKVStore for FilesystemStore {
462
569
fn list_all_keys ( & self ) -> Result < Vec < ( String , String , String ) > , lightning:: io:: Error > {
463
- let prefixed_dest = & self . data_dir ;
570
+ let prefixed_dest = & self . inner . data_dir ;
464
571
if !prefixed_dest. exists ( ) {
465
572
return Ok ( Vec :: new ( ) ) ;
466
573
}
@@ -547,7 +654,7 @@ mod tests {
547
654
fn drop ( & mut self ) {
548
655
// We test for invalid directory names, so it's OK if directory removal
549
656
// fails.
550
- match fs:: remove_dir_all ( & self . data_dir ) {
657
+ match fs:: remove_dir_all ( & self . inner . data_dir ) {
551
658
Err ( e) => println ! ( "Failed to remove test persister directory: {}" , e) ,
552
659
_ => { } ,
553
660
}
@@ -562,6 +669,75 @@ mod tests {
562
669
do_read_write_remove_list_persist ( & fs_store) ;
563
670
}
564
671
672
// Exercises the async `KVStore` interface end to end: two writes to the same key whose futures
// are awaited in reverse order, followed by list, read and remove.
#[cfg(feature = "tokio")]
#[tokio::test]
async fn read_write_remove_list_persist_async() {
	use crate::fs_store::FilesystemStore;
	use lightning::util::persist::KVStore;
	use std::sync::Arc;

	let store_dir = std::env::temp_dir().join("test_read_write_remove_list_persist_async");
	let fs_store: Arc<dyn KVStore> = Arc::new(FilesystemStore::new(store_dir));

	let first_payload = vec![42u8; 32];
	let second_payload = vec![43u8; 32];

	let primary_namespace = String::from("testspace");
	let secondary_namespace = String::from("testsubspace");
	let key = String::from("testkey");

	// Test writing the same key twice with different data. Both futures are created up front, in
	// call order, and then awaited out of order to ensure that eventual consistency works.
	let first_write = fs_store.write(
		primary_namespace.clone(),
		secondary_namespace.clone(),
		key.clone(),
		first_payload,
	);
	let second_write = fs_store.write(
		primary_namespace.clone(),
		secondary_namespace.clone(),
		key.clone(),
		second_payload.clone(),
	);

	second_write.await.unwrap();
	first_write.await.unwrap();

	// Test list.
	let keys_after_write =
		fs_store.list(primary_namespace.clone(), secondary_namespace.clone()).await.unwrap();
	assert_eq!(keys_after_write.len(), 1);
	assert_eq!(keys_after_write[0], key);

	// Test read. The second payload is expected, as that write call was initiated later.
	let stored = fs_store
		.read(primary_namespace.clone(), secondary_namespace.clone(), key.clone())
		.await
		.unwrap();
	assert_eq!(second_payload, stored);

	// Test remove.
	fs_store
		.remove(primary_namespace.clone(), secondary_namespace.clone(), key.clone(), false)
		.await
		.unwrap();

	let keys_after_remove =
		fs_store.list(primary_namespace.clone(), secondary_namespace.clone()).await.unwrap();
	assert_eq!(keys_after_remove.len(), 0);
}
740
+
565
741
#[ test]
566
742
fn test_data_migration ( ) {
567
743
let mut source_temp_path = std:: env:: temp_dir ( ) ;
0 commit comments