Skip to content

Commit c86a07d

Browse files
authored
Turbopack: Streaming write of SST files (#82048)
### What? Write SST while processing blocks to avoid keeping all blocks in memory. Reduces temporary memory used by persisting.
1 parent 4f41092 commit c86a07d

File tree

8 files changed

+529
-451
lines changed

8 files changed

+529
-451
lines changed

turbopack/crates/turbo-persistence-tools/src/main.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ fn main() -> Result<()> {
3131
sequence_number,
3232
min_hash,
3333
max_hash,
34-
aqmf_size,
35-
aqmf_entries,
34+
amqf_size,
35+
amqf_entries,
3636
sst_size,
3737
key_compression_dictionary_size,
3838
value_compression_dictionary_size,
@@ -43,7 +43,7 @@ fn main() -> Result<()> {
4343
" SST {sequence_number:08}.sst: {min_hash:016x} - {max_hash:016x} (p = 1/{})",
4444
u64::MAX / (max_hash - min_hash + 1)
4545
);
46-
println!(" AQMF {aqmf_entries} entries = {} KiB", aqmf_size / 1024);
46+
println!(" AMQF {amqf_entries} entries = {} KiB", amqf_size / 1024);
4747
println!(
4848
" {} KiB = {} kiB key compression dict + {} KiB value compression dict + \
4949
{block_count} blocks (avg {} bytes/block)",

turbopack/crates/turbo-persistence/README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ There are four different file types:
2121
- Static Sorted Table (SST, `*.sst`): These files contain key value pairs.
2222
- Blob files (`*.blob`): These files contain large values.
2323
- Delete files (`*.del`): These files contain a list of sequence numbers of files that should be considered as deleted.
24-
- Meta files (`*.meta`): These files contain metadata about the SST files. They contains the hash range and a AQMF for quick filtering.
24+
- Meta files (`*.meta`): These files contain metadata about the SST files. They contain the hash range and an AMQF for quick filtering.
2525

2626
Therefore there are these value types:
2727

@@ -50,9 +50,9 @@ A meta file can contain metadata about multiple SST files. The metadata is store
5050
- 8 bytes min hash
5151
- 8 bytes max hash
5252
- 8 bytes SST file size
53-
- 4 bytes end of AQMF offset relative to start of all AQMF data
53+
- 4 bytes end of AMQF offset relative to start of all AMQF data
5454
- foreach described SST file
55-
- serialized AQMF
55+
- serialized AMQF
5656

5757
### SST file
5858

@@ -139,7 +139,7 @@ Reading start from the current sequence number and goes downwards.
139139

140140
- We have all SST files memory mapped
141141
- for i = CURRENT sequence number .. 0
142-
- Check AQMF from SST file for key existance -> if not continue
142+
- Check AMQF from SST file for key existence -> if not continue
143143
- let block = 0
144144
- loop
145145
- Index Block: find key range that contains the key by binary search

turbopack/crates/turbo-persistence/src/constants.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ pub const DATA_THRESHOLD_PER_COMPACTED_FILE: usize = 256 * 1024 * 1024;
2121
/// MAX_ENTRIES_PER_INITIAL_FILE and DATA_THRESHOLD_PER_INITIAL_FILE.
2222
pub const THREAD_LOCAL_SIZE_SHIFT: usize = 7;
2323

24-
/// Maximum RAM bytes for AQMF cache
25-
pub const AQMF_CACHE_SIZE: u64 = 300 * 1024 * 1024;
26-
pub const AQMF_AVG_SIZE: usize = 37399;
24+
/// Maximum RAM bytes for AMQF cache
25+
pub const AMQF_CACHE_SIZE: u64 = 300 * 1024 * 1024;
26+
pub const AMQF_AVG_SIZE: usize = 37399;
2727

2828
/// Maximum RAM bytes for key block cache
2929
pub const KEY_BLOCK_CACHE_SIZE: u64 = 400 * 1024 * 1024;

turbopack/crates/turbo-persistence/src/db.rs

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -27,18 +27,18 @@ use crate::{
2727
arc_slice::ArcSlice,
2828
compaction::selector::{Compactable, compute_metrics, get_merge_segments},
2929
constants::{
30-
AQMF_AVG_SIZE, AQMF_CACHE_SIZE, DATA_THRESHOLD_PER_COMPACTED_FILE, KEY_BLOCK_AVG_SIZE,
30+
AMQF_AVG_SIZE, AMQF_CACHE_SIZE, DATA_THRESHOLD_PER_COMPACTED_FILE, KEY_BLOCK_AVG_SIZE,
3131
KEY_BLOCK_CACHE_SIZE, MAX_ENTRIES_PER_COMPACTED_FILE, VALUE_BLOCK_AVG_SIZE,
3232
VALUE_BLOCK_CACHE_SIZE,
3333
},
3434
key::{StoreKey, hash_key},
3535
lookup_entry::{LookupEntry, LookupValue},
3636
merge_iter::MergeIter,
37-
meta_file::{AqmfCache, MetaFile, MetaLookupResult, StaticSortedFileRange},
37+
meta_file::{AmqfCache, MetaFile, MetaLookupResult, StaticSortedFileRange},
3838
meta_file_builder::MetaFileBuilder,
3939
sst_filter::SstFilter,
4040
static_sorted_file::{BlockCache, SstLookupResult},
41-
static_sorted_file_builder::{StaticSortedFileBuilder, StaticSortedFileBuilderMeta},
41+
static_sorted_file_builder::{StaticSortedFileBuilderMeta, write_static_stored_file},
4242
write_batch::{FinishResult, WriteBatch},
4343
};
4444

@@ -84,12 +84,12 @@ pub struct Statistics {
8484
pub sst_files: usize,
8585
pub key_block_cache: CacheStatistics,
8686
pub value_block_cache: CacheStatistics,
87-
pub aqmf_cache: CacheStatistics,
87+
pub amqf_cache: CacheStatistics,
8888
pub hits: u64,
8989
pub misses: u64,
9090
pub miss_family: u64,
9191
pub miss_range: u64,
92-
pub miss_aqmf: u64,
92+
pub miss_amqf: u64,
9393
pub miss_key: u64,
9494
}
9595

@@ -101,7 +101,7 @@ struct TrackedStats {
101101
hits_blob: std::sync::atomic::AtomicU64,
102102
miss_family: std::sync::atomic::AtomicU64,
103103
miss_range: std::sync::atomic::AtomicU64,
104-
miss_aqmf: std::sync::atomic::AtomicU64,
104+
miss_amqf: std::sync::atomic::AtomicU64,
105105
miss_key: std::sync::atomic::AtomicU64,
106106
miss_global: std::sync::atomic::AtomicU64,
107107
}
@@ -119,8 +119,8 @@ pub struct TurboPersistence {
119119
/// A flag to indicate if a write operation is currently active. Prevents multiple concurrent
120120
/// write operations.
121121
active_write_operation: AtomicBool,
122-
/// A cache for deserialized AQMF filters.
123-
aqmf_cache: AqmfCache,
122+
/// A cache for deserialized AMQF filters.
123+
amqf_cache: AmqfCache,
124124
/// A cache for decompressed key blocks.
125125
key_block_cache: BlockCache,
126126
/// A cache for decompressed value blocks.
@@ -158,9 +158,9 @@ impl TurboPersistence {
158158
current_sequence_number: 0,
159159
}),
160160
active_write_operation: AtomicBool::new(false),
161-
aqmf_cache: AqmfCache::with(
162-
AQMF_CACHE_SIZE as usize / AQMF_AVG_SIZE,
163-
AQMF_CACHE_SIZE,
161+
amqf_cache: AmqfCache::with(
162+
AMQF_CACHE_SIZE as usize / AMQF_AVG_SIZE,
163+
AMQF_CACHE_SIZE,
164164
Default::default(),
165165
Default::default(),
166166
Default::default(),
@@ -876,11 +876,11 @@ impl TurboPersistence {
876876
let index_in_meta = ssts_with_ranges[index].index_in_meta;
877877
let meta_file = &meta_files[meta_index];
878878
let entry = meta_file.entry(index_in_meta);
879-
let aqmf = Cow::Borrowed(entry.raw_aqmf(meta_file.aqmf_data()));
879+
let amqf = Cow::Borrowed(entry.raw_amqf(meta_file.amqf_data()));
880880
let meta = StaticSortedFileBuilderMeta {
881881
min_hash: entry.min_hash(),
882882
max_hash: entry.max_hash(),
883-
aqmf,
883+
amqf,
884884
key_compression_dictionary_length: entry
885885
.key_compression_dictionary_length(),
886886
value_compression_dictionary_length: entry
@@ -904,13 +904,12 @@ impl TurboPersistence {
904904
) -> Result<(u32, File, StaticSortedFileBuilderMeta<'static>)>
905905
{
906906
let _span = tracing::trace_span!("write merged sst file").entered();
907-
let builder = StaticSortedFileBuilder::new(
907+
let (meta, file) = write_static_stored_file(
908908
entries,
909909
total_key_size,
910910
total_value_size,
911+
&path.join(format!("{seq:08}.sst")),
911912
)?;
912-
let (meta, file) =
913-
builder.write(&path.join(format!("{seq:08}.sst")))?;
914913
Ok((seq, file, meta))
915914
}
916915

@@ -1148,7 +1147,7 @@ impl TurboPersistence {
11481147
family as u32,
11491148
hash,
11501149
key,
1151-
&self.aqmf_cache,
1150+
&self.amqf_cache,
11521151
&self.key_block_cache,
11531152
&self.value_block_cache,
11541153
)? {
@@ -1162,7 +1161,7 @@ impl TurboPersistence {
11621161
}
11631162
MetaLookupResult::QuickFilterMiss => {
11641163
#[cfg(feature = "stats")]
1165-
self.stats.miss_aqmf.fetch_add(1, Ordering::Relaxed);
1164+
self.stats.miss_amqf.fetch_add(1, Ordering::Relaxed);
11661165
}
11671166
MetaLookupResult::SstLookup(result) => match result {
11681167
SstLookupResult::Found(result) => match result {
@@ -1204,14 +1203,14 @@ impl TurboPersistence {
12041203
sst_files: inner.meta_files.iter().map(|m| m.entries().len()).sum(),
12051204
key_block_cache: CacheStatistics::new(&self.key_block_cache),
12061205
value_block_cache: CacheStatistics::new(&self.value_block_cache),
1207-
aqmf_cache: CacheStatistics::new(&self.aqmf_cache),
1206+
amqf_cache: CacheStatistics::new(&self.amqf_cache),
12081207
hits: self.stats.hits_deleted.load(Ordering::Relaxed)
12091208
+ self.stats.hits_small.load(Ordering::Relaxed)
12101209
+ self.stats.hits_blob.load(Ordering::Relaxed),
12111210
misses: self.stats.miss_global.load(Ordering::Relaxed),
12121211
miss_family: self.stats.miss_family.load(Ordering::Relaxed),
12131212
miss_range: self.stats.miss_range.load(Ordering::Relaxed),
1214-
miss_aqmf: self.stats.miss_aqmf.load(Ordering::Relaxed),
1213+
miss_amqf: self.stats.miss_amqf.load(Ordering::Relaxed),
12151214
miss_key: self.stats.miss_key.load(Ordering::Relaxed),
12161215
}
12171216
}
@@ -1228,14 +1227,14 @@ impl TurboPersistence {
12281227
.entries()
12291228
.iter()
12301229
.map(|entry| {
1231-
let aqmf = entry.raw_aqmf(meta_file.aqmf_data());
1230+
let amqf = entry.raw_amqf(meta_file.amqf_data());
12321231
MetaFileEntryInfo {
12331232
sequence_number: entry.sequence_number(),
12341233
min_hash: entry.min_hash(),
12351234
max_hash: entry.max_hash(),
12361235
sst_size: entry.size(),
1237-
aqmf_size: entry.aqmf_size(),
1238-
aqmf_entries: aqmf.len(),
1236+
amqf_size: entry.amqf_size(),
1237+
amqf_entries: amqf.len(),
12391238
key_compression_dictionary_size: entry
12401239
.key_compression_dictionary_length(),
12411240
value_compression_dictionary_size: entry
@@ -1273,8 +1272,8 @@ pub struct MetaFileEntryInfo {
12731272
pub sequence_number: u32,
12741273
pub min_hash: u64,
12751274
pub max_hash: u64,
1276-
pub aqmf_size: u32,
1277-
pub aqmf_entries: usize,
1275+
pub amqf_size: u32,
1276+
pub amqf_entries: usize,
12781277
pub sst_size: u64,
12791278
pub key_compression_dictionary_size: u16,
12801279
pub value_compression_dictionary_size: u16,

0 commit comments

Comments
 (0)