Skip to content

Commit eea6ebc

Browse files
feat: Deduplicate tagged blobs (#5512)
This PR adds (tagged) blob deduplication support. The main issue it solves is that all external calls to a Motoko canister go through Candid deserialization, so blobs passed as arguments end up as fresh blobs on the Motoko heap. Calling multiple times with the same blob as an argument creates multiple copies of the same blob. To achieve deduplication, this PR does the following:
* in `internals.mo`, it creates a fixed-size hash table which resolves collisions via chaining.
* it sets up a thin RTS interface to set/get the hash table allocated in `internals.mo`, so that the table is tracked by the RTS layer, is not garbage collected, and survives upgrades.
* to achieve deduplication, the hash table stores weak references pointing to the actual objects; once objects are garbage collected, the weak references will point to null.
* it adds a thin client interface (in `prim.mo`) to walk the hash table, check which deduplicated blobs are alive/dead, and prune the dead ones if needed.
1 parent ae9b575 commit eea6ebc

26 files changed

+4244
-2971
lines changed

rts/motoko-rts/src/gc/incremental/roots/enhanced.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,16 @@ pub unsafe fn visit_roots<C, V: Fn(&mut C, *mut Value)>(
2929
visit_field(context, location);
3030
}
3131
}
32+
#[cfg(feature = "ic")]
33+
{
34+
// Always visit the dedup table as well.
35+
// Otherwise the dedup table will be garbage collected.
36+
use crate::persistence::get_dedup_table_ptr;
37+
let dedup_table = get_dedup_table_ptr();
38+
if dedup_table.is_non_null_ptr() {
39+
visit_field(context, dedup_table);
40+
}
41+
}
3242
}
3343

3444
#[cfg(feature = "ic")]
@@ -67,6 +77,7 @@ pub unsafe fn initialize_static_variables<M: crate::memory::Memory>(mem: &mut M,
6777
for index in 0..amount {
6878
array.initialize(index, NULL_POINTER, mem);
6979
}
80+
7081
let location = addr_of_mut!(STATIC_VARIABLES) as *mut Value;
7182
write_with_barrier(mem, location, variables);
7283
}

rts/motoko-rts/src/memory.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,3 +113,26 @@ pub unsafe fn weak_ref_is_live<M: Memory>(_mem: &mut M, weak_ref: Value) -> bool
113113
let weak_ref_obj = weak_ref.get_ptr() as *mut WeakRef;
114114
return (*weak_ref_obj).is_live();
115115
}
116+
117+
/// Get the dedup table.
///
/// Reads and returns the `Value` currently held in the slot exposed by
/// `crate::persistence::get_dedup_table_ptr`. The `_mem` argument is not used by the body.
118+
#[enhanced_orthogonal_persistence]
119+
#[ic_mem_fn]
120+
#[cfg(feature = "ic")]
121+
pub unsafe fn get_dedup_table<M: Memory>(_mem: &mut M) -> Value {
122+
use crate::persistence::get_dedup_table_ptr;
123+
*get_dedup_table_ptr()
124+
}
125+
126+
/// Set the dedup table.
///
/// Traps through `crate::rts_trap_with` when `dedup_table.is_array()` is false; otherwise
/// hands the value to `crate::persistence::set_dedup_table_ptr` together with `mem`.
127+
#[enhanced_orthogonal_persistence]
128+
#[ic_mem_fn]
129+
#[cfg(feature = "ic")]
130+
pub unsafe fn set_dedup_table<M: Memory>(mem: &mut M, dedup_table: Value) {
131+
use crate::persistence::set_dedup_table_ptr;
132+
if !dedup_table.is_array() {
133+
crate::rts_trap_with(
134+
"set_dedup_table: Invalid dedup table pointer. This is a bug, report to the Motoko team.",
135+
);
136+
}
137+
set_dedup_table_ptr(mem, dedup_table);
138+
}

rts/motoko-rts/src/persistence.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,12 @@ struct PersistentMetadata {
5757
/// A Value representing a pointer to a MarkStack object
5858
/// used to collect weak references during the GC marking phase.
5959
weak_ref_registry: Value,
60+
/// Dedup metadata object. The dedup metadata object is an implementation of a hash table
61+
/// which works like an array of a fixed size, and for collisions we have a linked list.
62+
/// The implementation is done in the Motoko prelude file internals.mo, so it is entirely user-space.
63+
/// We keep a pointer to this here so that we can keep it alive across upgrades.
64+
/// To keep the dedup table live, we need to add it to roots as well.
65+
dedup_table: Value,
6066
}
6167

6268
/// Location of the persistent metadata. Prereserved and fixed forever.
@@ -105,6 +111,8 @@ impl PersistentMetadata {
105111
(*self).upgrade_instructions = 0;
106112
// Initialize the weak reference registry as the null pointer.
107113
(*self).weak_ref_registry = NULL_POINTER;
114+
// Initialize the dedup table as the null pointer.
115+
(*self).dedup_table = NULL_POINTER;
108116
}
109117
}
110118

@@ -122,6 +130,12 @@ pub unsafe fn initialize_memory<M: Memory>() {
122130
// support. We need to initialize the weak reference registry to NULL_POINTER.
123131
(*metadata).weak_ref_registry = NULL_POINTER;
124132
}
133+
// Explicit migration from a version of the RTS without dedup table support.
134+
if (*metadata).dedup_table.get_raw() == 0 {
135+
// This is the first upgrade from a version of the RTS without dedup table support.
136+
// We need to initialize the dedup table to NULL_POINTER.
137+
(*metadata).dedup_table = NULL_POINTER;
138+
}
125139
} else {
126140
metadata.initialize::<M>();
127141
}
@@ -335,3 +349,16 @@ unsafe fn is_weak_ref_registry_null() -> bool {
335349
// thus marking of previous weak ref object is not needed.
336350
(*metadata).weak_ref_registry == NULL_POINTER
337351
}
352+
353+
/// Accessor method for the dedup table.
///
/// Returns a mutable reference to the `dedup_table` field of the metadata obtained from
/// `PersistentMetadata::get()`, i.e. the storage slot itself rather than a copy of its value.
354+
pub(crate) unsafe fn get_dedup_table_ptr() -> &'static mut Value {
355+
let metadata = PersistentMetadata::get();
356+
&mut (*metadata).dedup_table
357+
}
358+
359+
/// Setter method for the dedup table.
///
/// Writes `dedup_table` into the `dedup_table` field of the metadata obtained from
/// `PersistentMetadata::get()`, going through `write_with_barrier` instead of a plain store.
360+
pub(crate) unsafe fn set_dedup_table_ptr<M: Memory>(mem: &mut M, dedup_table: Value) {
361+
let metadata = PersistentMetadata::get();
362+
// Use barrier in case the dedup table is set during a GC phase.
363+
write_with_barrier(mem, &mut (*metadata).dedup_table, dedup_table);
364+
}

0 commit comments

Comments
 (0)