Skip to content

Commit 6b45bf2

Browse files
committed
feat: Disable wal_autocheckpoint
From https://www.sqlite.org/wal.html: > The default strategy is to allow successive write transactions to grow the WAL until the WAL becomes about 1000 pages in size, then to run a checkpoint operation for each subsequent COMMIT until the WAL is reset to be smaller than 1000 pages. By default, the checkpoint will be run automatically by the same thread that does the COMMIT that pushes the WAL over its size limit. This has the effect of causing most COMMIT operations to be very fast but an occasional COMMIT (those that trigger a checkpoint) to be much slower. And while autocheckpoint runs in the `PASSIVE` mode and thus doesn't block concurrent readers and writers, in our design it blocks writers because it's done while `write_mutex` is locked, and thus may cause the app to get stuck for a noticeable time. Let's disable autocheckpointing then; we can't rely on it anyway. Instead, run a `TRUNCATE` checkpoint from `inbox_loop()` if the WAL is >= 4K pages and a `PASSIVE` checkpoint otherwise.
1 parent 782a4dd commit 6b45bf2

File tree

4 files changed

+30
-9
lines changed

4 files changed

+30
-9
lines changed

src/context.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,7 @@ impl Context {
410410

411411
/// Changes encrypted database passphrase.
412412
pub async fn change_passphrase(&self, passphrase: String) -> Result<()> {
413-
self.sql.change_passphrase(passphrase).await?;
413+
self.sql.change_passphrase(self, passphrase).await?;
414414
Ok(())
415415
}
416416

src/scheduler.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ use crate::location;
2525
use crate::log::{LogExt, error, info, warn};
2626
use crate::message::MsgId;
2727
use crate::smtp::{Smtp, send_smtp_messages};
28-
use crate::sql;
28+
use crate::sql::{self, Sql};
2929
use crate::stats::maybe_send_stats;
3030
use crate::tools::{self, duration_to_str, maybe_add_time_based_warnings, time, time_elapsed};
3131
use crate::{constants, stats};
@@ -498,6 +498,11 @@ async fn inbox_fetch_idle(ctx: &Context, imap: &mut Imap, mut session: Session)
498498
last_housekeeping_time.saturating_add(constants::HOUSEKEEPING_PERIOD);
499499
if next_housekeeping_time <= time() {
500500
sql::housekeeping(ctx).await.log_err(ctx).ok();
501+
} else {
502+
let force_truncate = false;
503+
if let Err(err) = Sql::wal_checkpoint(ctx, force_truncate).await {
504+
warn!(ctx, "wal_checkpoint() failed: {err:#}.");
505+
}
501506
}
502507
}
503508
Err(err) => {

src/sql.rs

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,11 @@ impl Sql {
298298
/// The database must already be encrypted and the passphrase cannot be empty.
299299
/// It is impossible to turn encrypted database into unencrypted
300300
/// and vice versa this way, use import/export for this.
301-
pub async fn change_passphrase(&self, passphrase: String) -> Result<()> {
301+
pub(crate) async fn change_passphrase(
302+
&self,
303+
_context: &Context,
304+
passphrase: String,
305+
) -> Result<()> {
302306
let mut lock = self.pool.write().await;
303307

304308
let pool = lock.take().context("SQL connection pool is not open")?;
@@ -683,8 +687,12 @@ impl Sql {
683687
&self.config_cache
684688
}
685689

686-
/// Runs a checkpoint operation in TRUNCATE mode, so the WAL file is truncated to 0 bytes.
687-
pub(crate) async fn wal_checkpoint(context: &Context) -> Result<()> {
690+
/// Runs a WAL checkpoint operation.
691+
///
692+
/// * `force_truncate` - Force TRUNCATE mode to truncate the WAL file to 0 bytes. Otherwise, only
693+
/// a PASSIVE checkpoint is run unless the WAL has grown to 4096 pages or more. NB: Truncating
694+
/// blocks all db connections for some time.
695+
pub(crate) async fn wal_checkpoint(context: &Context, force_truncate: bool) -> Result<()> {
688696
let t_start = Time::now();
689697
let lock = context.sql.pool.read().await;
690698
let Some(pool) = lock.as_ref() else {
@@ -695,13 +703,19 @@ impl Sql {
695703
// Do as much work as possible without blocking anybody.
696704
let query_only = true;
697705
let conn = pool.get(query_only).await?;
698-
tokio::task::block_in_place(|| {
706+
let pages_total = tokio::task::block_in_place(|| {
699707
// Execute some transaction causing the WAL file to be opened so that the
700708
// `wal_checkpoint()` can proceed, otherwise it fails when called the first time,
701709
// see https://sqlite.org/forum/forumpost/7512d76a05268fc8.
702710
conn.query_row("PRAGMA table_list", [], |_| Ok(()))?;
703-
conn.query_row("PRAGMA wal_checkpoint(PASSIVE)", [], |_| Ok(()))
711+
conn.query_row("PRAGMA wal_checkpoint(PASSIVE)", [], |row| {
712+
let pages_total: i64 = row.get(1)?;
713+
Ok(pages_total)
714+
})
704715
})?;
716+
if !force_truncate && pages_total < 4096 {
717+
return Ok(());
718+
}
705719

706720
// Kick out writers.
707721
const _: () = assert!(Sql::N_DB_CONNECTIONS > 1, "Deadlock possible");
@@ -772,6 +786,7 @@ fn new_connection(path: &Path, passphrase: &str) -> Result<Connection> {
772786
PRAGMA busy_timeout = 0; -- fail immediately
773787
PRAGMA soft_heap_limit = 8388608; -- 8 MiB limit, same as set in Android SQLiteDatabase.
774788
PRAGMA foreign_keys=on;
789+
PRAGMA wal_autocheckpoint=0; -- disable auto-checkpointing, checkpoints are run explicitly
775790
",
776791
)?;
777792

@@ -880,7 +895,8 @@ pub async fn housekeeping(context: &Context) -> Result<()> {
880895
// bigger than 200M) and also make sure we truncate the WAL periodically. Auto-checkpointing does
881896
// not normally truncate the WAL (unless the `journal_size_limit` pragma is set), see
882897
// https://www.sqlite.org/wal.html.
883-
if let Err(err) = Sql::wal_checkpoint(context).await {
898+
let force_truncate = true;
899+
if let Err(err) = Sql::wal_checkpoint(context, force_truncate).await {
884900
warn!(context, "wal_checkpoint() failed: {err:#}.");
885901
debug_assert!(false);
886902
}

src/sql/sql_tests.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ async fn test_sql_change_passphrase() -> Result<()> {
263263
sql.open(&t, "foo".to_string())
264264
.await
265265
.context("failed to open the database second time")?;
266-
sql.change_passphrase("bar".to_string())
266+
sql.change_passphrase(&t, "bar".to_string())
267267
.await
268268
.context("failed to change passphrase")?;
269269

0 commit comments

Comments
 (0)