Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
1458074
Prototype qualification testing
jzombie Nov 13, 2025
35cf185
Increase insert batch size (significantly)
jzombie Nov 13, 2025
2524369
Prototype loading status reporting
jzombie Nov 13, 2025
903a9f3
Add sub-step progress monitoring
jzombie Nov 13, 2025
e996129
Increase batch size and add TODO
jzombie Nov 14, 2025
5933f74
Cache foreign key checks
jzombie Nov 14, 2025
09cbfdd
Operate on prebuilt `UniqueKey`s so inserts stop cloning multi-column…
jzombie Nov 14, 2025
b4b00e9
Prototype scoped FK caches
jzombie Nov 14, 2025
88f1cf4
Prototype `write_hints` for efficient data writes
jzombie Nov 14, 2025
6ae1922
Add "Write-Sizing Hints & Loader Tuning" section
jzombie Nov 14, 2025
c79a562
Draft perf plan
jzombie Nov 14, 2025
ad81817
Prototype pager diagnostic reporting
jzombie Nov 14, 2025
9d15daa
Separate table and pager diagnostics
jzombie Nov 14, 2025
1062635
Update w/ progress
jzombie Nov 14, 2025
c8c3892
Refactor diagnostics
jzombie Nov 14, 2025
3f4e5fa
Remove `chrono` crate
jzombie Nov 14, 2025
9b2e374
Prototype integrity check adjustments
jzombie Nov 14, 2025
3208812
Prototype ingestion while skipping integrity checks
jzombie Nov 14, 2025
c99bd34
Re-route TPC-H loader through prepared `InsertPlan`
jzombie Nov 15, 2025
bceefc5
Add TODO
jzombie Nov 15, 2025
15d1962
Add `SqlTypeFamily`
jzombie Nov 15, 2025
1b54920
Display TPC-H loading constraints
jzombie Nov 15, 2025
8d0dde6
Prototype information schema as real table
jzombie Nov 16, 2025
05065fa
Use memory pager for information schema
jzombie Nov 16, 2025
50d9100
Prevent noisy "failed to reload" warnings on information_schema refresh
jzombie Nov 16, 2025
c283faf
Resolve Clippy warnings
jzombie Nov 16, 2025
306ad8e
Continue resolving Clippy warnings
jzombie Nov 16, 2025
d7ae33d
Gate tests and benches behind successful lint gate
jzombie Nov 16, 2025
beaada0
Revert "Gate tests and benches behind successful lint gate"
jzombie Nov 16, 2025
479d986
Add `Int32` sort support
jzombie Nov 16, 2025
dfef25f
Rename existing `ScanOrderTransform::IdentityInt64` for consistency
jzombie Nov 16, 2025
49fa20b
Improve code reuse for primitive ordering
jzombie Nov 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
273 changes: 245 additions & 28 deletions Cargo.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ rand = "0.9.1"
rayon = "1.10.0"
regex = "1.12.2"
roaring = "0.11.2"
rust_decimal = "1.39.0"
sqlparser = "0.59.0"
tempfile = "3.23.0"
thiserror = "2.0.17"
Expand All @@ -57,7 +58,7 @@ rustc-hash = "2.1.1"
serde = { version = "1.0.228", features = ["derive"] }
serde_json = "1.0.145"
sqllogictest = "0.28.4"
time = { version = "0.3.44", features = ["parsing"] }
time = { version = "0.3.44", features = ["formatting", "macros", "parsing"] }
tokio = { version = "1.48.0", features = ["macros", "rt-multi-thread"] }
tpchgen = "2.0.1"
tracing = "0.1.41"
Expand Down
4 changes: 4 additions & 0 deletions llkv-column-map/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
- Maintain the column catalog that maps `LogicalFieldId` to the physical keys storing metadata and data pages.
- Provide gather, scan, and append APIs used by [`llkv-table`](../llkv-table/) and the executor layer.

### Write-Sizing Hints & Loader Tuning

`ColumnStore::write_hints()` exposes the storage layer’s current guidance for chunk sizing, insert batch rows, and variable-width fallbacks. Bulk data loaders should resolve their batch size through this API instead of hard-coding constants; doing so keeps memory usage low and reduces write thrashing.

## Logical vs Physical Keys

- `LogicalFieldId` encodes namespace (user data, row-id shadow, MVCC metadata), table ID, and field ID to avoid collisions across tables.
Expand Down
5 changes: 5 additions & 0 deletions llkv-column-map/src/store/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,11 @@ where
})
}

/// Builds the write-sizing hints that upstream writers should consult when
/// choosing batch sizes, derived from this store's configuration.
pub fn write_hints(&self) -> ColumnStoreWriteHints {
    let store_cfg = &self.cfg;
    ColumnStoreWriteHints::from_config(store_cfg)
}

/// Creates and persists an index for a column.
///
/// Builds the specified index type for all existing data in the column and
Expand Down
3 changes: 3 additions & 0 deletions llkv-column-map/src/store/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,6 @@ use dtype_cache::DTypeCache;

pub mod indexing;
pub use indexing::*;

mod write_hints;
pub use write_hints::ColumnStoreWriteHints;
73 changes: 73 additions & 0 deletions llkv-column-map/src/store/write_hints.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
use super::config::ColumnStoreConfig;
use super::constants::TARGET_CHUNK_BYTES;
use crate::types::RowId;

/// Heuristics that guide callers when sizing write batches for the column store.
///
/// The values are derived from the store's ingest configuration and storage-level
/// constants (see `from_config`) so higher layers can adapt without duplicating
/// them. Callers should treat these numbers as soft targets: a request above
/// `max_insert_batch_rows` is capped by `clamp_insert_batch_rows`, while smaller
/// requests pass through that method unchanged.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ColumnStoreWriteHints {
/// Target chunk size used when splitting incoming arrays.
///
/// Populated from the `TARGET_CHUNK_BYTES` constant.
pub target_chunk_bytes: usize,
/// Preferred number of rows to buffer per insert before flushing.
///
/// Computed as `TARGET_CHUNK_BYTES / size_of::<RowId>()`, and never less than 1.
pub recommended_insert_batch_rows: usize,
/// Hard ceiling for literal INSERT batches before storage splits them eagerly.
///
/// Computed as four times `recommended_insert_batch_rows`, saturating on overflow.
pub max_insert_batch_rows: usize,
/// Fallback slice size for exotic variable-width arrays lacking offset metadata.
///
/// Copied verbatim from `ColumnStoreConfig::varwidth_fallback_rows_per_slice`.
pub varwidth_fallback_rows_per_slice: usize,
}

impl ColumnStoreWriteHints {
    /// Derives write-sizing hints from the store's configuration.
    ///
    /// The recommended batch is the number of `RowId` values that fit in
    /// `TARGET_CHUNK_BYTES` (never fewer than one). The maximum batch is four
    /// times the recommended batch, saturating instead of overflowing. The
    /// variable-width fallback slice size is copied from `cfg` unchanged.
    pub(crate) fn from_config(cfg: &ColumnStoreConfig) -> Self {
        // Floor the width at one byte so the division below cannot divide by zero.
        let bytes_per_row_id = usize::max(std::mem::size_of::<RowId>(), 1);
        let recommended = usize::max(TARGET_CHUNK_BYTES / bytes_per_row_id, 1);
        let ceiling = usize::max(recommended.saturating_mul(4), recommended);

        Self {
            varwidth_fallback_rows_per_slice: cfg.varwidth_fallback_rows_per_slice,
            max_insert_batch_rows: ceiling,
            recommended_insert_batch_rows: recommended,
            target_chunk_bytes: TARGET_CHUNK_BYTES,
        }
    }

    /// Caps `requested_rows` at `max_insert_batch_rows`.
    ///
    /// A request of zero yields zero; any request at or below the maximum is
    /// returned unchanged.
    pub fn clamp_insert_batch_rows(&self, requested_rows: usize) -> usize {
        if requested_rows == 0 {
            return 0;
        }
        std::cmp::min(requested_rows, self.max_insert_batch_rows)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds hints from a config carrying the given variable-width fallback.
    fn hints_for(fallback_rows: usize) -> ColumnStoreWriteHints {
        let cfg = ColumnStoreConfig {
            varwidth_fallback_rows_per_slice: fallback_rows,
        };
        ColumnStoreWriteHints::from_config(&cfg)
    }

    /// The hints mirror the chunk-size constant and the configured fallback.
    #[test]
    fn from_config_uses_target_chunk_bytes() {
        let hints = hints_for(2048);
        let rows_per_chunk = TARGET_CHUNK_BYTES / std::mem::size_of::<RowId>();

        assert_eq!(hints.target_chunk_bytes, TARGET_CHUNK_BYTES);
        assert_eq!(hints.varwidth_fallback_rows_per_slice, 2048);
        assert_eq!(hints.recommended_insert_batch_rows, rows_per_chunk);
        assert!(hints.max_insert_batch_rows >= hints.recommended_insert_batch_rows);
    }

    /// Clamping keeps zero and in-range requests, and caps oversized ones.
    #[test]
    fn clamp_insert_batch_rows_caps_large_values() {
        let hints = hints_for(512);
        assert_eq!(hints.clamp_insert_batch_rows(0), 0);

        let ceiling = hints.max_insert_batch_rows;
        assert_eq!(hints.clamp_insert_batch_rows(ceiling * 10), ceiling);
        assert_eq!(hints.clamp_insert_batch_rows(ceiling - 1), ceiling - 1);
    }
}
3 changes: 2 additions & 1 deletion llkv-executor/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9894,7 +9894,8 @@ where

let transform = match order_plan.sort_type {
OrderSortType::Native => match column.data_type {
DataType::Int64 => ScanOrderTransform::IdentityInteger,
DataType::Int64 => ScanOrderTransform::IdentityInt64,
DataType::Int32 => ScanOrderTransform::IdentityInt32,
DataType::Utf8 => ScanOrderTransform::IdentityUtf8,
ref other => {
return Err(Error::InvalidArgumentError(format!(
Expand Down
Loading
Loading