Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -492,13 +492,17 @@ localnet-debug: localnet-stop localnet-build-dlv localnet-build-nodes

.PHONY: localnet-start localnet-stop localnet-debug localnet-build-env localnet-build-dlv localnet-build-nodes

# build-system-test-current builds the current simd binary (via `build`) and
# copies it into ./tests/systemtests/binaries/.
build-system-test-current: build
	mkdir -p ./tests/systemtests/binaries/
	cp $(BUILDDIR)/simd ./tests/systemtests/binaries/

# test-system moves the v0.53 binary into ./tests/systemtests/binaries/v0.53/
# and then runs the system test suite in tests/systemtests.
# The current binary is staged by the build-system-test-current prerequisite,
# so it is not copied a second time here.
# NOTE(review): build-v53 checks out another branch (per its comment), so these
# two prerequisites are presumably not safe to build in parallel - confirm
# before running this target with `make -j`.
test-system: build-v53 build-system-test-current
	mkdir -p ./tests/systemtests/binaries/v0.53
	mv $(BUILDDIR)/simdv53 ./tests/systemtests/binaries/v0.53/simd
	$(MAKE) -C tests/systemtests test

.PHONY: test-system build-system-test-current

# build-v53 checks out the v0.53.x branch, builds the binary, and renames it to simdv53.
build-v53:
Expand Down
2 changes: 2 additions & 0 deletions client/v2/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ require (
github.com/99designs/keyring v1.2.1 // indirect
github.com/DataDog/datadog-go v3.2.0+incompatible // indirect
github.com/DataDog/zstd v1.5.7 // indirect
github.com/alitto/pond/v2 v2.5.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bgentry/speakeasy v0.2.0 // indirect
github.com/bytedance/sonic v1.14.0 // indirect
Expand Down Expand Up @@ -62,6 +63,7 @@ require (
github.com/dgraph-io/ristretto/v2 v2.1.0 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/dvsekhvalnov/jose2go v1.6.0 // indirect
github.com/edsrzf/mmap-go v1.0.0 // indirect
github.com/emicklei/dot v1.8.0 // indirect
github.com/fatih/color v1.18.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
Expand Down
3 changes: 3 additions & 0 deletions client/v2/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuy
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=
github.com/alitto/pond/v2 v2.5.0 h1:vPzS5GnvSDRhWQidmj2djHllOmjFExVFbDGCw1jdqDw=
github.com/alitto/pond/v2 v2.5.0/go.mod h1:xkjYEgQ05RSpWdfSd1nM3OVv7TBhLdy7rMp3+2Nq+yE=
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
github.com/apache/thrift v0.13.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
Expand Down Expand Up @@ -199,6 +201,7 @@ github.com/dvsekhvalnov/jose2go v1.6.0/go.mod h1:QsHjhyTlD/lAVqn/NSbVZmSCGeDehTB
github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs=
github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU=
github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I=
github.com/edsrzf/mmap-go v1.0.0 h1:CEBF7HpRnUCSJgGUb5h1Gm7e3VkmVDrR8lvWVLtrOFw=
github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M=
github.com/emicklei/dot v1.8.0 h1:HnD60yAKFAevNeT+TPYr9pb8VB9bqdeSo0nzwIW6IOI=
github.com/emicklei/dot v1.8.0/go.mod h1:DeV7GvQtIw4h2u73RKBkkFdvVAz0D9fzeJrgPW6gy/s=
Expand Down
6 changes: 4 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ require (
cosmossdk.io/store v1.3.0-beta.0
cosmossdk.io/x/tx v0.14.0
github.com/99designs/keyring v1.2.1
github.com/alitto/pond/v2 v2.5.0
github.com/bgentry/speakeasy v0.2.0
github.com/bits-and-blooms/bitset v1.24.1
github.com/chzyer/readline v1.5.1
Expand All @@ -25,8 +26,10 @@ require (
github.com/cosmos/go-bip39 v1.0.0
github.com/cosmos/gogogateway v1.2.0
github.com/cosmos/gogoproto v1.7.0
github.com/cosmos/iavl v1.2.6
github.com/cosmos/ledger-cosmos-go v0.16.0
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.4.0
github.com/edsrzf/mmap-go v1.0.0
github.com/golang/protobuf v1.5.4
github.com/google/go-cmp v0.7.0
github.com/google/gofuzz v1.2.0
Expand Down Expand Up @@ -59,6 +62,7 @@ require (
github.com/tidwall/btree v1.8.1
go.uber.org/mock v0.6.0
golang.org/x/crypto v0.43.0
golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6
golang.org/x/sync v0.17.0
google.golang.org/genproto/googleapis/api v0.0.0-20250818200422-3122310a409c
google.golang.org/grpc v1.76.0
Expand Down Expand Up @@ -118,7 +122,6 @@ require (
github.com/cockroachdb/redact v1.1.6 // indirect
github.com/cockroachdb/tokenbucket v0.0.0-20250429170803-42689b6311bb // indirect
github.com/cometbft/cometbft-db v0.14.1 // indirect
github.com/cosmos/iavl v1.2.6 // indirect
github.com/cosmos/ics23/go v0.11.0 // indirect
github.com/danieljoos/wincred v1.2.2 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
Expand Down Expand Up @@ -217,7 +220,6 @@ require (
go.yaml.in/yaml/v2 v2.4.3 // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/arch v0.21.0 // indirect
golang.org/x/exp v0.0.0-20250506013437-ce4c2cf36ca6 // indirect
golang.org/x/net v0.46.0 // indirect
golang.org/x/oauth2 v0.31.0 // indirect
golang.org/x/sys v0.37.0 // indirect
Expand Down
3 changes: 3 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuy
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=
github.com/alitto/pond/v2 v2.5.0 h1:vPzS5GnvSDRhWQidmj2djHllOmjFExVFbDGCw1jdqDw=
github.com/alitto/pond/v2 v2.5.0/go.mod h1:xkjYEgQ05RSpWdfSd1nM3OVv7TBhLdy7rMp3+2Nq+yE=
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
github.com/apache/thrift v0.13.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
Expand Down Expand Up @@ -274,6 +276,7 @@ github.com/dvsekhvalnov/jose2go v1.6.0/go.mod h1:QsHjhyTlD/lAVqn/NSbVZmSCGeDehTB
github.com/eapache/go-resiliency v1.1.0/go.mod h1:kFI+JgMyC7bLPUVY133qvEBtVayf5mFgVsvEsIPBvNs=
github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21/go.mod h1:+020luEh2TKB4/GOp8oxxtq0Daoen/Cii55CzbTV6DU=
github.com/eapache/queue v1.1.0/go.mod h1:6eCeP0CKFpHLu8blIFXhExK/dRa7WDZfr6jVFPTqq+I=
github.com/edsrzf/mmap-go v1.0.0 h1:CEBF7HpRnUCSJgGUb5h1Gm7e3VkmVDrR8lvWVLtrOFw=
github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M=
github.com/emicklei/dot v1.8.0 h1:HnD60yAKFAevNeT+TPYr9pb8VB9bqdeSo0nzwIW6IOI=
github.com/emicklei/dot v1.8.0/go.mod h1:DeV7GvQtIw4h2u73RKBkkFdvVAz0D9fzeJrgPW6gy/s=
Expand Down
147 changes: 147 additions & 0 deletions iavl/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# iavl

## Code Organization

### Node Types, Memory & Disk Layouts

Much of this code was influenced by memiavl and sometimes even copied directly from it.
The `NodeID` design is mainly from iavl/v2.
The `NodePointer` design introduces the possibility of doing node eviction similar to iavl/v2,
but with non-blocking thread safety using `atomic.Pointer` so that eviction can happen in the background without
blocking reads or writes.

* `node.go`: the `Node` interface which all 3 node types implement (`MemNode`, `BranchPersisted`, `LeafPersisted`)
* `mem_node.go`: in-memory node structure, new nodes always use the `MemNode` type
* `node_pointer.go`: all child references are wrapped in `NodePointer` which can point to either an in-memory node or an
on-disk node, or both (if the node has been written but not yet evicted)
* `node_id.go`: defines `NodeID` (version + index + leaf) and `NodeRef` (either a `NodeID` or a node offset in the
changeset file)
* `branch_layout.go`: defines the on-disk layout for branch nodes
* `leaf_layout.go`: defines the on-disk layout for leaf nodes
* `branch_persisted.go`: a wrapper around `BranchLayout` which implements the `Node` interface and also tracks a store
reference
* `leaf_persisted.go`: a wrapper around `LeafLayout` which implements the `Node` interface and also tracks a store
reference

### Tree Management & Updating

For managing tree state, we define two core types `Tree` and `CommitTree`.
We directly read from and apply updates to `Tree`s but these updates only affect the persistent state of the tree if
they are applied and committed to a `CommitTree`.

* `tree.go`: a `Tree` struct which implements the Cosmos SDK `KVStore` interface and implements the key methods (get,
set,
delete, commit, etc). `Tree`s can be mutated, and changes can either be committed or discarded. This is essentially an
in-memory reference to a tree at a specific version that could be used read-only or mutated ad hoc without affecting
the underlying persistent tree (say for instance in `CheckTx`).
* `commit_tree.go`: defines the `CommitTree` structure which manages the persistent tree state. Using `CommitTree` you
can
create a new mutable `Tree` instance using `Branch` and decide to `Apply` its changes to the persistent tree or discard
them. Calling `Commit` flushes changes to the underlying `TreeStore` which does all of the on disk state management
and cleanup. In `CommitTree` we also have an asynchronous WAL writing process (optional) and maintain a background
eviction process.
* `update.go`: types for batching changes which can later be committed or discarded
* `node_update.go`: the code for setting and deleting nodes and doing tree rebalancing, adapted from memiavl and
iavl/v1
* `node_hash.go`: code for computing node hashes, adapted from memiavl and iavl/v1
* `iterator.go`: implements the Cosmos SDK `Iterator` interface, adapted from memiavl and iavl/v1

### Disk State Management

#### Central Coordination

These files are the central core of managing on-disk state across multiple changesets which may be in the process of
being written or compacted. **This is the most complex part of the codebase.**

* `tree_store.go`: code for dispatching read operations to the correct changeset, writing commits to new changesets,
and coordinating background compaction and cleanup of old changesets
* `cleanup.go`: the actual background cleanup and compaction thread

#### Changeset Reading, Writing and Compaction

* `changeset_files.go`: `ChangesetFiles` represents the five files which make up a changeset:
* `kv.log`: all of the key/value pairs in the changeset, and optionally the write-ahead log for replay (this is
configurable)
* `leaves.dat`: an array of `LeafLayout` structs
* `branches.dat`: an array of `BranchLayout` structs
* `versions.dat`: an array of `VersionInfo` structs, one for each version in the changeset
* `info.dat`: a single `ChangesetInfo` struct which tracks metadata about the changeset including the range of
versions
it contains and the number of orphaned nodes
* `changeset.go`: the `Changeset` struct wraps mmap's of the five changeset files and provides
methods for reading nodes from disk and marking them as orphaned. It includes some complex code for safely disposing
of `Changeset` instances because we need to either 1) reopen the memmap to change its size, or 2) close the
`Changeset` because it has been compacted and will be deleted. This is managed using pinning, a reference count, and
atomic booleans to track eviction (the desire to dispose and delete) and disposal (the actual disposal).
* `changeset_writer.go`: code for iteratively writing changesets to disk node by node in post-order traversal order.
Node references can either be by
`NodeID` or offsets (offsets have been disabled due to some unresolved bugs)
* `compactor.go`: code for rewriting one or more changesets into a new compacted changeset, skipping
orphaned nodes and updating offsets as needed (this offset rewrite code is currently buggy and disabled)

#### Helpers

* `version_info.go`: defines the on-disk layout for version info records, which track the root node and other metadata
for
each version
* `changeset_info.go`: defines the on-disk layout for the changeset info record, which tracks metadata
about the entire changeset including version range and number of orphaned nodes
* `kvlog.go`: code for reading key/value pairs from the `kv.log` file
* `kvlog_writer.go`: code for writing key/value pairs to the `kv.log` file, which can be structured as a write-ahead
operation log for replay and crash recovery (replay and recovery aren't implemented yet)
* `mmap.go`: the `MmapFile` mem-map wrapper
* `writer.go`: `FileWriter` and `StructWriter` wrappers for writing raw bytes and structs to files
* `reader.go`: `StructMap` and `NodeMap` wrappers for representing memory-mapped arrays of structs and nodes

### Multi-tree Management

* `multi_tree.go`: wraps multiple `Tree`s into a `MultiTree` which provides a mutable way to write a tree without
committing the changes to the persistent tree immediately (can be discarded)
* `commit_multi_tree.go`: wraps multiple `CommitTree`s into a `CommitMultiTree` which provides a way to create mutable
`MultiTree`s and commit their changes to the underlying persistent trees (or discard them). This can eventually
implement `RootMultiStore` and replace the SDK's store package. `CommitMultiTree` makes the optimization of running
`Commit` in parallel across all `CommitTree`s which could improve performance.

### Options

Options are maintained by the `Options` struct in `options.go`. Many options have a getter which uses a default value if
the option is not set.

The main options we're controlling now are:

* `WriteWAL`: whether we write all updates to the kv-log as a replayable write-ahead log (WAL). If this is enabled we
will fsync the WAL either asynchronously or synchronously (based on the `WalSyncBuffer` option). Enabling WAL could
actually improve performance because we asynchronously write key/value data in advance of `CommitTree.Commit` being
called.
* `EvictDepth`: the depth of the tree beyond which we will evict nodes from memory as soon as they are on disk. This is
the main lever for controlling memory usage. Using more memory could improve performance.
* `RetainVersions`: the number of recent versions to retain when we are compacting. Eventually we also want to enable
some sort of snapshot-based compaction (retaining full trees every N versions).
* `MinCompactionSeconds`: the minimum number of seconds to wait before starting a new compaction run (note that this
currently includes the time it takes to compact).
* `CompactWAL`: whether to compact the WAL when we are compacting changesets. In the future, we can distinguish between
compacting the WAL before our first checkpoint and retaining it after the first checkpoint.
* `ChangesetMaxTarget`: the size of a changeset after which we will roll over to a new changeset for the next version.
* `CompactionMaxTarget`: the target size of a compacted changeset. As long as adding another changeset to the compaction
keeps the total size below this number, we will join multiple changesets into a single compacted changeset.
* `CompactionOrphanRatio`: the ratio of orphaned nodes in a changeset beyond which we will trigger it for early
compaction (used together with `CompactionOrphanAge`)
* `CompactionOrphanAge`: the average age of orphaned nodes in a changeset beyond which we will trigger it for early
compaction (used together with `CompactionOrphanRatio`)
* `CompactAfterVersions`: the number of versions after which we will trigger a compaction when any orphans are present,
measured in versions since the last compaction.
* `ReaderUpdateInterval`: when writing multiple versions to a changeset, the number of versions after which we will open
the changeset for reading even if it has not been completed, so that readers can access the latest versions sooner and
flush memory. Set this to a shorter interval if we want to constrain memory usage more tightly and longer if we want
to reduce the number of times memmaps are re-opened for reading.

### Utilities

* `dot_graph.go`: code for exporting trees to Graphviz dot graph format for visualization
* `verify.go`: code for verifying tree integrity

### Tests

* `tree_test.go`: the only tests we have so far. These do, however, use property-based testing so we are generating
random operation sets, applying them to both iavlx and iavl/v1 trees. At each step, we confirm that behavior is
identical, including verification of hashes and verifying that invariants are maintained.
31 changes: 31 additions & 0 deletions iavl/branch_layout.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package iavlx

import (
"fmt"
"unsafe"

Check notice

Code scanning / CodeQL

Sensitive package import Note

Certain system packages contain functions which may be a possible source of non-determinism
)

// init is a startup sanity check: it panics if the in-memory size of
// BranchLayout does not equal SizeBranch.
func init() {
	got := unsafe.Sizeof(BranchLayout{})
	if got == SizeBranch {
		return
	}
	panic(fmt.Sprintf("invalid BranchLayout size: got %d, want %d", got, SizeBranch))
}

// SizeBranch is the expected size, in bytes, of a BranchLayout value.
// The package init function panics if unsafe.Sizeof(BranchLayout{}) differs from it.
const SizeBranch = 72

// BranchLayout is the fixed-size record describing a branch node.
// Its size is checked against SizeBranch at package init, so adding, removing
// or reordering fields changes the layout and must be matched by SizeBranch.
type BranchLayout struct {
// Id is the node's identifier; returned by ID().
Id NodeID
// Left and Right reference the two child nodes.
Left NodeRef
Right NodeRef
// NOTE(review): presumably the offset of this node's key in the key/value log - confirm.
KeyOffset uint32
// NOTE(review): presumably the height of the subtree rooted at this node - confirm.
Height uint8
// NOTE(review): presumably the number of leaves under this node - confirm.
Size uint32 // TODO 5 bytes?
// NOTE(review): presumably the version at which this node was orphaned - confirm.
OrphanVersion uint32 // TODO 5 bytes?
// Hash is a 32-byte array; presumably the node hash - confirm.
Hash [32]byte
}

// ID returns the NodeID stored in the layout's Id field.
func (layout BranchLayout) ID() NodeID {
	return layout.Id
}
Loading
Loading