Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
80 commits
Select commit Hold shift + click to select a range
05faf10
add StoreChunkDataPacks
zhangchiqing Sep 29, 2025
a75653e
update store chunk data pack
zhangchiqing Sep 29, 2025
82f14f0
fix test case
zhangchiqing Sep 29, 2025
e690d56
fix for execution state
zhangchiqing Sep 29, 2025
70f3cc6
fix test
zhangchiqing Sep 29, 2025
c998e28
fix builder
zhangchiqing Sep 29, 2025
9ab5e0a
update testutil engine
zhangchiqing Sep 29, 2025
6a4b59e
update chunk data packs and tests
zhangchiqing Sep 30, 2025
17821a5
fix executor tests
zhangchiqing Sep 30, 2025
848c9e7
fix lint
zhangchiqing Sep 30, 2025
fe584bd
update comments
zhangchiqing Sep 30, 2025
cbc5873
update comments in execution state
zhangchiqing Sep 30, 2025
936f38b
refactor BatchRemove
zhangchiqing Sep 30, 2025
7ad596e
add comments for BatchRemove
zhangchiqing Sep 30, 2025
e875957
Merge branch 'leo/refactor-insert-chunk-data-pack' into leo/refactor-…
zhangchiqing Sep 30, 2025
044325d
use two databases for chunk data pack tests
zhangchiqing Sep 30, 2025
d8e884d
add test cases for BatchRemove
zhangchiqing Sep 30, 2025
24744c5
update StoreChunkDataPack.Equals
zhangchiqing Sep 30, 2025
9d287d6
fix pruner tests
zhangchiqing Sep 30, 2025
be6f1ce
update mocks
zhangchiqing Sep 30, 2025
5e0e6a1
fix tests
zhangchiqing Sep 30, 2025
c37af9c
update comments
zhangchiqing Sep 30, 2025
7fa1ef6
update tests
zhangchiqing Sep 30, 2025
d22661a
Merge branch 'master' into leo/refactor-stored-chunk-data-pack
zhangchiqing Oct 1, 2025
58cd332
Merge branch 'master' into leo/refactor-stored-chunk-data-pack
zhangchiqing Oct 2, 2025
8dbfdc1
fix integration tests
zhangchiqing Oct 2, 2025
9d7cddd
fix tests
zhangchiqing Oct 2, 2025
792c69f
fix tests
zhangchiqing Oct 2, 2025
d7a2241
Merge branch 'master' into leo/refactor-stored-chunk-data-pack
zhangchiqing Oct 2, 2025
506dd4c
Merge branch 'master' into leo/refactor-stored-chunk-data-pack
zhangchiqing Oct 2, 2025
64a1cbd
revert tests
zhangchiqing Oct 3, 2025
4a4f529
update comments
zhangchiqing Oct 3, 2025
af1d4da
minor documentation extensions.
AlexHentschel Oct 7, 2025
6389c09
minor punctuation improvements
AlexHentschel Oct 8, 2025
944e733
documented that non-existent keys are no-ops for remove operations
AlexHentschel Oct 8, 2025
f8abc95
clarified documentation
AlexHentschel Oct 8, 2025
bd59da0
minor polishing of error documentation
AlexHentschel Oct 8, 2025
b83837d
alex/suggested_chunk-data-pack-id
AlexHentschel Oct 10, 2025
82022dc
Revert "alex/suggested_chunk-data-pack-id"
AlexHentschel Oct 10, 2025
bb7a347
added suggestions for Chunk Data Pack ID computation
AlexHentschel Oct 10, 2025
c585f6b
Merge pull request #8033 from onflow/alex/suggested_chunk-data-pack-id
zhangchiqing Oct 10, 2025
be3df47
Apply suggestions from code review
zhangchiqing Oct 10, 2025
9467928
Apply suggestions from code review
zhangchiqing Oct 10, 2025
944cfc6
update metrics
zhangchiqing Oct 10, 2025
c4e9b15
address review comments
zhangchiqing Oct 10, 2025
fb47317
Apply suggestions from code review
zhangchiqing Oct 11, 2025
8f75eaa
Apply suggestions from code review
zhangchiqing Oct 11, 2025
74ceef6
Apply suggestions from code review
zhangchiqing Oct 11, 2025
050ac83
Apply suggestions from code review
zhangchiqing Oct 11, 2025
704e30b
rename
zhangchiqing Oct 11, 2025
49cc4e9
rename
zhangchiqing Oct 11, 2025
c4ab85d
rename prefix
zhangchiqing Oct 11, 2025
5d63b5f
refactor protocol db variable
zhangchiqing Oct 11, 2025
ed453bb
Apply suggestions from code review
zhangchiqing Oct 11, 2025
96ebd52
rename batch
zhangchiqing Oct 11, 2025
c4cd113
update comments
zhangchiqing Oct 11, 2025
8da12cf
Apply suggestions from code review
zhangchiqing Oct 11, 2025
d0a810b
fix lint
zhangchiqing Oct 11, 2025
d9abbbc
Merge remote-tracking branch 'origin/leo/refactor-stored-chunk-data-p…
zhangchiqing Oct 11, 2025
fbf4612
fix lint
zhangchiqing Oct 11, 2025
903d24f
remove chunk data pack
zhangchiqing Oct 11, 2025
08fee4f
rename to FromUntrustedChunkDataPack
zhangchiqing Oct 11, 2025
50fd2d9
fix linter
zhangchiqing Oct 11, 2025
0d924c1
refactor rollback executed height
zhangchiqing Oct 11, 2025
713f6c9
fix tests
zhangchiqing Oct 11, 2025
8ebaa82
rename stored chunk data pack ids to chunk data pack ids
zhangchiqing Oct 11, 2025
72259e4
renaming to chunk data pack id
zhangchiqing Oct 11, 2025
1960afc
add back chunk data pack stored.BatchRemove
zhangchiqing Oct 11, 2025
22f5655
update mocks
zhangchiqing Oct 11, 2025
613f49c
reuse the original codeChunkDataPack prefix
zhangchiqing Oct 11, 2025
06a47ba
Merge branch 'master' into leo/refactor-stored-chunk-data-pack
zhangchiqing Oct 11, 2025
ed315a9
use chunk data pack constructor
zhangchiqing Oct 11, 2025
94a3825
rename
zhangchiqing Oct 11, 2025
ae9aaad
update comments
zhangchiqing Oct 11, 2025
36a9e08
update comments
zhangchiqing Oct 11, 2025
a6ccf29
update comments for StoredChunkDataPack.Equals
zhangchiqing Oct 11, 2025
2a8093e
update mocks
zhangchiqing Oct 11, 2025
39ff4a5
update storage tests
zhangchiqing Oct 11, 2025
52b05be
skip the chunk data pack roll back if storing failed
zhangchiqing Oct 15, 2025
28a3fae
Merge branch 'master' into leo/refactor-stored-chunk-data-pack
j1010001 Oct 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion cmd/execution_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -762,8 +762,12 @@ func (exeNode *ExecutionNode) LoadExecutionState(
}
return nil
})

chunkDB := pebbleimpl.ToDB(chunkDataPackDB)
storedChunkDataPacks := store.NewStoredChunkDataPacks(
node.Metrics.Cache, chunkDB, exeNode.exeConf.chunkDataPackCacheSize)
chunkDataPacks := store.NewChunkDataPacks(node.Metrics.Cache,
pebbleimpl.ToDB(chunkDataPackDB), exeNode.collections, exeNode.exeConf.chunkDataPackCacheSize)
chunkDB, storedChunkDataPacks, exeNode.collections, exeNode.exeConf.chunkDataPackCacheSize)

getLatestFinalized := func() (uint64, error) {
final, err := node.State.Final().Head()
Expand Down
3 changes: 2 additions & 1 deletion cmd/util/cmd/read-badger/cmd/chunk_data_pack.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ var chunkDataPackCmd = &cobra.Command{

metrics := metrics.NewNoopCollector()
collections := store.NewCollections(db, store.NewTransactions(metrics, db))
storedChunkDataPacks := store.NewStoredChunkDataPacks(metrics, db, 1)
chunkDataPacks := store.NewChunkDataPacks(metrics,
db, collections, 1)
db, storedChunkDataPacks, collections, 1)

log.Info().Msgf("getting chunk data pack by chunk id: %v", chunkID)
chunkDataPack, err := chunkDataPacks.ByChunkID(chunkID)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,14 @@ func runE(*cobra.Command, []string) error {
return fmt.Errorf("could not open chunk data pack DB at %v: %w", flagChunkDataPackDir, err)
}
chunkDataPacksDB := pebbleimpl.ToDB(chunkDataPacksPebbleDB)
chunkDataPacks := store.NewChunkDataPacks(metrics, chunkDataPacksDB, collections, 1000)
chunkBatch := chunkDataPacksDB.NewBatch()
defer chunkBatch.Close()

writeBatch := db.NewBatch()
defer writeBatch.Close()

err = removeExecutionResultsFromHeight(
writeBatch,
chunkBatch,
storedChunkDataPacks := store.NewStoredChunkDataPacks(metrics, chunkDataPacksDB, 1000)
chunkDataPacks := store.NewChunkDataPacks(metrics, chunkDataPacksDB, storedChunkDataPacks, collections, 1000)
protocolDBBatch := db.NewBatch()
defer protocolDBBatch.Close()

// collect chunk IDs to be removed
chunkIDs, err := removeExecutionResultsFromHeight(
protocolDBBatch,
state,
transactionResults,
commits,
Expand All @@ -112,12 +110,19 @@ func runE(*cobra.Command, []string) error {
}

// remove chunk data packs first; otherwise the index needed to locate them would already be removed.
err = chunkBatch.Commit()
if err != nil {
return fmt.Errorf("could not commit chunk batch at %v: %w", flagHeight, err)
if len(chunkIDs) > 0 {
chunkDataPackIDs, err := chunkDataPacks.BatchRemove(chunkIDs, protocolDBBatch)
if err != nil {
return fmt.Errorf("could not remove chunk data packs at %v: %w", flagHeight, err)
}

err = storedChunkDataPacks.Remove(chunkDataPackIDs)
if err != nil {
return fmt.Errorf("could not commit chunk batch at %v: %w", flagHeight, err)
}
Comment on lines +114 to +122
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ repeated removal (?)

chunkDataPacks.BatchRemove internally also calls storedChunkDataPacks.Remove

}

err = writeBatch.Commit()
err = protocolDBBatch.Commit()
if err != nil {
return fmt.Errorf("could not flush write batch at %v: %w", flagHeight, err)
}
Expand All @@ -141,8 +146,7 @@ func runE(*cobra.Command, []string) error {
// use concrete storage implementations directly instead of the storage interfaces, so that the
// interfaces don't need to include the Remove methods
Comment on lines 146 to 147
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this doc could use an update, please.

func removeExecutionResultsFromHeight(
writeBatch storage.Batch,
chunkBatch storage.Batch,
protocolDBBatch storage.Batch,
protoState protocol.State,
transactionResults storage.TransactionResults,
commits storage.Commits,
Expand All @@ -151,40 +155,42 @@ func removeExecutionResultsFromHeight(
myReceipts storage.MyExecutionReceipts,
events storage.Events,
serviceEvents storage.ServiceEvents,
fromHeight uint64) error {
fromHeight uint64) ([]flow.Identifier, error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer to put the return value on the next line. Thereby, the function body is visually separated from eh header via a line without indentation. It helps human brains to quicker structure the visually seen data according to broad structural information (color, indentation, etc)

Suggested change
fromHeight uint64) ([]flow.Identifier, error) {
fromHeight uint64,
) ([]flow.Identifier, error) {

log.Info().Msgf("removing results for blocks from height: %v", fromHeight)

root := protoState.Params().FinalizedRoot()

if fromHeight <= root.Height {
return fmt.Errorf("can only remove results for block above root block. fromHeight: %v, rootHeight: %v", fromHeight, root.Height)
return nil, fmt.Errorf("can only remove results for block above root block. fromHeight: %v, rootHeight: %v", fromHeight, root.Height)
}

final, err := protoState.Final().Head()
if err != nil {
return fmt.Errorf("could get not finalized height: %w", err)
return nil, fmt.Errorf("could get not finalized height: %w", err)
}

if fromHeight > final.Height {
return fmt.Errorf("could not remove results for unfinalized height: %v, finalized height: %v", fromHeight, final.Height)
return nil, fmt.Errorf("could not remove results for unfinalized height: %v, finalized height: %v", fromHeight, final.Height)
}

finalRemoved := 0
total := int(final.Height-fromHeight) + 1
var allChunkIDs []flow.Identifier

// removing for finalized blocks
for height := fromHeight; height <= final.Height; height++ {
head, err := protoState.AtHeight(height).Head()
if err != nil {
return fmt.Errorf("could not get header at height: %w", err)
return nil, fmt.Errorf("could not get header at height: %w", err)
}

blockID := head.ID()

err = removeForBlockID(writeBatch, chunkBatch, commits, transactionResults, results, chunkDataPacks, myReceipts, events, serviceEvents, blockID)
chunkIDs, err := removeForBlockID(protocolDBBatch, commits, transactionResults, results, chunkDataPacks, myReceipts, events, serviceEvents, blockID)
if err != nil {
return fmt.Errorf("could not remove result for finalized block: %v, %w", blockID, err)
return nil, fmt.Errorf("could not remove result for finalized block: %v, %w", blockID, err)
}
allChunkIDs = append(allChunkIDs, chunkIDs...)

finalRemoved++
log.Info().Msgf("result at height %v has been removed. progress (%v/%v)", height, finalRemoved, total)
Expand All @@ -193,18 +199,18 @@ func removeExecutionResultsFromHeight(
// removing for pending blocks
pendings, err := protoState.Final().Descendants()
if err != nil {
return fmt.Errorf("could not get pending block: %w", err)
return nil, fmt.Errorf("could not get pending block: %w", err)
}

pendingRemoved := 0
total = len(pendings)

for _, pending := range pendings {
err = removeForBlockID(writeBatch, chunkBatch, commits, transactionResults, results, chunkDataPacks, myReceipts, events, serviceEvents, pending)

chunkIDs, err := removeForBlockID(protocolDBBatch, commits, transactionResults, results, chunkDataPacks, myReceipts, events, serviceEvents, pending)
if err != nil {
return fmt.Errorf("could not remove result for pending block %v: %w", pending, err)
return nil, fmt.Errorf("could not remove result for pending block %v: %w", pending, err)
}
allChunkIDs = append(allChunkIDs, chunkIDs...)

pendingRemoved++
log.Info().Msgf("result for pending block %v has been removed. progress (%v/%v) ", pending, pendingRemoved, total)
Expand All @@ -213,15 +219,14 @@ func removeExecutionResultsFromHeight(
log.Info().Msgf("removed height from %v. removed for %v finalized blocks, and %v pending blocks",
fromHeight, finalRemoved, pendingRemoved)

return nil
return allChunkIDs, nil
}

// removeForBlockID remove block execution related data for a given block.
// All data to be removed will be removed in a batch write.
// It bubbles up any error encountered
func removeForBlockID(
writeBatch storage.Batch,
chunkBatch storage.Batch,
protocolDBBatch storage.Batch,
commits storage.Commits,
transactionResults storage.TransactionResults,
results storage.ExecutionResults,
Expand All @@ -230,74 +235,70 @@ func removeForBlockID(
events storage.Events,
serviceEvents storage.ServiceEvents,
blockID flow.Identifier,
) error {
) ([]flow.Identifier, error) {
result, err := results.ByBlockID(blockID)
if errors.Is(err, storage.ErrNotFound) {
log.Info().Msgf("result not found for block %v", blockID)
return nil
return nil, nil
}

if err != nil {
return fmt.Errorf("could not find result for block %v: %w", blockID, err)
return nil, fmt.Errorf("could not find result for block %v: %w", blockID, err)
}

chunkIDs := make([]flow.Identifier, 0, len(result.Chunks))
for _, chunk := range result.Chunks {
chunkID := chunk.ID()
// remove chunk data pack
err := chunks.BatchRemove(chunkID, chunkBatch)
if err != nil {
return fmt.Errorf("could not remove chunk id %v for block id %v: %w", chunkID, blockID, err)
}

chunkIDs = append(chunkIDs, chunkID)
}

// remove commits
err = commits.BatchRemoveByBlockID(blockID, writeBatch)
err = commits.BatchRemoveByBlockID(blockID, protocolDBBatch)
if err != nil {
if errors.Is(err, storage.ErrNotFound) {
return fmt.Errorf("could not remove by block ID %v: %w", blockID, err)
return nil, fmt.Errorf("could not remove by block ID %v: %w", blockID, err)
}

log.Warn().Msgf("statecommitment not found for block %v", blockID)
}

// remove transaction results
err = transactionResults.BatchRemoveByBlockID(blockID, writeBatch)
err = transactionResults.BatchRemoveByBlockID(blockID, protocolDBBatch)
if err != nil {
return fmt.Errorf("could not remove transaction results by BlockID %v: %w", blockID, err)
return nil, fmt.Errorf("could not remove transaction results by BlockID %v: %w", blockID, err)
}

// remove own execution results index
err = myReceipts.BatchRemoveIndexByBlockID(blockID, writeBatch)
err = myReceipts.BatchRemoveIndexByBlockID(blockID, protocolDBBatch)
if err != nil {
if !errors.Is(err, storage.ErrNotFound) {
return fmt.Errorf("could not remove own receipt by BlockID %v: %w", blockID, err)
return nil, fmt.Errorf("could not remove own receipt by BlockID %v: %w", blockID, err)
}

log.Warn().Msgf("own receipt not found for block %v", blockID)
}

// remove events
err = events.BatchRemoveByBlockID(blockID, writeBatch)
err = events.BatchRemoveByBlockID(blockID, protocolDBBatch)
if err != nil {
return fmt.Errorf("could not remove events by BlockID %v: %w", blockID, err)
return nil, fmt.Errorf("could not remove events by BlockID %v: %w", blockID, err)
}

// remove service events
err = serviceEvents.BatchRemoveByBlockID(blockID, writeBatch)
err = serviceEvents.BatchRemoveByBlockID(blockID, protocolDBBatch)
if err != nil {
return fmt.Errorf("could not remove service events by blockID %v: %w", blockID, err)
return nil, fmt.Errorf("could not remove service events by blockID %v: %w", blockID, err)
}

// remove execution result index
err = results.BatchRemoveIndexByBlockID(blockID, writeBatch)
err = results.BatchRemoveIndexByBlockID(blockID, protocolDBBatch)
if err != nil {
if !errors.Is(err, storage.ErrNotFound) {
return fmt.Errorf("could not remove result by BlockID %v: %w", blockID, err)
return nil, fmt.Errorf("could not remove result by BlockID %v: %w", blockID, err)
}

log.Warn().Msgf("result not found for block %v", blockID)
}

Copy link
Member

@AlexHentschel AlexHentschel Oct 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[This might be more of a suggestion for your follow up PR you mentioned above.]

I would include the chunk data pack index here:

  • we first read the IDs of chunk data packs (this logic)
  • essentially then we remove the index (these lines)
  • In other words, we would be only remove the chunk data pack index chunkID -> chunkDataPackID here.
  • and we would return the IDs of the chunk data packs encountered.

return nil
return chunkIDs, nil
}
Loading
Loading