Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
5cbb377
add original mtrie implementation as base for comparison
zhangchiqing May 29, 2026
278f069
add payloadless node and trie implementation
zhangchiqing May 29, 2026
df7c91d
remove ExtendHashToHeight and replace baseHash to leafHash in naming
zhangchiqing Jun 18, 2026
dd66c68
add comments for NewMTrie
zhangchiqing Jun 18, 2026
4466ce1
add original tests for payloadless trie
zhangchiqing May 29, 2026
0b4ca27
update payloadless tests
zhangchiqing May 29, 2026
3a39ef5
add original forest and trieCache implementation as base for comparison
zhangchiqing May 29, 2026
26df549
adapt forest and trieCache to payloadless implementation
zhangchiqing May 29, 2026
51cdfc7
add original forest_test and trieCache_test as base for comparison
zhangchiqing May 29, 2026
d16391e
adapt forest and trieCache tests to payloadless implementation
zhangchiqing May 29, 2026
f6935ff
add forest equivilence tests
zhangchiqing May 30, 2026
8b0347b
use copy
zhangchiqing Jun 8, 2026
89acac3
copy the leaf hash for forest
zhangchiqing Jun 18, 2026
1e3d2ac
fix error message in partial trie
zhangchiqing Jun 3, 2026
e3abfee
add payloadless ledger base
zhangchiqing Jun 4, 2026
9b85e83
add ledger/complete/payloadless_ledger.go
zhangchiqing Jun 4, 2026
de9df7f
add payloadless ledger tests
zhangchiqing Jun 4, 2026
fc80c43
remote ledger service
zhangchiqing Jun 4, 2026
8cf9ad5
add payloadless view committer
zhangchiqing Jun 4, 2026
a66eeaa
refactor reconstructPayloadlessProof
zhangchiqing Jun 8, 2026
ef9fbdf
add --payloadless flag
zhangchiqing Jun 5, 2026
f138980
fix lint
zhangchiqing Jun 8, 2026
47e5882
add v7 checkpointer
zhangchiqing Jun 5, 2026
4b67815
add checkpoint v7
zhangchiqing Jun 5, 2026
76fa77f
add checkpoint v6->v7 converter
zhangchiqing Jun 6, 2026
f0a338e
reduce diff between checkpoint v7 and checkpoint v6
zhangchiqing Jun 9, 2026
4986800
update checkpoint cmd util
zhangchiqing Jun 9, 2026
b76f302
refactor to minimize the change
zhangchiqing Jun 9, 2026
8d341b5
rename LatestCheckpoint to LatestCheckpointV6
zhangchiqing Jun 9, 2026
2334ee4
add payloadless WAL
zhangchiqing Jun 6, 2026
aaa9aab
rename v6 checkpointer related functions
zhangchiqing Jun 10, 2026
e90c749
update logger
zhangchiqing Jun 11, 2026
0fb467e
Automatic V6→V7 root-checkpoint conversion at bootstrap, and refactor
zhangchiqing Jun 11, 2026
2203740
refactor ReplayOnPayloadlessForest
zhangchiqing Jun 11, 2026
419eb97
minimize diff in ledger/complete/payloadless_compactor.go
zhangchiqing Jun 11, 2026
08fe4e5
remove unused Ledger.Checkpointer method
zhangchiqing Jun 11, 2026
e91d3ae
remove duplicated logger in ledger_with_compactor
zhangchiqing Jun 11, 2026
4a54b42
fix replay segments
zhangchiqing Jun 17, 2026
d799f88
remote ledger / local ledger factory using config
zhangchiqing Jun 6, 2026
68fdf8f
support payloadless in localnet
zhangchiqing Jun 6, 2026
38617e8
bootstrap by converting v6 root checkpoint into v7
zhangchiqing Jun 7, 2026
53fd916
remove LocalLedgerFactory simplify ledger factory
zhangchiqing Jun 12, 2026
99fd28e
fix ledger cmd for payloadless mode
zhangchiqing Jun 13, 2026
22e7da8
auto convert v7 root checkpoint in payloadless mode for localnet
zhangchiqing Jun 13, 2026
50af9ca
fix --payloadless mode for execution builder in remote ledger model
zhangchiqing Jun 15, 2026
8177757
fix require v7 checkpoint
zhangchiqing Jun 18, 2026
4a9565a
add convert_stream
zhangchiqing Jun 15, 2026
e4a5840
remove temp files
zhangchiqing Jun 16, 2026
9566083
remove verify leaf hash flag
zhangchiqing Jun 18, 2026
a9c6645
fix lint
zhangchiqing Jun 18, 2026
6e25ca1
add checkpoint iterate nodes function and util
zhangchiqing Jun 24, 2026
adab97d
handle interim node with default node as child
zhangchiqing Jun 25, 2026
534061d
update comments
zhangchiqing Jun 25, 2026
f4dcc0e
update checkpoint list tries
zhangchiqing Jun 26, 2026
3b2789a
checkpoint collect stats only support v6
zhangchiqing Jun 26, 2026
1cbad8e
add util - checkpoint-verify-hash
zhangchiqing Jun 26, 2026
2e0691e
log progress
zhangchiqing Jun 29, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 109 additions & 9 deletions cmd/execution_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ import (
"github.com/onflow/flow-go/engine/execution/checker"
"github.com/onflow/flow-go/engine/execution/computation"
"github.com/onflow/flow-go/engine/execution/computation/committer"
"github.com/onflow/flow-go/engine/execution/computation/computer"
txmetrics "github.com/onflow/flow-go/engine/execution/computation/metrics"
"github.com/onflow/flow-go/engine/execution/ingestion"
"github.com/onflow/flow-go/engine/execution/ingestion/fetcher"
Expand All @@ -64,6 +65,7 @@ import (
"github.com/onflow/flow-go/fvm/storage/snapshot"
"github.com/onflow/flow-go/fvm/systemcontracts"
"github.com/onflow/flow-go/ledger"
"github.com/onflow/flow-go/ledger/complete"
"github.com/onflow/flow-go/ledger/complete/wal"
ledgerfactory "github.com/onflow/flow-go/ledger/factory"
modelbootstrap "github.com/onflow/flow-go/model/bootstrap"
Expand Down Expand Up @@ -125,12 +127,13 @@ type ExecutionNode struct {

ingestionUnit *engine.Unit

collector *metrics.ExecutionCollector
executionState state.ExecutionState
followerState protocol.FollowerState
committee hotstuff.DynamicCommittee
ledgerStorage ledger.Ledger
registerStore *storehouse.RegisterStore
collector *metrics.ExecutionCollector
executionState state.ExecutionState
followerState protocol.FollowerState
committee hotstuff.DynamicCommittee
ledgerStorage ledger.Ledger // set iff !exeConf.payloadless
payloadlessLedger ledger.PayloadlessLedger // set iff exeConf.payloadless
registerStore *storehouse.RegisterStore

// storage
events storageerr.Events
Expand Down Expand Up @@ -626,7 +629,16 @@ func (exeNode *ExecutionNode) LoadProviderEngine(
})
}

ledgerViewCommitter := committer.NewLedgerViewCommitter(exeNode.ledgerStorage, node.Tracer)
var ledgerViewCommitter computer.ViewCommitter
if exeNode.exeConf.payloadless {
ledgerViewCommitter = committer.NewPayloadlessLedgerViewCommitter(
exeNode.payloadlessLedger,
node.Tracer,
complete.DefaultPathFinderVersion,
)
} else {
ledgerViewCommitter = committer.NewLedgerViewCommitter(exeNode.ledgerStorage, node.Tracer)
}
exeNode.exeConf.computationConfig.TokenTrackingEnabled = exeNode.exeConf.tokenTrackingEnabled
manager, err := computation.New(
node.Logger,
Expand Down Expand Up @@ -801,8 +813,21 @@ func (exeNode *ExecutionNode) LoadExecutionState(

// migrate execution data for last sealed and executed block

// In full mode, both args are the same *complete.Ledger; in payloadless
// mode the first is the payloadless ledger (narrow LedgerStateChecker)
// and the snapshot-source slot is nil because storehouse is required.
var stateChecker state.LedgerStateChecker
var snapshotLedger ledger.Ledger
if exeNode.exeConf.payloadless {
stateChecker = exeNode.payloadlessLedger
snapshotLedger = nil
} else {
stateChecker = exeNode.ledgerStorage
snapshotLedger = exeNode.ledgerStorage
}
exeNode.executionState = state.NewExecutionState(
exeNode.ledgerStorage,
stateChecker,
snapshotLedger,
exeNode.commits,
node.Storage.Blocks,
node.Storage.Headers,
Expand Down Expand Up @@ -916,7 +941,41 @@ func (exeNode *ExecutionNode) LoadExecutionStateLedger(
module.ReadyDoneAware,
error,
) {
// Create ledger using factory
// Ledger selection is two independent choices passed to the factory:
// - --payloadless picks the payloadless vs. full ledger (this branch).
// - --ledger-service-addr (Config.LedgerServiceAddr), when set, means this
// node connects to a remote ledger service rather than running a local
// ledger; the factory then returns a gRPC client instead of a local one.
// Combined: payloadless + remote address -> remote payloadless client;
// payloadless + no address -> local payloadless ledger; likewise for full mode.
if exeNode.exeConf.payloadless {
// Payloadless mode. ValidateFlags enforces --enable-storehouse,
// so the storehouse is the value source for reads.
pl, err := ledgerfactory.NewPayloadlessLedger(ledgerfactory.Config{
LedgerServiceAddr: exeNode.exeConf.ledgerServiceAddr,
LedgerMaxRequestSize: exeNode.exeConf.ledgerMaxRequestSize,
LedgerMaxResponseSize: exeNode.exeConf.ledgerMaxResponseSize,
Triedir: exeNode.exeConf.triedir,
MTrieCacheSize: exeNode.exeConf.mTrieCacheSize,
CheckpointDistance: exeNode.exeConf.checkpointDistance,
CheckpointsToKeep: exeNode.exeConf.checkpointsToKeep,
MetricsRegisterer: node.MetricsRegisterer,
WALMetrics: exeNode.collector,
LedgerMetrics: exeNode.collector,
Logger: node.Logger,
}, exeNode.toTriggerCheckpoint)
if err != nil {
return nil, fmt.Errorf("could not create payloadless ledger: %w", err)
}
exeNode.payloadlessLedger = pl
// exeNode.ledgerStorage stays nil in payloadless mode; the
// LedgerStateChecker slot in state.NewExecutionState receives the
// payloadless ledger directly, and the snapshotLedger slot stays
// nil because the storehouse is the value source.
return pl, nil
}

// Full mode (default): WAL-backed ledger via the factory.
ledgerStorage, err := ledgerfactory.NewLedger(ledgerfactory.Config{
LedgerServiceAddr: exeNode.exeConf.ledgerServiceAddr,
LedgerMaxRequestSize: exeNode.exeConf.ledgerMaxRequestSize,
Expand Down Expand Up @@ -1426,11 +1485,52 @@ func (exeNode *ExecutionNode) LoadBootstrapper(node *NodeConfig) error {

// when bootstrapping, the bootstrap folder must have a checkpoint file
// we need to cover this file to the trie folder to restore the trie to restore the execution state.
//
// Note: in payloadless mode the V6 root checkpoint placed here is later
// converted to root.checkpoint.v7 by ledgerfactory.NewPayloadlessLedger
// before the bundle reads it. Bootstrap itself stays mode-agnostic.
err = copyBootstrapState(node.BootstrapDir, exeNode.exeConf.triedir)
if err != nil {
return fmt.Errorf("could not load bootstrap state from checkpoint file: %w", err)
}

// In payloadless (V7) mode the spork only produces a V6 root.checkpoint.
// Convert it to a V7 root checkpoint here so the payloadless ledger can
// seed its forest from it on first boot; later restarts reuse this file
// (or a newer numbered V7 checkpoint written by the compactor). The
// HasRootCheckpointV7 guard keeps a re-entry after an interrupted
// bootstrap from hitting ConvertCheckpointV6ToV7's "output exists" check.
//
// Only nodes running a local payloadless ledger need this: a node using a
// remote ledger service (ledgerServiceAddr set) never reads its local trie
// dir, and the remote ledger service performs its own V7 bootstrap. Skipping
// the conversion avoids a needless full-forest load on remote-ledger nodes.
//
// TODO: ConvertCheckpointV6ToV7 reads the entire V6 forest into memory
// before emitting V7, a memory/time spike at first boot for mainnet-scale
// root checkpoints. A future optimization is to convert subtrie-by-subtrie
// without loading the whole forest.
if exeNode.exeConf.payloadless && exeNode.exeConf.ledgerServiceAddr == "" {
triedir := exeNode.exeConf.triedir
hasV7Root, err := wal.HasRootCheckpointV7(triedir)
if err != nil {
return fmt.Errorf("could not check for V7 root checkpoint: %w", err)
}
if !hasV7Root {
err = wal.ConvertCheckpointV6ToV7(
triedir,
modelbootstrap.FilenameWALRootCheckpoint,
triedir,
modelbootstrap.FilenameWALRootCheckpoint+wal.V7FileSuffix,
node.Logger,
16,
)
if err != nil {
return fmt.Errorf("could not convert V6 root checkpoint to V7 for payloadless node: %w", err)
}
}
}

err = bootstrapper.BootstrapExecutionDatabase(node.StorageLockMgr, node.ProtocolDB, node.RootSeal)
if err != nil {
return fmt.Errorf("could not bootstrap execution database: %w", err)
Expand Down
12 changes: 12 additions & 0 deletions cmd/execution_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ type ExecutionConfig struct {
// file descriptors causing connection failures.
onflowOnlyLNs bool
enableStorehouse bool
payloadless bool
enableBackgroundStorehouseIndexing bool
backgroundIndexerHeightsPerSecond uint64
enableChecker bool
Expand Down Expand Up @@ -154,6 +155,9 @@ func (exeConf *ExecutionConfig) SetupFlags(flags *pflag.FlagSet) {

flags.BoolVar(&exeConf.onflowOnlyLNs, "temp-onflow-only-lns", false, "do not use unless required. forces node to only request collections from onflow collection nodes")
flags.BoolVar(&exeConf.enableStorehouse, "enable-storehouse", false, "enable storehouse to store registers on disk, default is false")
flags.BoolVar(&exeConf.payloadless, "payloadless", false,
"run the execution node with a payloadless ledger that stores only leaf hashes; "+
"register values are read from the storehouse during execution. requires --enable-storehouse.")
flags.BoolVar(&exeConf.enableBackgroundStorehouseIndexing, "enable-background-storehouse-indexing", false, "enable background indexing of storehouse data while storehouse is disabled to eliminate downtime when enabling it. default: false.")
flags.Uint64Var(&exeConf.backgroundIndexerHeightsPerSecond, "background-indexer-heights-per-second", storehouse.DefaultHeightsPerSecond, fmt.Sprintf("rate limit for background indexer in heights per second. 0 means no rate limiting. default: %v", storehouse.DefaultHeightsPerSecond))
flags.BoolVar(&exeConf.enableChecker, "enable-checker", true, "enable checker to check the correctness of the execution result, default is true")
Expand Down Expand Up @@ -198,5 +202,13 @@ func (exeConf *ExecutionConfig) ValidateFlags() error {
if exeConf.enableStorehouse {
exeConf.enableBackgroundStorehouseIndexing = false
}
// Payloadless requires storehouse: the payloadless ledger does not retain
// register values; the storehouse is the only available value source for
// both proof reconstruction and snapshot reads.
if exeConf.payloadless && !exeConf.enableStorehouse {
return errors.New("--payloadless requires --enable-storehouse: " +
"the payloadless ledger does not store register values; " +
"the storehouse must provide them at execution time")
}
return nil
}
60 changes: 50 additions & 10 deletions cmd/ledger/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ import (
"go.uber.org/atomic"
"google.golang.org/grpc"

"github.com/onflow/flow-go/ledger"
ledgerfactory "github.com/onflow/flow-go/ledger/factory"
ledgerpb "github.com/onflow/flow-go/ledger/protobuf"
"github.com/onflow/flow-go/ledger/remote"
"github.com/onflow/flow-go/module"
"github.com/onflow/flow-go/module/irrecoverable"
"github.com/onflow/flow-go/module/metrics"
)
Expand All @@ -35,6 +37,7 @@ var (
checkpointDist = flag.Uint("checkpoint-distance", 100, "Checkpoint distance")
checkpointsToKeep = flag.Uint("checkpoints-to-keep", 3, "Number of checkpoints to keep")
logLevel = flag.String("loglevel", "info", "Log level (panic, fatal, error, warn, info, debug)")
payloadless = flag.Bool("payloadless", false, "Run the ledger service in payloadless mode (stores leaf hashes instead of full payloads; requires a V7 checkpoint in --triedir).")
maxRequestSize = flag.Uint("max-request-size", 1<<30, "Maximum request message size in bytes (default: 1 GiB)")
maxResponseSize = flag.Uint("max-response-size", 1<<30, "Maximum response message size in bytes (default: 1 GiB)")
)
Expand Down Expand Up @@ -72,14 +75,18 @@ func main() {
Str("admin_addr", *adminAddr).
Uint("metrics_port", *metricsPort).
Int("mtrie_cache_size", *mtrieCacheSize).
Bool("payloadless", *payloadless).
Msg("starting ledger service")

// Create trigger for manual checkpointing (used by admin command)
triggerCheckpointOnNextSegmentFinish := atomic.NewBool(false)

// Create ledger using factory
// Create ledger using factory. The same config drives both modes; the
// payloadless flag selects which factory constructor (and gRPC service) is
// wired up. A ledger gRPC server registers either the full [remote.Service]
// or the [remote.PayloadlessService], never both.
metricsCollector := metrics.NewLedgerCollector("ledger", "wal")
ledgerStorage, err := ledgerfactory.NewLedger(ledgerfactory.Config{
factoryConfig := ledgerfactory.Config{
Triedir: *triedir,
MTrieCacheSize: uint32(*mtrieCacheSize),
CheckpointDistance: *checkpointDist,
Expand All @@ -88,24 +95,58 @@ func main() {
WALMetrics: metricsCollector,
LedgerMetrics: metricsCollector,
Logger: logger,
}, triggerCheckpointOnNextSegmentFinish)
if err != nil {
logger.Fatal().Err(err).Msg("failed to create ledger")
}

// ledgerStorage is the lifecycle handle used for readiness, health check,
// and shutdown regardless of mode. registerService binds the mode-specific
// gRPC service onto the server once it is created.
var ledgerStorage module.ReadyDoneAware
var registerService func(grpcServer *grpc.Server)

if *payloadless {
payloadlessLedger, err := ledgerfactory.NewPayloadlessLedger(factoryConfig, triggerCheckpointOnNextSegmentFinish)
if err != nil {
logger.Fatal().Err(err).Msg("failed to create payloadless ledger")
}
ledgerStorage = payloadlessLedger
registerService = func(grpcServer *grpc.Server) {
ledgerpb.RegisterPayloadlessLedgerServiceServer(grpcServer, remote.NewPayloadlessService(payloadlessLedger, logger))
}
} else {
fullLedger, err := ledgerfactory.NewLedger(factoryConfig, triggerCheckpointOnNextSegmentFinish)
if err != nil {
logger.Fatal().Err(err).Msg("failed to create ledger")
}
ledgerStorage = fullLedger
registerService = func(grpcServer *grpc.Server) {
ledgerpb.RegisterLedgerServiceServer(grpcServer, remote.NewService(fullLedger, logger))
}
}

// Wait for ledger to be ready (WAL replay)
logger.Info().Msg("waiting for ledger initialization...")
<-ledgerStorage.Ready()
logger.Info().Msg("ledger ready")

// Both the full and payloadless ledgers expose state inspection for the
// post-startup health check, though only the full ledger declares it on its
// public interface; assert it here so the check works in either mode.
inspector, ok := ledgerStorage.(interface {
StateCount() int
StateByIndex(index int) (ledger.State, error)
})
if !ok {
logger.Fatal().Msg("ledger does not support state inspection")
}

// Check if any trie is loaded after startup
stateCount := ledgerStorage.StateCount()
stateCount := inspector.StateCount()
if stateCount == 0 {
logger.Fatal().Msg("no trie loaded after startup - no states available")
}

// Get the last trie state for logging
lastState, err := ledgerStorage.StateByIndex(-1)
lastState, err := inspector.StateByIndex(-1)
if err != nil {
logger.Fatal().Err(err).Msg("failed to get last state for logging")
}
Expand All @@ -123,9 +164,8 @@ func main() {
grpc.MaxSendMsgSize(int(*maxResponseSize)),
)

// Create and register ledger service
ledgerService := remote.NewService(ledgerStorage, logger)
ledgerpb.RegisterLedgerServiceServer(grpcServer, ledgerService)
// Register the mode-specific ledger service
registerService(grpcServer)

// Create listeners based on provided flags
type listenerInfo struct {
Expand Down
37 changes: 37 additions & 0 deletions cmd/util/cmd/checkpoint-collect-stats/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package checkpoint_collect_stats
import (
"cmp"
"encoding/hex"
"fmt"
"math"
"slices"
"strings"
Expand Down Expand Up @@ -315,6 +316,15 @@ func getPayloadStatsFromCheckpoint(payloadCallBack func(payload *ledger.Payload)
memAllocBefore := debug.GetHeapAllocsBytes()
log.Info().Msgf("loading checkpoint(s) from %v", flagCheckpointDir)

// checkpoint-collect-stats analyzes payload contents (register types, sizes,
// account info). V7 (payloadless) checkpoints store only leaf hashes and contain
// no payloads, so they cannot be processed here. The WAL replay below loads only
// V6 checkpoints and silently ignores V7 files, which would otherwise produce
// misleading (stale or empty) stats. Fail fast with a clear error instead.
if err := requireV6Checkpoint(flagCheckpointDir); err != nil {
log.Fatal().Err(err).Msg("cannot collect stats from checkpoint")
}

diskWal, err := wal.NewDiskWAL(zerolog.Nop(), nil, &metrics.NoopCollector{}, flagCheckpointDir, complete.DefaultCacheSize, pathfinder.PathByteSize, wal.SegmentSize)
if err != nil {
log.Fatal().Err(err).Msg("cannot create WAL")
Expand Down Expand Up @@ -369,6 +379,33 @@ func getPayloadStatsFromCheckpoint(payloadCallBack func(payload *ledger.Payload)
return ledgerStats
}

// requireV6Checkpoint returns an error if the latest checkpoint in dir is a V7
// (payloadless) checkpoint. checkpoint-collect-stats requires full payloads,
// which V7 checkpoints do not contain.
//
// Only numbered checkpoints are considered (the WAL bootstrap loads the latest
// numbered V6 checkpoint). If the latest numbered checkpoint is V7, this command
// would otherwise silently fall back to an older V6 checkpoint or an empty state,
// reporting misleading stats.
//
// Expected error returns during normal operation:
// - an error when the latest checkpoint in dir is a V7 (payloadless) checkpoint
func requireV6Checkpoint(dir string) error {
_, latest, err := wal.ListCheckpointsWithInfo(dir)
if err != nil {
return fmt.Errorf("cannot list checkpoints in %s: %w", dir, err)
}

if latest != nil && latest.Version == wal.VersionV7 {
return fmt.Errorf(
"checkpoint %d in %s is a V7 (payloadless) checkpoint, which contains no payloads; "+
"checkpoint-collect-stats requires a V6 checkpoint",
latest.Number, dir)
}

return nil
}

func getRegisterStats(valueSizesByType sizesByType) []RegisterStatsByTypes {
domainStats := make([]RegisterStatsByTypes, 0, len(common.AllStorageDomains))
var allDomainSizes []float64
Expand Down
Loading
Loading