diff --git a/services/subtreevalidation/SubtreeValidation.go b/services/subtreevalidation/SubtreeValidation.go
index adbb825b12..eb5a327a54 100644
--- a/services/subtreevalidation/SubtreeValidation.go
+++ b/services/subtreevalidation/SubtreeValidation.go
@@ -99,22 +99,12 @@ func (u *Server) SetSubtreeExists(_ *chainhash.Hash) error {
 }
 
 // GetSubtreeExists checks if a subtree exists in the local storage.
-//
-// This method queries the local storage to determine whether a specific subtree
-// has already been processed and stored. It's used to optimize processing by
-// avoiding duplicate work on subtrees that have already been validated.
-//
-// Parameters:
-// - ctx: Context for cancellation and request-scoped values
-// - subtreeHash: The hash identifier of the subtree to check
-//
-// Returns:
-// - bool: Always false in the current implementation
-// - error: Always nil in the current implementation
-//
-// TODO: Implement actual local storage lookup for subtree existence.
-func (u *Server) GetSubtreeExists(_ context.Context, _ *chainhash.Hash) (bool, error) {
-	return false, nil
+func (u *Server) GetSubtreeExists(ctx context.Context, hash *chainhash.Hash) (bool, error) {
+	if u.subtreeStore == nil {
+		return false, nil
+	}
+
+	return u.subtreeStore.Exists(ctx, hash[:], fileformat.FileTypeSubtree)
 }
 
 // txMetaCacheOps defines the interface for transaction metadata cache operations.
diff --git a/services/subtreevalidation/check_block_subtrees.go b/services/subtreevalidation/check_block_subtrees.go
index 3ad6662e04..89d2baf58b 100644
--- a/services/subtreevalidation/check_block_subtrees.go
+++ b/services/subtreevalidation/check_block_subtrees.go
@@ -393,6 +393,13 @@ func (u *Server) CheckBlockSubtrees(ctx context.Context, request *subtreevalidat
 			return nil
 		}
 
+		if subtree == nil {
+			// ValidateSubtreeInternal returned (nil, nil) because the subtree already existed in the
+			// store - another goroutine completed the write between TryLockIfFileNotExists and the
+			// in-store existence check. Nothing to clean up.
+			return nil
+		}
+
 		// Remove validated transactions from orphanage
 		for _, node := range subtree.Nodes {
 			u.orphanage.Delete(node.Hash)
@@ -431,6 +438,13 @@ func (u *Server) CheckBlockSubtrees(ctx context.Context, request *subtreevalidat
 			return nil, errors.WrapGRPC(errors.NewProcessingError("[CheckBlockSubtreesRequest] Failed to validate subtree %s", subtreeHash.String(), err))
 		}
 
+		if subtree == nil {
+			// ValidateSubtreeInternal returned (nil, nil) because the subtree already existed in the
+			// store - another goroutine completed the write between TryLockIfFileNotExists and the
+			// in-store existence check. Nothing to clean up.
+			continue
+		}
+
 		// Remove validated transactions from orphanage
 		for _, node := range subtree.Nodes {
 			u.orphanage.Delete(node.Hash)
diff --git a/services/subtreevalidation/check_block_subtrees_test.go b/services/subtreevalidation/check_block_subtrees_test.go
index cf44f63a75..db23344b47 100644
--- a/services/subtreevalidation/check_block_subtrees_test.go
+++ b/services/subtreevalidation/check_block_subtrees_test.go
@@ -9,6 +9,7 @@ import (
 	"path/filepath"
 	"runtime"
 	"sync"
+	"sync/atomic"
 	"testing"
 	"time"
 
@@ -2382,3 +2383,112 @@ func TestBuildParentMetadata(t *testing.T) {
 		assert.Equal(t, uint32(100), meta.BlockHeight)
 	})
 }
+
+// raceSubtreeStore wraps a blob.Store and reports FileTypeSubtree for a specific hash as missing
+// on its first Exists() call but present on subsequent calls. This simulates the race window
+// where another goroutine commits the subtree between the initial existence check in
+// CheckBlockSubtrees (line ~101) and the GetSubtreeExists call inside ValidateSubtreeInternal —
+// the path that drives ValidateSubtreeInternal to return (nil, nil).
+type raceSubtreeStore struct {
+	blob.Store
+	target     chainhash.Hash
+	subtreeHit atomic.Int32
+}
+
+func (r *raceSubtreeStore) Exists(ctx context.Context, key []byte, fileType fileformat.FileType, opts ...options.FileOption) (bool, error) {
+	if fileType == fileformat.FileTypeSubtree && bytes.Equal(key, r.target[:]) {
+		// First call: subtree absent (so CheckBlockSubtrees adds it to missingSubtrees).
+		// Subsequent calls: subtree present (so ValidateSubtreeInternal -> GetSubtreeExists
+		// returns true and ValidateSubtreeInternal returns (nil, nil), exercising the nil guard).
+		if r.subtreeHit.Add(1) == 1 {
+			return false, nil
+		}
+		return true, nil
+	}
+	return r.Store.Exists(ctx, key, fileType, opts...)
+}
+
+// TestCheckBlockSubtrees_NilSubtree_FromGoroutine drives the race shape where
+// ValidateSubtreeInternal returns (nil, nil) inside the parallel validation goroutine in
+// CheckBlockSubtrees. Without the nil guard, the orphan-deletion loop would dereference a nil
+// subtree and panic. The serial revalidate path (the second nil-guard site) is symmetric: it
+// calls the same ValidateSubtreeInternal and dereferences the same field, so a single
+// representative test covers the shape for both.
+func TestCheckBlockSubtrees_NilSubtree_FromGoroutine(t *testing.T) {
+	server, cleanup := setupTestServer(t)
+	defer cleanup()
+
+	testHeaders := testhelpers.CreateTestHeaders(t, 1)
+
+	server.blockchainClient.(*blockchain.Mock).On("GetBestBlockHeader",
+		mock.Anything).
+		Return(testHeaders[0], &model.BlockHeaderMeta{}, nil)
+
+	tx1, err := createTestTransaction("fff2525b8931402dd09222c50775608f75787bd2b87e56995a7bdd30f79702c4")
+	require.NoError(t, err)
+
+	subtree, err := subtreepkg.NewTreeByLeafCount(2)
+	require.NoError(t, err)
+	require.NoError(t, subtree.AddNode(*tx1.TxIDChainHash(), 1, 1))
+
+	subtreeData := subtreepkg.NewSubtreeData(subtree)
+	require.NoError(t, subtreeData.AddTx(tx1, 0))
+
+	subtreeBytes, err := subtree.Serialize()
+	require.NoError(t, err)
+
+	subtreeDataBytes, err := subtreeData.Serialize()
+	require.NoError(t, err)
+
+	// Pre-populate FileTypeSubtreeToCheck and FileTypeSubtreeData so the loading goroutine
+	// inside CheckBlockSubtrees can avoid HTTP fetches and proceed past the batch processing
+	// stage to the parallel validation goroutine that hits the panic site.
+	require.NoError(t, server.subtreeStore.Set(context.Background(), subtree.RootHash()[:], fileformat.FileTypeSubtreeToCheck, subtreeBytes))
+	require.NoError(t, server.subtreeStore.Set(context.Background(), subtree.RootHash()[:], fileformat.FileTypeSubtreeData, subtreeDataBytes))
+
+	// Wrap the subtree store so FileTypeSubtree reports missing on the first Exists call
+	// (added to missingSubtrees) and present afterwards (ValidateSubtreeInternal returns nil,nil).
+	server.subtreeStore = &raceSubtreeStore{
+		Store:  server.subtreeStore,
+		target: *subtree.RootHash(),
+	}
+
+	header := &model.BlockHeader{
+		Version:        1,
+		HashPrevBlock:  &chainhash.Hash{},
+		HashMerkleRoot: &chainhash.Hash{},
+		Timestamp:      uint32(time.Now().Unix()),
+		Bits:           model.NBit{},
+		Nonce:          0,
+	}
+
+	coinbaseTx := &bt.Tx{Version: 1}
+	block, err := model.NewBlock(header, coinbaseTx, []*chainhash.Hash{subtree.RootHash()}, 1, 400, 0, 0)
+	require.NoError(t, err)
+
+	blockBytes, err := block.Bytes()
+	require.NoError(t, err)
+
+	server.blockchainClient.(*blockchain.Mock).On("GetBlockHeaderIDs",
+		mock.Anything, mock.Anything, mock.Anything).
+		Return([]uint32{1, 2, 3}, nil)
+	server.blockchainClient.(*blockchain.Mock).On("IsFSMCurrentState",
+		mock.Anything, blockchain.FSMStateRUNNING).
+		Return(true, nil)
+
+	mockValidator := server.validatorClient.(*validator.MockValidatorClient)
+	mockValidator.UtxoStore = server.utxoStore
+
+	request := &subtreevalidation_api.CheckBlockSubtreesRequest{
+		Block:   blockBytes,
+		BaseUrl: "http://test.com",
+	}
+
+	require.NotPanics(t, func() {
+		var response *subtreevalidation_api.CheckBlockSubtreesResponse
+		response, err = server.CheckBlockSubtrees(context.Background(), request)
+		require.NoError(t, err)
+		require.NotNil(t, response)
+		assert.True(t, response.Blessed)
+	})
+}
diff --git a/services/subtreevalidation/subtreeHandler.go b/services/subtreevalidation/subtreeHandler.go
index 5126c4238a..7f39d0f6a5 100644
--- a/services/subtreevalidation/subtreeHandler.go
+++ b/services/subtreevalidation/subtreeHandler.go
@@ -151,6 +151,13 @@ func (u *Server) subtreesHandler(ctx context.Context, hash *chainhash.Hash, base
 		return err
 	}
 
+	if subtree == nil {
+		// ValidateSubtreeInternal returned (nil, nil) because the subtree already existed in the
+		// store - another goroutine completed the write between TryLockIfFileNotExists and the
+		// in-store existence check. Nothing to clean up.
+		return nil
+	}
+
 	// if no error was thrown, remove all the transactions from this subtree from the orphanage
 	for _, node := range subtree.Nodes {
 		u.orphanage.Delete(node.Hash)
diff --git a/services/subtreevalidation/subtreeHandler_test.go b/services/subtreevalidation/subtreeHandler_test.go
index 09c05c088a..89e791e84f 100644
--- a/services/subtreevalidation/subtreeHandler_test.go
+++ b/services/subtreevalidation/subtreeHandler_test.go
@@ -349,6 +349,65 @@ func TestSubtreeMessageHandler_BlocksOnlyFalse_ProcessesMessage(t *testing.T) {
 	time.Sleep(500 * time.Millisecond)
 }
 
+// TestSubtreesHandler_NilSubtree exercises the race shape where ValidateSubtreeInternal
+// returns (nil, nil) because the subtree was written to the store between the lock
+// acquisition and the in-store existence check. The handler must not panic dereferencing
+// the returned subtree.
+func TestSubtreesHandler_NilSubtree(t *testing.T) {
+	subtreeHash, _ := chainhash.NewHashFromStr("d580e67e847f65c73496a9f1adafacc5f73b4ca9d44fbd0749d6d926914bdcaf")
+	baseURL, _ := url.Parse("http://localhost:8000")
+
+	tSettings := test.CreateBaseTestSettings(t)
+	tSettings.SubtreeValidation.QuorumPath = "./data/subtree_quorum_nil"
+
+	defer func() {
+		_ = os.RemoveAll(tSettings.SubtreeValidation.QuorumPath)
+	}()
+
+	logger := ulogger.TestLogger{}
+	subtreeStore := memory.New()
+	utxoStore, _ := nullstore.NewNullStore()
+	blockchainClient := &blockchain.Mock{}
+	blockchainClient.On("IsFSMCurrentState", mock.Anything, mock.Anything).Return(true, nil)
+
+	// pre-populate the subtree store so GetSubtreeExists returns true once ValidateSubtreeInternal
+	// reaches the store check, simulating another goroutine having completed the write.
+	require.NoError(t, subtreeStore.Set(context.Background(), subtreeHash[:], fileformat.FileTypeSubtree, []byte("validated")))
+
+	blockIDsMap := make(map[uint32]bool)
+
+	server := &Server{
+		logger:           logger,
+		settings:         tSettings,
+		blockchainClient: blockchainClient,
+		subtreeStore:     subtreeStore,
+		utxoStore:        utxoStore,
+		validatorClient:  &validator.MockValidator{},
+		orphanage: func() *Orphanage {
+			o, err := NewOrphanage(tSettings.SubtreeValidation.OrphanageTimeout, tSettings.SubtreeValidation.OrphanageMaxSize, logger)
+			require.NoError(t, err)
+			return o
+		}(),
+		currentBlockIDsMap:  atomic.Pointer[map[uint32]bool]{},
+		bestBlockHeader:     atomic.Pointer[model.BlockHeader]{},
+		bestBlockHeaderMeta: atomic.Pointer[model.BlockHeaderMeta]{},
+	}
+	server.currentBlockIDsMap.Store(&blockIDsMap)
+	server.bestBlockHeaderMeta.Store(&model.BlockHeaderMeta{Height: 100})
+
+	// use MockExister for the quorum so the lock is acquired even though the file exists in the
+	// subtree store; this reproduces the race window where Exists() succeeded before the writer
+	// committed the subtree but the handler's later GetSubtreeExists() observes the new entry.
+	var err error
+	server.quorum, err = NewQuorum(logger, MockExister{}, tSettings.SubtreeValidation.QuorumPath)
+	require.NoError(t, err)
+
+	require.NotPanics(t, func() {
+		err = server.subtreesHandler(context.Background(), subtreeHash, baseURL, "peer1")
+	})
+	require.NoError(t, err)
+}
+
 // TestSubtreeMessageHandler_BlocksOnly_CatchingBlocksStillSkips verifies that when FSM is in
 // CATCHINGBLOCKS state, processing is skipped regardless of BlocksOnly setting.
 func TestSubtreeMessageHandler_BlocksOnly_CatchingBlocksStillSkips(t *testing.T) {
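For reference, the contract the three call-site guards above rely on can be boiled down to a small, self-contained sketch. The names below (validateSubtree, orphanage, handle) are illustrative stand-ins rather than the service's actual API; only the behaviour they model is taken from the patch: a (nil, nil) return means the subtree already exists in the store, so the caller skips the orphanage cleanup instead of dereferencing a nil pointer.

// Illustrative sketch only: validateSubtree and orphanage are hypothetical stand-ins
// for ValidateSubtreeInternal and the server's orphanage, not the real types.
package main

import "fmt"

type node struct{ Hash [32]byte }

type subtree struct{ Nodes []node }

type orphanage struct{ txs map[[32]byte]struct{} }

func (o *orphanage) Delete(h [32]byte) { delete(o.txs, h) }

// validateSubtree models the contract the patch documents for ValidateSubtreeInternal:
// (nil, nil) means the subtree already exists in the store because another goroutine
// wrote it between the lock attempt and the existence check, so there is nothing to do.
func validateSubtree(alreadyStored bool) (*subtree, error) {
	if alreadyStored {
		return nil, nil
	}
	return &subtree{Nodes: []node{{Hash: [32]byte{1}}}}, nil
}

// handle mirrors the call-site pattern the patch adds in subtreesHandler and
// CheckBlockSubtrees: check the error, then nil-check the subtree before walking Nodes.
func handle(o *orphanage, alreadyStored bool) error {
	st, err := validateSubtree(alreadyStored)
	if err != nil {
		return err
	}
	if st == nil {
		// Benign race: the subtree was validated and stored elsewhere; skip cleanup.
		return nil
	}
	for _, n := range st.Nodes {
		o.Delete(n.Hash) // safe: st is known to be non-nil here
	}
	return nil
}

func main() {
	o := &orphanage{txs: map[[32]byte]struct{}{}}
	fmt.Println(handle(o, true), handle(o, false)) // prints: <nil> <nil>
}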