Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 98 additions & 38 deletions stores/blockchain/sql/GetBlockHeaderIDs.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,24 @@
// This file implements the GetBlockHeaderIDs method, which retrieves a sequence of
// block header IDs starting from a specified block hash. In Teranode's architecture
// for BSV, this method plays a critical role in block mining status tracking and
// chain traversal operations. The implementation uses a multi-tier caching strategy
// for performance optimization and falls back to a recursive SQL Common Table Expression
// (CTE) query when cache misses occur. This method supports Teranode's high-throughput
// transaction processing by providing efficient access to block header identifiers
// without requiring the full header data to be loaded. This is particularly important
// for operations that only need to track or reference blocks by their internal database IDs.
// chain traversal operations.
//
// The implementation uses a multi-tier strategy:
//
// 1. In-memory cache (responseCache or chainWalkCache, depending on
// useInMemoryChainCheck) to short-circuit repeated requests.
// 2. An on_main_chain fast path that filters by the partial index and a
// height range derived from the start block — used whenever the start
// hash is on the main chain and no rebuild is in flight.
// 3. A recursive SQL Common Table Expression (CTE) fallback that walks
// parent_id pointers — used for fork tips, unknown hashes, and during
// main-chain rebuilds.
//
// This method supports Teranode's high-throughput transaction processing by
// providing efficient access to block header identifiers without requiring
// the full header data to be loaded. This is particularly important for
// operations that only need to track or reference blocks by their internal
// database IDs.
package sql

import (
Expand All @@ -33,20 +45,27 @@ import (
//
// The implementation employs a multi-tier approach for optimal performance:
//
// 1. First attempts to retrieve header IDs from the in-memory blocks cache
// - Provides O(1) lookup time when the requested blocks are in the cache
// - Significantly reduces database load for frequently accessed recent blocks
// - Returns immediately if the cache contains the requested headers
// 1. First attempts to retrieve header IDs from the in-memory blocks cache.
// - Provides O(1) lookup time when the requested blocks are in the cache.
// - Significantly reduces database load for frequently accessed recent blocks.
// - Returns immediately if the cache contains the requested headers.
//
// 2. Falls back to a recursive SQL query using Common Table Expressions (CTEs) when cache misses occur
// - Efficiently traverses the blockchain graph starting from the specified block
// - Retrieves the specified number of header IDs in descending height order
// - Uses database indexing for optimal query performance
// 2. On cache miss, dispatches to buildGetBlockHeaderIDsQuery which picks
// between two SQL strategies:
// - on_main_chain fast path: a single backward index scan on the
// partial index, restricted to the height range derived from the
// start block. Used whenever the start hash is on the main chain
// and no rebuild is in flight.
// - Recursive CTE fallback: walks parent_id pointers from the start
// block. Used for fork tips, unknown hashes, and during main-chain
// rebuilds.
//
// The recursive CTE approach is particularly well-suited for blockchain's linked-list
// structure, as it allows efficient traversal of the chain without requiring multiple
// separate queries. This is critical for Teranode's high-throughput BSV implementation,
// where database efficiency directly impacts node performance.
// The fast path replaces an O(N) recursive walk with a contiguous index
// scan and is ~3-6x faster on small datasets, expected 10-20x on
// production-sized DBs. The CTE remains authoritative for the cases where
// on_main_chain cannot answer correctly. This is critical for Teranode's
// high-throughput BSV implementation, where database efficiency directly
// impacts node performance.
//
// Parameters:
// - ctx: Context for the database operation, allowing for cancellation and timeouts
// - blockHashFrom: Hash of the starting block to retrieve IDs from
Expand All @@ -58,10 +77,11 @@ import (
// - StorageError for database query or scan errors
//
// The method is optimized for performance with a multi-tier approach:
// 1. First checks the in-memory blocks cache for the requested headers
// 2. If found in cache, extracts and returns just the IDs without database access
// 3. On cache miss, uses a recursive SQL CTE query to efficiently traverse the blockchain
// 4. Returns an empty slice with nil error if no matching blocks are found
// 1. First checks the in-memory blocks cache for the requested headers.
// 2. If found in cache, extracts and returns just the IDs without database access.
// 3. On cache miss, dispatches to the on_main_chain fast path or the recursive
// CTE fallback via buildGetBlockHeaderIDsQuery.
// 4. Returns an empty slice with nil error if no matching blocks are found.
//
// This method is particularly important for mining-related operations where blocks need
// to be efficiently marked as mined without loading their complete data.
Expand Down Expand Up @@ -122,22 +142,11 @@ func (s *SQL) GetBlockHeaderIDs(ctx context.Context, blockHashFrom *chainhash.Ha
}
ids := make([]uint32, 0, initialCap)

q := `
WITH RECURSIVE ChainBlocks AS (
SELECT id, parent_id, 1 AS depth
FROM blocks
WHERE hash = $1
UNION ALL
SELECT bb.id, bb.parent_id, cb.depth + 1
FROM blocks bb
JOIN ChainBlocks cb ON bb.id = cb.parent_id
WHERE bb.id != cb.id
AND cb.depth < $2
)
SELECT id FROM ChainBlocks
LIMIT $2
`
rows, err := s.db.QueryContext(ctx, q, blockHashFrom[:], numberOfHeaders)
// Try the on_main_chain fast path; fall back to the recursive CTE on fork
// tips, unknown hashes, or while a main-chain rebuild is in flight. Same
// semantics as buildGetBlockHeadersQuery — see comment there.
q, args := s.buildGetBlockHeaderIDsQuery(ctx, blockHashFrom, numberOfHeaders)
rows, err := s.db.QueryContext(ctx, q, args...)

if err != nil {
if errors.Is(err, sql.ErrNoRows) {
Expand Down Expand Up @@ -170,3 +179,54 @@ func (s *SQL) GetBlockHeaderIDs(ctx context.Context, blockHashFrom *chainhash.Ha

return ids, nil
}

// buildGetBlockHeaderIDsQuery returns the SQL query and args for GetBlockHeaderIDs.
Comment thread
oskarszoon marked this conversation as resolved.
// The fast path uses the on_main_chain partial index when the start hash is on
// the main chain. Otherwise the recursive CTE walks parent_id pointers and is
// authoritative for fork tips and rebuilds.
func (s *SQL) buildGetBlockHeaderIDsQuery(ctx context.Context, blockHashFrom *chainhash.Hash, numberOfHeaders uint64) (string, []interface{}) {
if s.mainChainRebuilding.Load() == 0 {
var (
onMain bool
startHeight uint32
)
// Resolve start-block height in the probe so the main query binds it as
// a literal parameter. This (a) lets the planner pick the
// idx_on_main_chain_height partial index for the height range, and
// (b) eliminates the intra-query race that a same-query subquery
// evaluated twice would have.
if scanErr := s.db.QueryRowContext(ctx,
`SELECT COALESCE(on_main_chain, false), COALESCE(height, 0)
FROM blocks WHERE hash = $1 LIMIT 1`,
blockHashFrom[:],
).Scan(&onMain, &startHeight); scanErr == nil && onMain {
fastPath := `
SELECT b.id
FROM blocks b
WHERE b.on_main_chain = true
AND b.height <= $1
AND b.height > $1 - $2
ORDER BY b.height DESC
LIMIT $2
`
return fastPath, []interface{}{startHeight, numberOfHeaders}
}
}

cte := `
WITH RECURSIVE ChainBlocks AS (
SELECT id, parent_id, 1 AS depth
FROM blocks
WHERE hash = $1
UNION ALL
SELECT bb.id, bb.parent_id, cb.depth + 1
FROM blocks bb
JOIN ChainBlocks cb ON bb.id = cb.parent_id
WHERE bb.id != cb.id
AND cb.depth < $2
)
SELECT id FROM ChainBlocks
LIMIT $2
`
return cte, []interface{}{blockHashFrom[:], numberOfHeaders}
}
154 changes: 108 additions & 46 deletions stores/blockchain/sql/GetBlockHeaders.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,19 @@
// This file implements the GetBlockHeaders method, which retrieves a sequence of consecutive
// block headers starting from a specified block hash. This functionality is essential for
// blockchain synchronization, where nodes need to efficiently retrieve chains of headers
// to validate and update their local blockchain state. The implementation uses a recursive
// Common Table Expression (CTE) in SQL to efficiently traverse the blockchain graph structure,
// following the parent-child relationships between blocks. It also includes caching mechanisms
// to optimize performance for frequently requested header sequences and handles special cases
// like chain reorganizations and invalid blocks.
// to validate and update their local blockchain state.
//
// The implementation uses a hybrid query strategy:
//
// 1. An in-memory response/chain-walk cache to short-circuit repeated requests.
// 2. A fast path that filters by the on_main_chain partial index and a height
// range derived from the start block — used whenever the start hash is on
// the main chain and no rebuild is in flight. This replaces an O(N)
// recursive parent_id walk with a single backward index scan.
// 3. A recursive Common Table Expression (CTE) fallback that walks
// parent_id pointers — used for fork tips, unknown hashes, and while a
// main-chain rebuild is in flight, so the CTE remains authoritative for
// reorg / fork scenarios.
//
// In Teranode's high-throughput architecture, efficient header retrieval is critical for
// maintaining synchronization with the network, especially during initial block download
Expand Down Expand Up @@ -42,17 +50,23 @@ import (
// network consensus.
//
// The implementation follows a tiered approach to optimize performance:
// 1. First checks the blocks cache for the requested headers sequence
// 2. If not found in cache, executes a SQL query to recursively traverse the blockchain graph structure
// 3. The query follows parent-child relationships between blocks, starting from the
// specified block and retrieving the requested number of headers
// 4. For each block, constructs both a BlockHeader object containing the core consensus
// fields and a BlockHeaderMeta object containing additional metadata
// 1. First checks the blocks cache for the requested headers sequence.
// 2. If not found in cache, takes the on_main_chain fast path when the
// start hash is on the main chain and no rebuild is in flight: a single
// backward index scan over (height) where on_main_chain = true,
// restricted to the height range derived from the start block.
// 3. Otherwise (fork tips, unknown hashes, mid-rebuild), falls back to a
// recursive CTE that walks parent_id pointers from the start block
// backwards.
// 4. For each block, constructs both a BlockHeader object containing the
// core consensus fields and a BlockHeaderMeta object containing
// additional metadata.
//
// The SQL implementation uses database-specific optimizations for both PostgreSQL and
// SQLite to ensure efficient execution of the recursive query. The method also handles
// special cases such as chain reorganizations and invalid blocks, ensuring that only
// valid headers are returned.
// SQLite to ensure efficient execution of both the fast path and the CTE
// fallback. The method also handles special cases such as chain
// reorganizations and invalid blocks, ensuring that only valid headers are
// returned.
//
// Parameters:
// - ctx: Context for the database operation, allowing for cancellation and timeouts
Expand Down Expand Up @@ -98,19 +112,43 @@ func (s *SQL) GetBlockHeaders(ctx context.Context, blockHashFrom *chainhash.Hash
ctx, cancel := context.WithCancel(ctx)
defer cancel()

const q = `
WITH RECURSIVE ChainBlocks AS (
SELECT id, parent_id, 1 AS depth
FROM blocks
WHERE hash = $1
UNION ALL
SELECT bb.id, bb.parent_id, cb.depth + 1
FROM blocks bb
JOIN ChainBlocks cb ON bb.id = cb.parent_id
WHERE bb.id != cb.id
AND cb.depth < $2
)
SELECT
// Try the on_main_chain fast path when the start hash is itself on the main
Comment thread
oskarszoon marked this conversation as resolved.
// chain and no rebuild is in flight. The fast path replaces an O(N) recursive
// parent_id walk with a single backward index scan over idx_on_main_chain_height
// — measured ~3-6× faster on small datasets, expected 10-20× on production-sized DBs.
// Fork tips, unknown hashes, or DB errors fall back to the recursive CTE so the
// CTE remains the authoritative path. Same TOCTOU caveats apply as in
// GetLatestBlockHeaderFromBlockLocator: the guard check and main query are
// non-atomic, but the store's single-writer model bounds staleness to one call.
q, args := s.buildGetBlockHeadersQuery(ctx, blockHashFrom, numberOfHeaders)

rows, err := s.db.QueryContext(ctx, q, args...)
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
return []*model.BlockHeader{}, []*model.BlockHeaderMeta{}, nil
}

return nil, nil, errors.NewStorageError("failed to get headers", err)
}

defer rows.Close()

h, m, err := s.processBlockHeadersRows(rows, numberOfHeaders, false)
if err != nil {
return nil, nil, err
}

cacheOp.Set([2]interface{}{h, m}, cacheTTL)

return h, m, nil
}

// buildGetBlockHeadersQuery returns the SQL query and args for GetBlockHeaders.
// The fast path uses the on_main_chain partial index when the start hash is on
// the main chain. Otherwise the recursive CTE walks parent_id pointers and is
// authoritative for fork tips and rebuilds.
func (s *SQL) buildGetBlockHeadersQuery(ctx context.Context, blockHashFrom *chainhash.Hash, numberOfHeaders uint64) (string, []interface{}) {
const blockColumns = `
b.version
,b.block_time
,b.nonce
Expand All @@ -129,32 +167,56 @@ func (s *SQL) GetBlockHeaders(ctx context.Context, blockHashFrom *chainhash.Hash
,b.subtrees_set
,b.invalid
,b.processed_at
,b.median_time_past
,b.median_time_past`

if s.mainChainRebuilding.Load() == 0 {
var (
onMain bool
startHeight uint32
)
// Resolve start-block height in the probe so the main query binds it as
// a literal parameter. This (a) lets the planner pick the
// idx_on_main_chain_height partial index for the height range, and
// (b) avoids the race an inline subquery would introduce: the height is
// referenced twice in the main query, so a subquery would be evaluated
// twice. Any error, missing row, or off-main-chain result is treated as
// "not eligible" and falls through to the CTE.
if scanErr := s.db.QueryRowContext(ctx,
`SELECT COALESCE(on_main_chain, false), COALESCE(height, 0)
FROM blocks WHERE hash = $1 LIMIT 1`,
blockHashFrom[:],
).Scan(&onMain, &startHeight); scanErr == nil && onMain {
fastPath := `
SELECT` + blockColumns + `
FROM blocks b
JOIN ChainBlocks cb ON b.id = cb.id
WHERE b.on_main_chain = true
AND b.height <= $1
AND b.height > $1 - $2
ORDER BY b.height DESC
LIMIT $2
`

rows, err := s.db.QueryContext(ctx, q, blockHashFrom[:], numberOfHeaders)
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
return []*model.BlockHeader{}, []*model.BlockHeaderMeta{}, nil
return fastPath, []interface{}{startHeight, numberOfHeaders}
}

return nil, nil, errors.NewStorageError("failed to get headers", err)
}

defer rows.Close()

h, m, err := s.processBlockHeadersRows(rows, numberOfHeaders, false)
if err != nil {
return nil, nil, err
}

cacheOp.Set([2]interface{}{h, m}, cacheTTL)

return h, m, nil
cte := `
WITH RECURSIVE ChainBlocks AS (
SELECT id, parent_id, 1 AS depth
FROM blocks
WHERE hash = $1
UNION ALL
SELECT bb.id, bb.parent_id, cb.depth + 1
FROM blocks bb
JOIN ChainBlocks cb ON bb.id = cb.parent_id
WHERE bb.id != cb.id
AND cb.depth < $2
)
SELECT` + blockColumns + `
FROM blocks b
JOIN ChainBlocks cb ON b.id = cb.id
ORDER BY b.height DESC
LIMIT $2
`
return cte, []interface{}{blockHashFrom[:], numberOfHeaders}
}

func (s *SQL) processBlockHeadersRows(rows *sql.Rows, numberOfHeaders uint64, hasCoinbaseColumn bool) ([]*model.BlockHeader, []*model.BlockHeaderMeta, error) {
Expand Down
Loading