Skip to content

Split the block cache into block pointer cache and block data cache #6037

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jun 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions chain/ethereum/src/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -400,8 +400,9 @@ impl Chain {
pub async fn block_number(
&self,
hash: &BlockHash,
) -> Result<Option<(String, BlockNumber, Option<u64>, Option<BlockHash>)>, StoreError> {
self.chain_store.block_number(hash).await
) -> Result<Option<(String, BlockNumber, Option<BlockTime>, Option<BlockHash>)>, StoreError>
{
self.chain_store.block_pointer(hash).await
}

// TODO: This is only used to build the block stream which could probably
Expand Down Expand Up @@ -1130,6 +1131,9 @@ pub struct FirehoseMapper {
impl BlockStreamMapper<Chain> for FirehoseMapper {
fn decode_block(
&self,
// We share the trait with substreams but for firehose the timestamp
// is in the block header so we don't need to use it here.
_timestamp: BlockTime,
output: Option<&[u8]>,
) -> Result<Option<BlockFinality>, BlockStreamError> {
let block = match output {
Expand Down Expand Up @@ -1198,12 +1202,19 @@ impl FirehoseMapperTrait<Chain> for FirehoseMapper {
// Check about adding basic information about the block in the firehose::Response or maybe
// define a slimmed down struct that would decode only a few fields and ignore all the rest.
let block = codec::Block::decode(any_block.value.as_ref())?;
let timestamp = block
.header()
.timestamp
.map(|ts| BlockTime::since_epoch(ts.seconds, ts.nanos as u32))
.unwrap_or_default();

use firehose::ForkStep::*;
match step {
StepNew => {
// unwrap: Input cannot be None so output will be error or block.
let block = self.decode_block(Some(any_block.value.as_ref()))?.unwrap();
let block = self
.decode_block(timestamp, Some(any_block.value.as_ref()))?
.unwrap();
let block_with_triggers = self.block_with_triggers(logger, block).await?;

Ok(BlockStreamEvent::ProcessBlock(
Expand Down
10 changes: 7 additions & 3 deletions chain/near/src/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use graph::blockchain::client::ChainClient;
use graph::blockchain::firehose_block_ingestor::FirehoseBlockIngestor;
use graph::blockchain::substreams_block_stream::SubstreamsBlockStream;
use graph::blockchain::{
BasicBlockchainBuilder, BlockIngestor, BlockchainBuilder, BlockchainKind, NoopDecoderHook,
NoopRuntimeAdapter, Trigger, TriggerFilterWrapper,
BasicBlockchainBuilder, BlockIngestor, BlockTime, BlockchainBuilder, BlockchainKind,
NoopDecoderHook, NoopRuntimeAdapter, Trigger, TriggerFilterWrapper,
};
use graph::cheap_clone::CheapClone;
use graph::components::network_provider::ChainName;
Expand Down Expand Up @@ -432,6 +432,7 @@ pub struct FirehoseMapper {
impl BlockStreamMapper<Chain> for FirehoseMapper {
fn decode_block(
&self,
_timestamp: BlockTime,
output: Option<&[u8]>,
) -> Result<Option<codec::Block>, BlockStreamError> {
let block = match output {
Expand Down Expand Up @@ -528,7 +529,10 @@ impl FirehoseMapperTrait<Chain> for FirehoseMapper {
// Check about adding basic information about the block in the bstream::BlockResponseV2 or maybe
// define a slimmed down struct that would decode only a few fields and ignore all the rest.
// unwrap: Input cannot be None so output will be error or block.
let block = self.decode_block(Some(any_block.value.as_ref()))?.unwrap();
let block = self
// the block time is inside the block.
.decode_block(BlockTime::MIN, Some(any_block.value.as_ref()))?
.unwrap();

use ForkStep::*;
match step {
Expand Down
3 changes: 2 additions & 1 deletion chain/substreams/src/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ pub enum ParsedChanges {
pub struct Block {
pub hash: BlockHash,
pub number: BlockNumber,
pub timestamp: BlockTime,
pub changes: EntityChanges,
pub parsed_changes: Vec<ParsedChanges>,
}
Expand All @@ -60,7 +61,7 @@ impl blockchain::Block for Block {
}

fn timestamp(&self) -> BlockTime {
BlockTime::NONE
self.timestamp
}
}

Expand Down
14 changes: 12 additions & 2 deletions chain/substreams/src/mapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ pub struct WasmBlockMapper {
impl BlockStreamMapper<Chain> for WasmBlockMapper {
fn decode_block(
&self,
_timestamp: BlockTime,
_output: Option<&[u8]>,
) -> Result<Option<crate::Block>, BlockStreamError> {
unreachable!("WasmBlockMapper does not do block decoding")
Expand Down Expand Up @@ -104,7 +105,11 @@ pub struct Mapper {

#[async_trait]
impl BlockStreamMapper<Chain> for Mapper {
fn decode_block(&self, output: Option<&[u8]>) -> Result<Option<Block>, BlockStreamError> {
fn decode_block(
&self,
timestamp: BlockTime,
output: Option<&[u8]>,
) -> Result<Option<Block>, BlockStreamError> {
let changes: EntityChanges = match output {
Some(msg) => Message::decode(msg).map_err(SubstreamsError::DecodingError)?,
None => EntityChanges {
Expand All @@ -125,6 +130,7 @@ impl BlockStreamMapper<Chain> for Mapper {
number,
changes,
parsed_changes,
timestamp,
};

Ok(Some(block))
Expand Down Expand Up @@ -152,9 +158,13 @@ impl BlockStreamMapper<Chain> for Mapper {
) -> Result<BlockStreamEvent<Chain>, BlockStreamError> {
let block_number: BlockNumber = clock.number.try_into().map_err(Error::from)?;
let block_hash = clock.id.as_bytes().to_vec().into();
let timestamp = clock
.timestamp
.map(|ts| BlockTime::since_epoch(ts.seconds, ts.nanos as u32))
.unwrap_or_default();

let block = self
.decode_block(Some(&block))?
.decode_block(timestamp, Some(&block))?
.ok_or_else(|| anyhow!("expected block to not be empty"))?;

let block = self.block_with_triggers(logger, block).await.map(|bt| {
Expand Down
6 changes: 5 additions & 1 deletion graph/src/blockchain/block_stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,11 @@ pub trait FirehoseMapper<C: Blockchain>: Send + Sync {

#[async_trait]
pub trait BlockStreamMapper<C: Blockchain>: Send + Sync {
fn decode_block(&self, output: Option<&[u8]>) -> Result<Option<C::Block>, BlockStreamError>;
fn decode_block(
&self,
timestamp: BlockTime,
output: Option<&[u8]>,
) -> Result<Option<C::Block>, BlockStreamError>;

async fn block_with_triggers(
&self,
Expand Down
5 changes: 3 additions & 2 deletions graph/src/blockchain/mock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -546,10 +546,11 @@ impl ChainStore for MockChainStore {
fn confirm_block_hash(&self, _number: BlockNumber, _hash: &BlockHash) -> Result<usize, Error> {
unimplemented!()
}
async fn block_number(
async fn block_pointer(
&self,
_hash: &BlockHash,
) -> Result<Option<(String, BlockNumber, Option<u64>, Option<BlockHash>)>, StoreError> {
) -> Result<Option<(String, BlockNumber, Option<BlockTime>, Option<BlockHash>)>, StoreError>
{
unimplemented!()
}
async fn block_numbers(
Expand Down
76 changes: 63 additions & 13 deletions graph/src/blockchain/types.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use anyhow::anyhow;
use chrono::DateTime;
use diesel::deserialize::FromSql;
use diesel::pg::Pg;
use diesel::serialize::{Output, ToSql};
Expand All @@ -7,6 +8,7 @@ use diesel::sql_types::{Bytea, Nullable, Text};
use diesel_derives::{AsExpression, FromSqlRow};
use serde::{Deserialize, Deserializer};
use std::convert::TryFrom;
use std::num::ParseIntError;
use std::time::Duration;
use std::{fmt, str::FromStr};
use web3::types::{Block, H256, U256, U64};
Expand All @@ -16,9 +18,9 @@ use crate::components::store::BlockNumber;
use crate::data::graphql::IntoValue;
use crate::data::store::scalar::Timestamp;
use crate::derive::CheapClone;
use crate::object;
use crate::prelude::{r, Value};
use crate::util::stable_hash_glue::{impl_stable_hash, AsBytes};
use crate::{bail, object};

/// A simple marker for byte arrays that are really block hashes
#[derive(Clone, Default, PartialEq, Eq, Hash, FromSqlRow, AsExpression)]
Expand Down Expand Up @@ -477,10 +479,7 @@ impl TryFrom<(Option<H256>, Option<U64>, H256, U256)> for ExtendedBlockPtr {
let block_number =
i32::try_from(number).map_err(|_| anyhow!("Block number out of range"))?;

// Convert `U256` to `BlockTime`
let secs =
i64::try_from(timestamp_u256).map_err(|_| anyhow!("Timestamp out of range for i64"))?;
let block_time = BlockTime::since_epoch(secs, 0);
let block_time = BlockTime::try_from(timestamp_u256)?;

Ok(ExtendedBlockPtr {
hash: hash.into(),
Expand All @@ -497,16 +496,13 @@ impl TryFrom<(H256, i32, H256, U256)> for ExtendedBlockPtr {
fn try_from(tuple: (H256, i32, H256, U256)) -> Result<Self, Self::Error> {
let (hash, block_number, parent_hash, timestamp_u256) = tuple;

// Convert `U256` to `BlockTime`
let secs =
i64::try_from(timestamp_u256).map_err(|_| anyhow!("Timestamp out of range for i64"))?;
let block_time = BlockTime::since_epoch(secs, 0);
let timestamp = BlockTime::try_from(timestamp_u256)?;

Ok(ExtendedBlockPtr {
hash: hash.into(),
number: block_number,
parent_hash: parent_hash.into(),
timestamp: block_time,
timestamp,
})
}
}
Expand Down Expand Up @@ -562,14 +558,63 @@ impl fmt::Display for ChainIdentifier {
#[diesel(sql_type = Timestamptz)]
pub struct BlockTime(Timestamp);

impl Default for BlockTime {
fn default() -> Self {
BlockTime::NONE
}
}

/// Converts a block time into a `U256` holding its seconds since the epoch.
///
/// # Errors
///
/// Fails when the block time lies before the epoch (negative seconds),
/// since that cannot be represented as an unsigned value.
impl TryFrom<BlockTime> for U256 {
    type Error = anyhow::Error;

    fn try_from(value: BlockTime) -> Result<Self, Self::Error> {
        let secs = value.as_secs_since_epoch();

        // `u64::try_from` fails exactly when `secs` is negative.
        match u64::try_from(secs) {
            Ok(secs) => Ok(U256::from(secs)),
            Err(_) => bail!("unable to convert block time into U256"),
        }
    }
}

/// Interprets a `U256` as whole seconds since the epoch.
///
/// # Errors
///
/// Fails when the value does not fit into an `i64`.
impl TryFrom<U256> for BlockTime {
    type Error = anyhow::Error;

    fn try_from(value: U256) -> Result<Self, Self::Error> {
        let secs =
            i64::try_from(value).map_err(|_| anyhow!("Timestamp out of range for i64"))?;

        Ok(BlockTime::since_epoch(secs, 0))
    }
}

/// Converts an optional textual timestamp into a `BlockTime`.
///
/// `None` maps to [`BlockTime::NONE`]; `Some(text)` is parsed with
/// [`BlockTime::from_hex_str`].
///
/// # Errors
///
/// Returns the `ParseIntError` from [`BlockTime::from_hex_str`] when the
/// text is not a valid number.
impl TryFrom<Option<String>> for BlockTime {
    type Error = ParseIntError;

    fn try_from(ts: Option<String>) -> Result<Self, Self::Error> {
        match ts {
            Some(raw) => BlockTime::from_hex_str(&raw),
            None => Ok(BlockTime::NONE),
        }
    }
}

impl BlockTime {
/// A timestamp from a long long time ago used to indicate that we don't
/// have a timestamp
pub const NONE: Self = Self(Timestamp::NONE);
pub const NONE: Self = Self::MIN;

pub const MAX: Self = Self(Timestamp::MAX);

pub const MIN: Self = Self(Timestamp::MIN);
pub const MIN: Self = Self(Timestamp(DateTime::from_timestamp_nanos(0)));

/// Parse a timestamp given as whole seconds since the Unix epoch.
///
/// Despite the name, two formats are accepted: input starting with `0x`
/// is parsed as hexadecimal, anything else is parsed as decimal.
///
/// # Errors
///
/// Returns a `ParseIntError` when the text is not a valid non-negative
/// number in the detected radix, or when the value does not fit into an
/// `i64`.
pub fn from_hex_str(ts: &str) -> Result<Self, ParseIntError> {
    let (digits, radix) = match ts.strip_prefix("0x") {
        Some(hex) => (hex, 16),
        None => (ts, 10),
    };

    // Parse as `u64` first so negative input keeps being rejected, then as
    // `i64` so values above `i64::MAX` are reported as an overflow error
    // instead of silently wrapping into a negative timestamp.
    let secs = u64::from_str_radix(digits, radix)
        .and_then(|_| i64::from_str_radix(digits, radix))?;

    Ok(BlockTime::since_epoch(secs, 0))
}

/// Construct a block time that is the given number of seconds and
/// nanoseconds after the Unix epoch
Expand All @@ -586,7 +631,12 @@ impl BlockTime {
/// hourly rollups in tests
#[cfg(debug_assertions)]
pub fn for_test(ptr: &BlockPtr) -> Self {
Self::since_epoch(ptr.number as i64 * 45 * 60, 0)
Self::for_test_number(&ptr.number)
}

#[cfg(debug_assertions)]
pub fn for_test_number(number: &BlockNumber) -> Self {
Self::since_epoch(*number as i64 * 45 * 60, 0)
}

pub fn as_secs_since_epoch(&self) -> i64 {
Expand Down
8 changes: 4 additions & 4 deletions graph/src/components/store/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -576,10 +576,10 @@ pub trait ChainStore: ChainHeadStore {
/// Currently, the timestamp is only returned if it's present in the top level block. The format
/// depends on the chain and the implementation of Blockchain::Block for the specific chain.
/// e.g.: {"block": { "timestamp": 123123123 } }
async fn block_number(
async fn block_pointer(
&self,
hash: &BlockHash,
) -> Result<Option<(String, BlockNumber, Option<u64>, Option<BlockHash>)>, StoreError>;
) -> Result<Option<(String, BlockNumber, Option<BlockTime>, Option<BlockHash>)>, StoreError>;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are all these Options still justified? I think they will all always be Some. It would also be nicer to have a struct for this. Maybe call it BlockPointer since it's one row from that table (and BlockPtr is then a small excerpt from that).

Also, this method should be renamed to block_pointer

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's not always a timestamp; with the shared storage model it can still be None.

The Option<BlockTime> is a little weird, but I kept it because there is a difference between Some(epoch time) and None. It's more idiomatic to use Option than to check BlockTime == BlockTime::NONE or MIN, which are in fact the same value (I didn't really get why).


/// Do the same lookup as `block_pointer`, but in bulk
async fn block_numbers(
Expand Down Expand Up @@ -665,10 +665,10 @@ pub trait QueryStore: Send + Sync {
/// Returns the block number, timestamp and the parent hash. Timestamp depends on the chain block type
/// and can have multiple formats; it can also not be present. For now this is only available
/// for EVM chains, both firehose and rpc.
async fn block_number_with_timestamp_and_parent_hash(
async fn block_pointer(
&self,
block_hash: &BlockHash,
) -> Result<Option<(BlockNumber, Option<u64>, Option<BlockHash>)>, StoreError>;
) -> Result<Option<(BlockNumber, Option<BlockTime>, Option<BlockHash>)>, StoreError>;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And this could also just be called block_pointer


fn wait_stats(&self) -> PoolWaitStats;

Expand Down
4 changes: 2 additions & 2 deletions graphql/src/runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,11 @@ where
let latest_block = match store.block_ptr().await.ok().flatten() {
Some(block) => Some(LatestBlockInfo {
timestamp: store
.block_number_with_timestamp_and_parent_hash(&block.hash)
.block_pointer(&block.hash)
.await
.ok()
.flatten()
.and_then(|(_, t, _)| t),
.and_then(|(_, t, _)| t.map(|ts| ts.as_secs_since_epoch() as u64)),
hash: block.hash,
number: block.number,
}),
Expand Down
4 changes: 2 additions & 2 deletions graphql/src/store/resolver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ impl StoreResolver {
let (timestamp, parent_hash) = if lookup_needed(field) {
match self
.store
.block_number_with_timestamp_and_parent_hash(&block_ptr.hash)
.block_pointer(&block_ptr.hash)
.await
.map_err(Into::<QueryExecutionError>::into)?
{
Expand Down Expand Up @@ -219,7 +219,7 @@ impl StoreResolver {
.unwrap_or(r::Value::Null);

let timestamp = timestamp
.map(|ts| r::Value::Int(ts as i64))
.map(|ts| r::Value::Int(ts.as_secs_since_epoch()))
.unwrap_or(r::Value::Null);

let parent_hash = parent_hash
Expand Down
40 changes: 40 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
DATABASE_TEST_VAR_NAME := "THEGRAPH_STORE_POSTGRES_DIESEL_URL"
DATABASE_URL := "postgresql://graph-node:let-me-in@localhost:5432/graph-node"

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's a justfile? This should be your local file, not something in the repo

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is similar to a Makefile. It's intentionally in the repo and provides some shortcuts for common operations; you don't need to use it yourself, but it's useful to have for others.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


help:
@just -l

local-deps-up *ARGS:
docker compose -f docker/docker-compose.yml up ipfs postgres {{ ARGS }}

local-deps-down:
docker compose -f docker/docker-compose.yml down

test-deps-up *ARGS:
docker compose -f tests/docker-compose.yml up {{ ARGS }}

test-deps-down:
docker compose -f tests/docker-compose.yml down

# Requires local-deps, see local-deps-up
test *ARGS:
just _run_in_bash cargo test --workspace --exclude graph-tests -- --nocapture {{ ARGS }}

runner-test *ARGS:
just _run_in_bash cargo test -p graph-tests --test runner_tests -- --nocapture {{ ARGS }}

# Requires test-deps to be running, see test-deps-up
it-test *ARGS:
just _run_in_bash cargo test --test integration_tests -- --nocapture {{ ARGS }}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These can be just aliases in ~/.cargo/config.toml. I have e.g.

[alias]
store = "test -p graph-store-postgres"
tst = "test --workspace --exclude graph-tests"
docs = "doc --workspace --document-private-items"
gm = "install --bin graphman --path node --locked"
gmt = "install --bin graphman --path node --locked --root /var/tmp/cargo"
rt = "test -p graph-tests --test runner_tests"
it = "test -p graph-tests --test integration_tests -- --nocapture"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and that's local, this works for everyone.


local-rm-db:
rm -r docker/data/postgres

new-migration NAME:
diesel migration generate {{ NAME }} --migration-dir store/postgres/migrations/

_run_in_bash *CMD:
#!/usr/bin/env bash
export {{ DATABASE_TEST_VAR_NAME }}={{ DATABASE_URL }}
{{ CMD }}
Loading
Loading