Skip to content

Split the block cache into block pointer cache and block data cache #6037

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jun 7, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions chain/ethereum/src/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,8 @@ impl Chain {
pub async fn block_number(
&self,
hash: &BlockHash,
) -> Result<Option<(String, BlockNumber, Option<u64>, Option<BlockHash>)>, StoreError> {
) -> Result<Option<(String, BlockNumber, Option<BlockTime>, Option<BlockHash>)>, StoreError>
{
self.chain_store.block_number(hash).await
}

Expand Down Expand Up @@ -1130,6 +1131,9 @@ pub struct FirehoseMapper {
impl BlockStreamMapper<Chain> for FirehoseMapper {
fn decode_block(
&self,
// We share the trait with substreams but for firehose the timestamp
// is in the block header so we don't need to use it here.
_timestamp: BlockTime,
output: Option<&[u8]>,
) -> Result<Option<BlockFinality>, BlockStreamError> {
let block = match output {
Expand Down Expand Up @@ -1198,12 +1202,19 @@ impl FirehoseMapperTrait<Chain> for FirehoseMapper {
// Check about adding basic information about the block in the firehose::Response or maybe
// define a slimmed down struct that would decode only a few fields and ignore all the rest.
let block = codec::Block::decode(any_block.value.as_ref())?;
let timestamp = block
.header()
.timestamp
.map(|ts| BlockTime::since_epoch(ts.seconds, ts.nanos as u32))
.unwrap_or_default();

use firehose::ForkStep::*;
match step {
StepNew => {
// unwrap: Input cannot be None so output will be error or block.
let block = self.decode_block(Some(any_block.value.as_ref()))?.unwrap();
let block = self
.decode_block(timestamp, Some(any_block.value.as_ref()))?
.unwrap();
let block_with_triggers = self.block_with_triggers(logger, block).await?;

Ok(BlockStreamEvent::ProcessBlock(
Expand Down
10 changes: 7 additions & 3 deletions chain/near/src/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use graph::blockchain::client::ChainClient;
use graph::blockchain::firehose_block_ingestor::FirehoseBlockIngestor;
use graph::blockchain::substreams_block_stream::SubstreamsBlockStream;
use graph::blockchain::{
BasicBlockchainBuilder, BlockIngestor, BlockchainBuilder, BlockchainKind, NoopDecoderHook,
NoopRuntimeAdapter, Trigger, TriggerFilterWrapper,
BasicBlockchainBuilder, BlockIngestor, BlockTime, BlockchainBuilder, BlockchainKind,
NoopDecoderHook, NoopRuntimeAdapter, Trigger, TriggerFilterWrapper,
};
use graph::cheap_clone::CheapClone;
use graph::components::network_provider::ChainName;
Expand Down Expand Up @@ -432,6 +432,7 @@ pub struct FirehoseMapper {
impl BlockStreamMapper<Chain> for FirehoseMapper {
fn decode_block(
&self,
_timestamp: BlockTime,
output: Option<&[u8]>,
) -> Result<Option<codec::Block>, BlockStreamError> {
let block = match output {
Expand Down Expand Up @@ -528,7 +529,10 @@ impl FirehoseMapperTrait<Chain> for FirehoseMapper {
// Check about adding basic information about the block in the bstream::BlockResponseV2 or maybe
// define a slimmed down struct that would decode only a few fields and ignore all the rest.
// unwrap: Input cannot be None so output will be error or block.
let block = self.decode_block(Some(any_block.value.as_ref()))?.unwrap();
let block = self
// the block time is inside the block.
.decode_block(BlockTime::MIN, Some(any_block.value.as_ref()))?
.unwrap();

use ForkStep::*;
match step {
Expand Down
3 changes: 2 additions & 1 deletion chain/substreams/src/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ pub enum ParsedChanges {
pub struct Block {
pub hash: BlockHash,
pub number: BlockNumber,
pub timestamp: BlockTime,
pub changes: EntityChanges,
pub parsed_changes: Vec<ParsedChanges>,
}
Expand All @@ -60,7 +61,7 @@ impl blockchain::Block for Block {
}

fn timestamp(&self) -> BlockTime {
BlockTime::NONE
self.timestamp
}
}

Expand Down
14 changes: 12 additions & 2 deletions chain/substreams/src/mapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ pub struct WasmBlockMapper {
impl BlockStreamMapper<Chain> for WasmBlockMapper {
fn decode_block(
&self,
_timestamp: BlockTime,
_output: Option<&[u8]>,
) -> Result<Option<crate::Block>, BlockStreamError> {
unreachable!("WasmBlockMapper does not do block decoding")
Expand Down Expand Up @@ -104,7 +105,11 @@ pub struct Mapper {

#[async_trait]
impl BlockStreamMapper<Chain> for Mapper {
fn decode_block(&self, output: Option<&[u8]>) -> Result<Option<Block>, BlockStreamError> {
fn decode_block(
&self,
timestamp: BlockTime,
output: Option<&[u8]>,
) -> Result<Option<Block>, BlockStreamError> {
let changes: EntityChanges = match output {
Some(msg) => Message::decode(msg).map_err(SubstreamsError::DecodingError)?,
None => EntityChanges {
Expand All @@ -125,6 +130,7 @@ impl BlockStreamMapper<Chain> for Mapper {
number,
changes,
parsed_changes,
timestamp,
};

Ok(Some(block))
Expand Down Expand Up @@ -152,9 +158,13 @@ impl BlockStreamMapper<Chain> for Mapper {
) -> Result<BlockStreamEvent<Chain>, BlockStreamError> {
let block_number: BlockNumber = clock.number.try_into().map_err(Error::from)?;
let block_hash = clock.id.as_bytes().to_vec().into();
let timestamp = clock
.timestamp
.map(|ts| BlockTime::since_epoch(ts.seconds, ts.nanos as u32))
.unwrap_or_default();

let block = self
.decode_block(Some(&block))?
.decode_block(timestamp, Some(&block))?
.ok_or_else(|| anyhow!("expected block to not be empty"))?;

let block = self.block_with_triggers(logger, block).await.map(|bt| {
Expand Down
6 changes: 5 additions & 1 deletion graph/src/blockchain/block_stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,11 @@ pub trait FirehoseMapper<C: Blockchain>: Send + Sync {

#[async_trait]
pub trait BlockStreamMapper<C: Blockchain>: Send + Sync {
fn decode_block(&self, output: Option<&[u8]>) -> Result<Option<C::Block>, BlockStreamError>;
fn decode_block(
&self,
timestamp: BlockTime,
output: Option<&[u8]>,
) -> Result<Option<C::Block>, BlockStreamError>;

async fn block_with_triggers(
&self,
Expand Down
3 changes: 2 additions & 1 deletion graph/src/blockchain/mock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,8 @@ impl ChainStore for MockChainStore {
async fn block_number(
&self,
_hash: &BlockHash,
) -> Result<Option<(String, BlockNumber, Option<u64>, Option<BlockHash>)>, StoreError> {
) -> Result<Option<(String, BlockNumber, Option<BlockTime>, Option<BlockHash>)>, StoreError>
{
unimplemented!()
}
async fn block_numbers(
Expand Down
84 changes: 69 additions & 15 deletions graph/src/blockchain/types.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use anyhow::anyhow;
use chrono::DateTime;
use diesel::deserialize::FromSql;
use diesel::pg::Pg;
use diesel::serialize::{Output, ToSql};
Expand All @@ -7,6 +8,7 @@ use diesel::sql_types::{Bytea, Nullable, Text};
use diesel_derives::{AsExpression, FromSqlRow};
use serde::{Deserialize, Deserializer};
use std::convert::TryFrom;
use std::num::ParseIntError;
use std::time::Duration;
use std::{fmt, str::FromStr};
use web3::types::{Block, H256, U256, U64};
Expand All @@ -16,9 +18,9 @@ use crate::components::store::BlockNumber;
use crate::data::graphql::IntoValue;
use crate::data::store::scalar::Timestamp;
use crate::derive::CheapClone;
use crate::object;
use crate::prelude::{r, Value};
use crate::util::stable_hash_glue::{impl_stable_hash, AsBytes};
use crate::{bail, object};

/// A simple marker for byte arrays that are really block hashes
#[derive(Clone, Default, PartialEq, Eq, Hash, FromSqlRow, AsExpression)]
Expand Down Expand Up @@ -477,10 +479,7 @@ impl TryFrom<(Option<H256>, Option<U64>, H256, U256)> for ExtendedBlockPtr {
let block_number =
i32::try_from(number).map_err(|_| anyhow!("Block number out of range"))?;

// Convert `U256` to `BlockTime`
let secs =
i64::try_from(timestamp_u256).map_err(|_| anyhow!("Timestamp out of range for i64"))?;
let block_time = BlockTime::since_epoch(secs, 0);
let block_time = BlockTime::try_from(timestamp_u256)?;

Ok(ExtendedBlockPtr {
hash: hash.into(),
Expand All @@ -497,16 +496,13 @@ impl TryFrom<(H256, i32, H256, U256)> for ExtendedBlockPtr {
fn try_from(tuple: (H256, i32, H256, U256)) -> Result<Self, Self::Error> {
let (hash, block_number, parent_hash, timestamp_u256) = tuple;

// Convert `U256` to `BlockTime`
let secs =
i64::try_from(timestamp_u256).map_err(|_| anyhow!("Timestamp out of range for i64"))?;
let block_time = BlockTime::since_epoch(secs, 0);
let timestamp = BlockTime::try_from(timestamp_u256)?;

Ok(ExtendedBlockPtr {
hash: hash.into(),
number: block_number,
parent_hash: parent_hash.into(),
timestamp: block_time,
timestamp,
})
}
}
Expand Down Expand Up @@ -562,14 +558,67 @@ impl fmt::Display for ChainIdentifier {
#[diesel(sql_type = Timestamptz)]
pub struct BlockTime(Timestamp);

impl Default for BlockTime {
fn default() -> Self {
BlockTime::NONE
}
}

/// Converts a block time into a `U256` holding the value returned by
/// `BlockTime::as_secs_since_epoch`.
///
/// # Errors
///
/// Fails when the number of seconds is negative, since that cannot be
/// represented as an unsigned integer.
impl TryFrom<BlockTime> for U256 {
    type Error = anyhow::Error;

    fn try_from(value: BlockTime) -> Result<Self, Self::Error> {
        // The checked conversion rejects exactly the negative values.
        match u64::try_from(value.as_secs_since_epoch()) {
            Ok(secs) => Ok(U256::from(secs)),
            Err(_) => bail!("unable to convert block time into U256"),
        }
    }
}

/// Interprets a `U256` as a count of seconds and builds the block time
/// via `BlockTime::since_epoch` with zero nanoseconds.
///
/// # Errors
///
/// Fails when the value does not fit into an `i64`.
impl TryFrom<U256> for BlockTime {
    type Error = anyhow::Error;

    fn try_from(value: U256) -> Result<Self, Self::Error> {
        let secs =
            i64::try_from(value).map_err(|_| anyhow!("Timestamp out of range for i64"))?;

        Ok(BlockTime::since_epoch(secs, 0))
    }
}

/// Converts an optional textual timestamp into a block time.
///
/// `None` maps to [`BlockTime::NONE`]; `Some(text)` is parsed with the
/// `FromStr` implementation for `BlockTime`.
///
/// # Errors
///
/// Returns the `ParseIntError` produced by `BlockTime::from_str` when the
/// text cannot be parsed.
impl TryFrom<Option<String>> for BlockTime {
    type Error = ParseIntError;

    fn try_from(ts: Option<String>) -> Result<Self, Self::Error> {
        // The match is the tail expression: no explicit `return` is needed,
        // and the binding no longer shadows the primitive type name `str`.
        match ts {
            Some(raw) => BlockTime::from_str(&raw),
            None => Ok(BlockTime::NONE),
        }
    }
}

impl FromStr for BlockTime {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This impl is very unintuitive to me, that parsing a string will try to interpret the string as a hex/decimal number.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's how it was used; I just moved the implementation somewhere that is easier to find. The previous function was try_parse_timestamp or something similar. If it's the naming, I can change it to a method?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

renamed function

type Err = ParseIntError;

/// Parses a timestamp given as a whole number of seconds.
///
/// A leading `0x` selects hexadecimal; anything else is parsed as decimal.
/// The resulting block time is built with `BlockTime::since_epoch` and zero
/// nanoseconds.
///
/// # Errors
///
/// Returns a `ParseIntError` when the text is not a valid unsigned integer
/// in the selected radix, or when the value does not fit into an `i64`.
fn from_str(ts: &str) -> Result<Self, Self::Err> {
    let (radix, digits) = match ts.strip_prefix("0x") {
        Some(hex) => (16, hex),
        None => (10, ts),
    };

    // Parse as unsigned first so that a leading `-` keeps being rejected.
    let secs = u64::from_str_radix(digits, radix)?;

    // `secs as i64` would silently wrap values above `i64::MAX` into a
    // negative timestamp. `ParseIntError` cannot be constructed by hand, so
    // on overflow the digits are re-parsed as `i64`, which is guaranteed to
    // fail and yields the genuine overflow error.
    let secs = match i64::try_from(secs) {
        Ok(secs) => secs,
        Err(_) => i64::from_str_radix(digits, radix)?,
    };

    Ok(BlockTime::since_epoch(secs, 0))
}
}

impl BlockTime {
/// A timestamp from a long long time ago used to indicate that we don't
/// have a timestamp
pub const NONE: Self = Self(Timestamp::NONE);
// /// A timestamp from a long long time ago used to indicate that we don't
// /// have a timestamp
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like some extra comment signs snuck in

pub const NONE: Self = Self::MIN;

pub const MAX: Self = Self(Timestamp::MAX);

pub const MIN: Self = Self(Timestamp::MIN);
pub const MIN: Self = Self(Timestamp(DateTime::from_timestamp_nanos(0)));

/// Construct a block time that is the given number of seconds and
/// nanoseconds after the Unix epoch
Expand All @@ -586,7 +635,12 @@ impl BlockTime {
/// hourly rollups in tests
#[cfg(debug_assertions)]
pub fn for_test(ptr: &BlockPtr) -> Self {
Self::since_epoch(ptr.number as i64 * 45 * 60, 0)
Self::for_test_number(&ptr.number)
}

#[cfg(debug_assertions)]
pub fn for_test_number(number: &BlockNumber) -> Self {
Self::since_epoch(*number as i64 * 45 * 60, 0)
}

pub fn as_secs_since_epoch(&self) -> i64 {
Expand Down
4 changes: 2 additions & 2 deletions graph/src/components/store/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,7 @@ pub trait ChainStore: ChainHeadStore {
async fn block_number(
&self,
hash: &BlockHash,
) -> Result<Option<(String, BlockNumber, Option<u64>, Option<BlockHash>)>, StoreError>;
) -> Result<Option<(String, BlockNumber, Option<BlockTime>, Option<BlockHash>)>, StoreError>;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are all these Option still justified? I think they will all always be Some. It would also be nicer to have a struct for this. Maybe call it BlockPointer since it's one row from that table (and BlockPtr is then a small excerpt from that)

Also, this method should be renamed to block_pointer

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's not always a timestamp, on the shared storage model it still can be None

The Option<BlockTime> is a little weird, but I kept it because there is a difference between Some(epoch time) and None; it's more idiomatic to have an Option than to check BlockTime == BlockTime::NONE or MIN, which are in fact the same value (I didn't really get why).


/// Do the same lookup as `block_number`, but in bulk
async fn block_numbers(
Expand Down Expand Up @@ -668,7 +668,7 @@ pub trait QueryStore: Send + Sync {
async fn block_number_with_timestamp_and_parent_hash(
&self,
block_hash: &BlockHash,
) -> Result<Option<(BlockNumber, Option<u64>, Option<BlockHash>)>, StoreError>;
) -> Result<Option<(BlockNumber, Option<BlockTime>, Option<BlockHash>)>, StoreError>;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And this could also just be called block_pointer


fn wait_stats(&self) -> PoolWaitStats;

Expand Down
2 changes: 1 addition & 1 deletion graph/src/data_source/offchain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ impl DataSource {
data_source::MappingTrigger::Offchain(trigger.clone()),
self.mapping.handler.clone(),
BlockPtr::new(Default::default(), self.creation_block.unwrap_or(0)),
BlockTime::NONE,
BlockTime::MIN,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why that change here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

from testing, I'll revert, it's the exact same value, not sure why either

))
}

Expand Down
2 changes: 1 addition & 1 deletion graphql/src/runner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ where
.await
.ok()
.flatten()
.and_then(|(_, t, _)| t),
.and_then(|(_, t, _)| t.map(|ts| ts.as_secs_since_epoch() as u64)),
hash: block.hash,
number: block.number,
}),
Expand Down
2 changes: 1 addition & 1 deletion graphql/src/store/resolver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ impl StoreResolver {
.unwrap_or(r::Value::Null);

let timestamp = timestamp
.map(|ts| r::Value::Int(ts as i64))
.map(|ts| r::Value::Int(ts.as_secs_since_epoch()))
.unwrap_or(r::Value::Null);

let parent_hash = parent_hash
Expand Down
40 changes: 40 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
DATABASE_TEST_VAR_NAME := "THEGRAPH_STORE_POSTGRES_DIESEL_URL"
DATABASE_URL := "postgresql://graph-node:let-me-in@localhost:5432/graph-node"

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's a justfile? This should be your local file, not something in the repo

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is similar to a Makefile; it's intentionally in the repo. It provides some shortcuts for common operations. You don't need to use it yourself, but it's useful to have for others.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


help:
@just -l

local-deps-up *ARGS:
docker compose -f docker/docker-compose.yml up ipfs postgres {{ ARGS }}

local-deps-down:
docker compose -f docker/docker-compose.yml down

test-deps-up *ARGS:
docker compose -f tests/docker-compose.yml up {{ ARGS }}

test-deps-down:
docker compose -f tests/docker-compose.yml down

# Requires local-deps, see local-deps-up
test *ARGS:
just _run_in_bash cargo test --workspace --exclude graph-tests -- --nocapture {{ ARGS }}

runner-test *ARGS:
just _run_in_bash cargo test -p graph-tests --test runner_tests -- --nocapture {{ ARGS }}

# Requires test-deps to be running, see test-deps-up
it-test *ARGS:
just _run_in_bash cargo test --test integration_tests -- --nocapture {{ ARGS }}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These can be just aliases in ~/.cargo/config.toml. I have e.g.

[alias]
store = "test -p graph-store-postgres"
tst = "test --workspace --exclude graph-tests"
docs = "doc --workspace --document-private-items"
gm = "install --bin graphman --path node --locked"
gmt = "install --bin graphman --path node --locked --root /var/tmp/cargo"
rt = "test -p graph-tests --test runner_tests"
it = "test -p graph-tests --test integration_tests -- --nocapture"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and that's local, this works for everyone.


local-rm-db:
rm -r docker/data/postgres

new-migration NAME:
diesel migration generate {{ NAME }} --migration-dir store/postgres/migrations/

_run_in_bash *CMD:
#!/usr/bin/env bash
export {{ DATABASE_TEST_VAR_NAME }}={{ DATABASE_URL }}
{{ CMD }}
2 changes: 1 addition & 1 deletion server/index-node/src/resolver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ impl<S: Store> IndexNodeResolver<S> {
block: object! {
hash: cached_call.block_ptr.hash.hash_hex(),
number: cached_call.block_ptr.number,
timestamp: timestamp,
timestamp: timestamp.map(|ts| ts.as_secs_since_epoch() as u64),
},
contractAddress: &cached_call.contract_address[..],
returnValue: &cached_call.return_value[..],
Expand Down
Loading