Skip to content

Add bao-http-tool example #43

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions bao-http-tool/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[package]
name = "http-tool"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
anyhow = "1.0.82"
bao-tree = "0.13.0"
clap = { version = "4.5.4", features = ["derive"] }
iroh-io = { version = "0.6.0", features = ["x-http", "stats", "tokio-util"] }
range-collections = "0.4.5"
tokio = { version = "1.37.0", features = ["full"] }
tokio-util = "0.7.10"
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
url = "2.5.0"
27 changes: 27 additions & 0 deletions bao-http-tool/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Validate external data using an external outboard

## Usage

Create an outboard for an asset that is available via HTTP:

```
❯ bao-http-tool generate --data http://127.0.0.1:3003/370e2b3002be3b38b120f7b3be53da4cf646810e26f8f4a5018247c8188af5b2.data --block-size-log 4
Computing outboard for http://127.0.0.1:3003/370e2b3002be3b38b120f7b3be53da4cf646810e26f8f4a5018247c8188af5b2.data of size 100000000 with block log 4, size 16384
Computed hash: 370e2b3002be3b38b120f7b3be53da4cf646810e26f8f4a5018247c8188af5b2
Writing outboard to 370e2b3002be3b38b120f7b3be53da4cf646810e26f8f4a5018247c8188af5b2.obao4
```

Validate a range of the file:

```
❯ bao-http-tool validate --hash 370e2b3002be3b38b120f7b3be53da4cf646810e26f8f4a5018247c8188af5b2 --block-size-log 4 --data http://127.0.0.1:3003/370e2b3002be3b38b120f7b3be53da4cf646810e26f8f4a5018247c8188af5b2.data --outboard 370e2b3002be3b38b120f7b3be53da4cf646810e26f8f4a5018247c8188af5b2.obao4 --range 0..10000000
Size: 100000000
Outboard: 390592
Hash: 370e2b3002be3b38b120f7b3be53da4cf646810e26f8f4a5018247c8188af5b2
Block size: BlockSize(4)
Byte ranges: RangeSet{0..10000000}
Chunk ranges: RangeSet{0..9766}
confirmed that range RangeSet{0..10000000} of http://127.0.0.1:3003/370e2b3002be3b38b120f7b3be53da4cf646810e26f8f4a5018247c8188af5b2.data matches 370e2b3002be3b38b120f7b3be53da4cf646810e26f8f4a5018247c8188af5b2
```

Note that for validation, both the data **and** the outboard can be given either as a local path or as an HTTP URL.
74 changes: 74 additions & 0 deletions bao-http-tool/src/args.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
use std::{fmt::Display, path::PathBuf, str::FromStr};

use bao_tree::blake3::Hash;
use clap::Parser;
use url::Url;

// Top-level command line arguments; the only content is the chosen subcommand.
// (Plain `//` comments are used on purpose: clap's derive would turn `///`
// doc comments into help text and thereby change the CLI output.)
#[derive(Debug, Parser)]
pub struct Args {
// The subcommand selected on the command line.
#[clap(subcommand)]
pub subcommand: SubCommand,
}

// The operations offered by the tool.
// (Plain `//` comments are used on purpose: clap's derive would turn `///`
// doc comments into help text and thereby change the CLI output.)
#[derive(Debug, Parser)]
pub enum SubCommand {
// Validate data against an outboard and a root hash.
Validate(ValidateArgs),
// Generate an outboard for data reachable via a URL.
Generate(GenerateArgs),
}

// Arguments of the `validate` subcommand.
//
// Plain `//` comments are used on purpose: clap's derive would turn `///`
// doc comments into help text and thereby change the CLI output.
#[derive(Debug, Parser)]
pub struct ValidateArgs {
    // Expected root hash of the data.
    #[clap(long, help = "Hash of the data")]
    pub hash: Hash,

    // Where to read the data from.
    #[clap(long, help = "URL or local path to the data")]
    pub data: PathOrUrl,

    // Where to read the outboard from.
    #[clap(long, help = "URL or local path to the outboard")]
    pub outboard: PathOrUrl,

    // The value is a chunk log, not a byte log: it is handed to
    // `BlockSize::from_chunk_log`, so 4 means 2^4 chunks of 1024 bytes each
    // (16384 bytes), which is what the README example output shows.
    #[clap(
        long,
        default_value_t = 0,
        help = "Block size as log2 of the number of 1024-byte chunks per block (0 = 1 KiB, 4 = 16 KiB)"
    )]
    pub block_size_log: u8,

    // Optional `START..END` byte range; when absent the whole data is used.
    #[clap(long, help = "Range of the data to read")]
    pub range: Option<String>,
}

// Arguments of the `generate` subcommand.
//
// Plain `//` comments are used on purpose: clap's derive would turn `///`
// doc comments into help text and thereby change the CLI output.
#[derive(Debug, Parser)]
pub struct GenerateArgs {
    // Remote location of the data to compute the outboard for.
    #[clap(long, help = "URL to the data")]
    pub data: Url,

    // Output file; when omitted the caller derives a name from the hash.
    #[clap(long, help = "path where to create the outboard")]
    pub target: Option<PathBuf>,

    // The value is a chunk log, not a byte log: it is handed to
    // `BlockSize::from_chunk_log`, so 4 means 2^4 chunks of 1024 bytes each
    // (16384 bytes), which is what the README example output shows.
    #[clap(
        long,
        default_value_t = 0,
        help = "Block size as log2 of the number of 1024-byte chunks per block (0 = 1 KiB, 4 = 16 KiB)"
    )]
    pub block_size_log: u8,
}

#[derive(Debug, Clone)]
pub enum PathOrUrl {
Path(std::path::PathBuf),
Url(Url),
}

impl Display for PathOrUrl {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
PathOrUrl::Path(path) => write!(f, "{}", path.display()),
PathOrUrl::Url(url) => write!(f, "{}", url),
}
}
}

/// Parses a command line value into a [`PathOrUrl`].
///
/// Only strings that parse as a URL with an `http` or `https` scheme are
/// treated as remote; everything else is taken verbatim as a local path.
/// Checking the scheme matters because many plain paths are syntactically
/// valid URLs: a Windows path such as `C:\data.bin` parses with scheme `c`,
/// and a file named `a:b` parses with scheme `a`. Without the check such
/// paths would be handed to the HTTP reader instead of being opened locally.
///
/// This conversion never fails; the `Err` type exists only to satisfy
/// [`FromStr`].
impl FromStr for PathOrUrl {
    type Err = anyhow::Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match Url::from_str(s) {
            Ok(url) if matches!(url.scheme(), "http" | "https") => Ok(PathOrUrl::Url(url)),
            // Not a URL at all, or a URL-looking string with a scheme we
            // cannot fetch: treat it as a local path.
            _ => Ok(PathOrUrl::Path(std::path::PathBuf::from(s))),
        }
    }
}
116 changes: 116 additions & 0 deletions bao-http-tool/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
use std::io::Cursor;

use bao_tree::{
io::{
fsm::{encode_ranges_validated, outboard_post_order},
outboard::PreOrderOutboard,
round_up_to_chunks,
},
BaoTree, BlockSize, ByteRanges,
};
use clap::Parser;
use iroh_io::{AsyncSliceReader, HttpAdapter};
use tokio_util::either::Either;
mod args;
mod outboard;
use args::{GenerateArgs, PathOrUrl, SubCommand, ValidateArgs};
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter};

use crate::args::Args;

/// Reader over either a remote HTTP resource (left) or a local file (right).
type Reader = Either<iroh_io::HttpAdapter, iroh_io::File>;

/// Opens `path` for reading.
///
/// A URL is wrapped in an HTTP adapter; nothing is awaited for that case in
/// this function. A local path is opened as a file, and a failure to open it
/// is returned as the error.
async fn open(path: PathOrUrl) -> anyhow::Result<Reader> {
    let reader = match path {
        PathOrUrl::Url(remote) => Either::Left(iroh_io::HttpAdapter::new(remote)),
        PathOrUrl::Path(local) => {
            let file = iroh_io::File::open(local).await?;
            Either::Right(file)
        }
    };
    Ok(reader)
}

/// Computes the outboard for the data behind `args.data` and writes it to a file.
///
/// The data size is queried from the HTTP adapter, the outboard is built in
/// memory with `outboard_post_order`, and the result is written to
/// `args.target`. When no target is given the file name is
/// `<hash hex>.obao<chunk log>` in the current directory.
///
/// # Errors
/// Propagates failures from querying the size, computing the outboard and
/// writing the output file.
async fn generate(args: GenerateArgs) -> anyhow::Result<()> {
    let block_size = BlockSize::from_chunk_log(args.block_size_log);
    let mut source = HttpAdapter::new(args.data.clone());
    let size = source.size().await?;
    println!(
        "Computing outboard for {} of size {} with block log {}, size {}",
        args.data,
        size,
        block_size.chunk_log(),
        block_size.bytes()
    );

    // The complete outboard is accumulated in memory before it is written out.
    let mut encoded = Vec::new();
    let tree = BaoTree::new(size, block_size);
    let hash = outboard_post_order(Cursor::new(source), tree, &mut encoded).await?;
    println!("Computed hash: {}", hash.to_hex());

    let filename = match args.target {
        Some(explicit) => explicit,
        None => std::path::PathBuf::from(format!(
            "{}.obao{}",
            hash.to_hex(),
            block_size.chunk_log()
        )),
    };
    println!("Writing outboard to {}", filename.display());
    tokio::fs::write(filename, encoded).await?;
    Ok(())
}

async fn validate(args: ValidateArgs) -> anyhow::Result<()> {
let mut data = open(args.data.clone()).await?;
let mut outboard = open(args.outboard.clone()).await?;
let size = data.size().await?;
let outboard_size = outboard.size().await?;
let byte_ranges = if let Some(range) = args
.range
.as_ref()
.map(|x| x.split("..").collect::<Vec<_>>())
{
if range.len() != 2 {
anyhow::bail!("Invalid range");
}
let start: u64 = range[0].parse()?;
let end: u64 = range[1].parse()?;
ByteRanges::from(start..end)
} else {
ByteRanges::all()
};
let chunk_ranges = round_up_to_chunks(&byte_ranges);
// let size = outboard.read_at(0, 8).await?;
// let size = u64::from_le_bytes(size.as_ref().try_into()?);
let block_size = BlockSize::from_chunk_log(args.block_size_log);
println!("Size: {}", size);
println!("Outboard: {}", outboard_size);
println!("Hash: {}", args.hash.to_hex());
println!("Block size: {}", block_size);
println!("Byte ranges: {:?}", byte_ranges);
println!("Chunk ranges: {:?}", chunk_ranges);
let tree = BaoTree::new(size, block_size);
let outboard = PreOrderOutboard {
root: args.hash,
data: outboard,
tree,
};
let encoded = Vec::new();
encode_ranges_validated(data, outboard, &chunk_ranges, encoded).await?;
println!(
"confirmed that range {:?} of {} matches {}",
byte_ranges,
args.data,
args.hash.to_hex()
);
Ok(())
}

/// Installs a tracing subscriber that writes to stderr and is filtered by the
/// default environment filter.
///
/// An error from `try_init` is deliberately discarded.
fn setup_logging() {
    let stderr_layer = tracing_subscriber::fmt::layer().with_writer(std::io::stderr);
    let env_filter = EnvFilter::from_default_env();
    let subscriber = tracing_subscriber::registry()
        .with(stderr_layer)
        .with(env_filter);
    // Best effort: the outcome of the installation is intentionally ignored.
    let _ = subscriber.try_init();
}

/// Entry point: sets up logging, parses the command line and runs the
/// selected subcommand, returning its result.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    setup_logging();
    match Args::parse().subcommand {
        SubCommand::Validate(validate_args) => validate(validate_args).await,
        SubCommand::Generate(generate_args) => generate(generate_args).await,
    }
}
93 changes: 93 additions & 0 deletions bao-http-tool/src/outboard.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
use std::{
collections::BTreeMap,
io,
sync::{Arc, RwLock},
};

use bao_tree::{blake3::Hash, iter::BaoChunk, BaoTree, ByteRanges, ChunkRangesRef, TreeNode};
use range_collections::{range_set::RangeSetRange, RangeSetRef};

/// Outboard wrapper that remembers every hash pair it has loaded.
///
/// Lookups are answered from the shared map when possible and otherwise
/// forwarded to `inner`, whose successful answers are stored for later.
struct SparseOutboardCache<T> {
    /// Hash pairs seen so far, keyed by tree node; shareable via `Arc`.
    cache: Arc<RwLock<BTreeMap<TreeNode, (Hash, Hash)>>>,
    /// The outboard that is consulted on a cache miss.
    inner: T,
}

impl<T: bao_tree::io::fsm::Outboard> bao_tree::io::fsm::Outboard for SparseOutboardCache<T> {
    /// Root hash, taken from the wrapped outboard.
    fn root(&self) -> Hash {
        self.inner.root()
    }

    /// Tree geometry, taken from the wrapped outboard.
    fn tree(&self) -> bao_tree::BaoTree {
        self.inner.tree()
    }

    /// Returns the hash pair for `node`, consulting the cache first.
    ///
    /// Only `Some` results of the wrapped outboard are cached; `None` is
    /// passed through and looked up again next time. Panics if the lock is
    /// poisoned.
    async fn load(&mut self, node: TreeNode) -> io::Result<Option<(Hash, Hash)>> {
        // The read guard is a temporary of this statement, so it is released
        // before the wrapped outboard is awaited below.
        let cached = self.cache.read().unwrap().get(&node).cloned();
        if let Some(hit) = cached {
            return Ok(Some(hit));
        }
        let loaded = self.inner.load(node).await?;
        if let Some(pair) = &loaded {
            self.cache.write().unwrap().insert(node, pair.clone());
        }
        Ok(loaded)
    }
}

/// Yields the IDs of the parent nodes visited when iterating the given chunk
/// ranges of `tree` in pre order.
///
/// Leaf items of the underlying iterator are skipped. The function is `async`
/// although it awaits nothing itself; callers `.await` it to obtain the
/// iterator.
pub async fn node_ids(
    tree: BaoTree,
    ranges: &ChunkRangesRef,
) -> impl Iterator<Item = TreeNode> + '_ {
    // todo!
    // let ranges = bao_tree::rec::truncate_ranges(ranges, outboard.tree().size())
    let min_level = tree.block_size().chunk_log();
    tree.ranges_pre_order_chunks_iter_ref(ranges, min_level)
        .filter_map(|item| {
            if let BaoChunk::Parent { node, .. } = item {
                Some(node)
            } else {
                None
            }
        })
}

/// Computes the byte ranges, without a size prefix, that hold the outboard
/// entries of the given nodes.
///
/// Each node that has an offset contributes one 64 byte wide range starting
/// at `offset * 64`; nodes without an offset are skipped.
///
/// NOTE(review): the name says "pre order", but the offsets come from
/// `post_order_offset` - confirm which layout is intended.
pub fn pre_order_ranges(tree: BaoTree, nodes: impl IntoIterator<Item = TreeNode>) -> ByteRanges {
    nodes
        .into_iter()
        .filter_map(|node| tree.post_order_offset(node))
        .fold(ByteRanges::empty(), |mut acc, position| {
            // 64 bytes per entry - presumably one pair of 32 byte hashes.
            let begin = position.value() * 64;
            acc |= ByteRanges::from(begin..begin + 64);
            acc
        })
}

/// Expands every range outward to block boundaries, where a block is
/// `2^block_shift` bytes.
///
/// The start of each range is rounded down and the end is rounded up to the
/// nearest multiple of the block size; the union of the results is returned.
/// Open-ended ranges stay open-ended.
///
/// If rounding an end up would exceed `u64::MAX`, the range is treated as
/// open-ended instead of overflowing (which would panic in debug builds and
/// wrap around to a wrong, usually empty, range in release builds).
///
/// `block_shift` must be smaller than 64.
pub fn simplify(ranges: &RangeSetRef<u64>, block_shift: u8) -> ByteRanges {
    // Largest value that can be added without crossing a second boundary.
    let mask: u64 = (1 << block_shift) - 1;
    let mut res = ByteRanges::empty();
    for item in ranges.iter() {
        match item {
            RangeSetRange::Range(range) => {
                let start = (range.start >> block_shift) << block_shift;
                match range.end.checked_add(mask) {
                    Some(padded) => {
                        let end = (padded >> block_shift) << block_shift;
                        res |= ByteRanges::from(start..end);
                    }
                    // The next boundary is not representable in a u64, so the
                    // range extends to the end of the address space.
                    None => res |= ByteRanges::from(start..),
                }
            }
            RangeSetRange::RangeFrom(range) => {
                let start = (range.start >> block_shift) << block_shift;
                res |= ByteRanges::from(start..);
            }
        }
    }
    res
}

/// Loads every node reported by `node_ids` for the given chunk ranges from
/// `outboard`, one after the other.
///
/// The loaded values are discarded; the call is useful for outboards whose
/// `load` has a side effect such as filling a cache. Stops at the first error
/// and returns it.
pub async fn prefetch(
    mut outboard: impl bao_tree::io::fsm::Outboard,
    ranges: &ChunkRangesRef,
) -> io::Result<()> {
    let tree = outboard.tree();
    let wanted = node_ids(tree, ranges).await;
    for node in wanted {
        let _ = outboard.load(node).await?;
    }
    Ok(())
}
15 changes: 15 additions & 0 deletions validate-http/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[package]
name = "bao-http-tool"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
anyhow = "1.0.82"
bao-tree = "0.13.0"
clap = { version = "4.5.4", features = ["derive"] }
iroh-io = { version = "0.6.0", features = ["x-http", "stats", "tokio-util"] }
tokio = { version = "1.37.0", features = ["full"] }
tokio-util = "0.7.10"
url = "2.5.0"
Loading