From 8e8378b980ac4d25421e2568c48bcb3e3c88f4fe Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Fri, 10 Jan 2025 22:08:44 +0100 Subject: [PATCH 01/14] chore: setup dat test scaffolding Signed-off-by: Robert Pack --- .github/actions/load_dat/action.yaml | 26 ++++ .github/actions/setup-env/action.yml | 4 +- .gitignore | 4 +- Cargo.toml | 9 +- crates/core/Cargo.toml | 7 +- crates/core/src/kernel/snapshot/mod.rs | 1 + crates/core/src/kernel/snapshot/next.rs | 183 ++++++++++++++++++++++++ crates/test/Cargo.toml | 13 +- crates/test/src/acceptance/data.rs | 130 +++++++++++++++++ crates/test/src/acceptance/meta.rs | 73 ++++++++++ crates/test/src/acceptance/mod.rs | 5 + crates/test/src/lib.rs | 1 + 12 files changed, 444 insertions(+), 12 deletions(-) create mode 100644 .github/actions/load_dat/action.yaml create mode 100644 crates/core/src/kernel/snapshot/next.rs create mode 100644 crates/test/src/acceptance/data.rs create mode 100644 crates/test/src/acceptance/meta.rs create mode 100644 crates/test/src/acceptance/mod.rs diff --git a/.github/actions/load_dat/action.yaml b/.github/actions/load_dat/action.yaml new file mode 100644 index 0000000000..071db58ba0 --- /dev/null +++ b/.github/actions/load_dat/action.yaml @@ -0,0 +1,26 @@ +name: Delta Acceptance Tests +description: Load Delta Lake acceptance test data + +inputs: + version: + description: "The Python version to set up" + required: false + default: "0.0.3" + + target-directory: + description: target directory for acceptance test data + required: false + default: ${{ github.workspace }}/dat + +runs: + using: composite + + steps: + - name: load DAT + shell: bash + run: | + rm -rf {{ inputs.target-directory }} + curl -OL https://github.com/delta-incubator/dat/releases/download/v${{ inputs.version }}/deltalake-dat-v${{ inputs.version }}.tar.gz + mkdir -p {{ inputs.target-directory }} + tar --no-same-permissions -xzf deltalake-dat-v${{ inputs.version }}.tar.gz --directory {{ inputs.target-directory }} + rm deltalake-dat-v${{ inputs.version }}.tar.gz diff --git a/.github/actions/setup-env/action.yml b/.github/actions/setup-env/action.yml index 8339c45449..74f05ea84d 100644 --- a/.github/actions/setup-env/action.yml +++ b/.github/actions/setup-env/action.yml @@ -4,12 +4,12 @@ description: "Set up Python, virtual environment, and Rust toolchain" inputs: python-version: description: "The Python version to set up" - required: true + required: false default: "3.10" rust-toolchain: description: "The Rust toolchain to set up" - required: true + required: false default: "stable" runs: diff --git a/.gitignore b/.gitignore index 18dcc39f69..ee7ca99235 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ __blobstorage__ .githubchangeloggenerator.cache/ .githubchangeloggenerator* data +.zed/ # Add all Cargo.lock files except for those in binary crates Cargo.lock @@ -32,4 +33,5 @@ Cargo.lock justfile site -__pycache__ \ No newline at end of file +__pycache__ +dat/ diff --git a/Cargo.toml b/Cargo.toml index c500941247..8ac14e5209 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,8 +26,12 @@ debug = true debug = "line-tables-only" [workspace.dependencies] -delta_kernel = { version = "=0.6.0", features = ["default-engine"] } +#delta_kernel = { version = "=0.6.0", features = ["default-engine"] } #delta_kernel = { path = "../delta-kernel-rs/kernel", features = ["sync-engine"] } +delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "fcc43b50dafdc5e6b84c206492bbde8ed1115529", features = [ + "default-engine", + "developer-visibility", +] } # 
arrow arrow = { version = "53" } @@ -59,7 +63,7 @@ datafusion-sql = { version = "44" } # serde serde = { version = "1.0.194", features = ["derive"] } serde_json = "1" -strum = { version = "*"} +strum = { version = "*" } # "stdlib" @@ -77,4 +81,3 @@ async-trait = { version = "0.1" } futures = { version = "0.3" } tokio = { version = "1" } num_cpus = { version = "1" } - diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 65743fe281..d7143983a7 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -15,7 +15,7 @@ rust-version.workspace = true features = ["datafusion", "json", "unity-experimental"] [dependencies] -delta_kernel.workspace = true +delta_kernel = { workspace = true } # arrow arrow = { workspace = true } @@ -29,10 +29,7 @@ arrow-ord = { workspace = true } arrow-row = { workspace = true } arrow-schema = { workspace = true, features = ["serde"] } arrow-select = { workspace = true } -parquet = { workspace = true, features = [ - "async", - "object_store", -] } +parquet = { workspace = true, features = ["async", "object_store"] } pin-project-lite = "^0.2.7" # datafusion diff --git a/crates/core/src/kernel/snapshot/mod.rs b/crates/core/src/kernel/snapshot/mod.rs index 2938b3d3db..38d1dc570d 100644 --- a/crates/core/src/kernel/snapshot/mod.rs +++ b/crates/core/src/kernel/snapshot/mod.rs @@ -45,6 +45,7 @@ pub use self::log_data::*; mod log_data; pub(crate) mod log_segment; +mod next; pub(crate) mod parse; mod replay; mod serde; diff --git a/crates/core/src/kernel/snapshot/next.rs b/crates/core/src/kernel/snapshot/next.rs new file mode 100644 index 0000000000..7189fc75cd --- /dev/null +++ b/crates/core/src/kernel/snapshot/next.rs @@ -0,0 +1,183 @@ +use std::collections::HashMap; +use std::sync::{Arc, LazyLock}; + +use ::serde::{Deserialize, Serialize}; +use arrow_array::RecordBatch; +use delta_kernel::actions::{ + get_log_schema, ADD_NAME, CDC_NAME, METADATA_NAME, PROTOCOL_NAME, REMOVE_NAME, + SET_TRANSACTION_NAME, +}; +use delta_kernel::actions::{Metadata, Protocol}; +use delta_kernel::engine::default::executor::tokio::{ + TokioBackgroundExecutor, TokioMultiThreadExecutor, +}; +use delta_kernel::engine::default::DefaultEngine; +use delta_kernel::engine_data::{GetData, RowVisitor, TypedGetData as _}; +use delta_kernel::expressions::ColumnName; +use delta_kernel::scan::state::{DvInfo, Stats}; +use delta_kernel::scan::ScanBuilder; +use delta_kernel::schema::{ColumnNamesAndTypes, DataType}; +use delta_kernel::snapshot::Snapshot as SnapshotInner; +use delta_kernel::{DeltaResult as KernelResult, Engine, Error, Table}; +use futures::{StreamExt, TryStreamExt}; +use object_store::path::Path; +use object_store::ObjectStore; +use tokio::sync::mpsc::channel; + +use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; + +pub struct Snapshot { + inner: Arc, + engine: Arc, +} + +impl Snapshot { + pub fn new(inner: Arc, engine: Arc) -> Self { + Self { inner, engine } + } + + pub async fn try_new( + table: Table, + store: Arc, + config: DeltaTableConfig, + version: Option, + ) -> DeltaResult { + let executor = Arc::new(TokioMultiThreadExecutor::new( + tokio::runtime::Handle::current(), + )); + let table_root = Path::from_url_path(table.location().path())?; + let engine = DefaultEngine::new(store, table_root, executor); + let snapshot = table.snapshot(&engine, None)?; + + Ok(Self::new(Arc::new(snapshot), Arc::new(engine))) + } + + pub fn protocol(&self) -> &Protocol { + self.inner.protocol() + } + + pub fn metadata(&self) -> &delta_kernel::actions::Metadata { + 
self.inner.metadata() + } + + pub(crate) fn replay_log(&self) -> DeltaResult<()> { + let log_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; + let actions = self.inner._log_segment().replay( + self.engine.as_ref(), + log_schema.clone(), + log_schema.clone(), + None, + )?; + + // let it = scan_action_iter( + // engine, + // self.replay_for_scan_data(engine)?, + // physical_predicate, + // ); + // Ok(Some(it).into_iter().flatten()) + + Ok(()) + } +} + +enum Action { + Metadata(delta_kernel::actions::Metadata), + Protocol(delta_kernel::actions::Protocol), + Remove(delta_kernel::actions::Remove), + Add(delta_kernel::actions::Add), + SetTransaction(delta_kernel::actions::SetTransaction), + Cdc(delta_kernel::actions::Cdc), +} + +static NAMES_AND_TYPES: LazyLock = + LazyLock::new(|| get_log_schema().leaves(None)); + +struct LogVisitor { + actions: Vec<(Action, usize)>, + offsets: HashMap, + previous_rows_seen: usize, +} + +impl LogVisitor { + fn new() -> LogVisitor { + // Grab the start offset for each top-level column name, then compute the end offset by + // skipping the rest of the leaves for that column. + let mut offsets = HashMap::new(); + let mut it = NAMES_AND_TYPES.as_ref().0.iter().enumerate().peekable(); + while let Some((start, col)) = it.next() { + let mut end = start + 1; + while it.next_if(|(_, other)| col[0] == other[0]).is_some() { + end += 1; + } + offsets.insert(col[0].clone(), (start, end)); + } + LogVisitor { + actions: vec![], + offsets, + previous_rows_seen: 0, + } + } +} + +impl RowVisitor for LogVisitor { + fn selected_column_names_and_types(&self) -> (&'static [ColumnName], &'static [DataType]) { + todo!() + } + + fn visit<'a>(&mut self, row_count: usize, getters: &[&'a dyn GetData<'a>]) -> KernelResult<()> { + todo!() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use deltalake_test::acceptance::{read_dat_case, TestCaseInfo}; + use deltalake_test::TestResult; + use std::path::PathBuf; + + fn get_dat_dir() -> PathBuf { + let d = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let mut rep_root = d + .parent() + .and_then(|p| p.parent()) + .expect("valid directory") + .to_path_buf(); + rep_root.push("dat/out/reader_tests/generated"); + rep_root + } + + #[tokio::test(flavor = "multi_thread")] + async fn load_snapshot() -> TestResult<()> { + // some comment + let mut dat_dir = get_dat_dir(); + dat_dir.push("basic_append"); + let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; + let table_info = dat_info.table_summary()?; + + let table = Table::try_from_uri(dat_info.table_root()?)?; + + let snapshot = Snapshot::try_new( + table, + Arc::new(object_store::local::LocalFileSystem::default()), + Default::default(), + None, + ) + .await?; + + assert_eq!( + snapshot.protocol().min_reader_version(), + table_info.min_reader_version + ); + + // let table_root = object_store::path::Path::new("s3://delta-rs/test"); + // let store = object_store::ObjectStore::new(&table_root).unwrap(); + // let table = delta::DeltaTable::load(&store, &table_root).await.unwrap(); + // let snapshot = delta::Snapshot::try_new(table_root, table, store, Default::default(), None) + // .await + // .unwrap(); + // snapshot.replay_log().unwrap(); + Ok(()) + } +} diff --git a/crates/test/Cargo.toml b/crates/test/Cargo.toml index 6c93fa705c..1dfaccdc07 100644 --- a/crates/test/Cargo.toml +++ b/crates/test/Cargo.toml @@ -5,9 +5,18 @@ edition = "2021" publish = false [dependencies] +delta_kernel = { workspace = true } +deltalake-core = { version = "0.24.0", path = "../core" } + +arrow-array = { 
workspace = true, features = ["chrono-tz"] } +arrow-cast = { workspace = true } +arrow-ord = { workspace = true } +arrow-schema = { workspace = true, features = ["serde"] } +arrow-select = { workspace = true } +parquet = { workspace = true, features = ["async", "object_store"] } + bytes = { workspace = true } chrono = { workspace = true, default-features = false, features = ["clock"] } -deltalake-core = { version = "0.24.0", path = "../core" } dotenvy = "0" fs_extra = "1.3.0" futures = { version = "0.3" } @@ -16,7 +25,9 @@ rand = "0.8" serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } tempfile = "3" +thiserror = { workspace = true } tokio = { version = "1", features = ["macros", "rt-multi-thread"] } +url = { workspace = true } [features] default = [] diff --git a/crates/test/src/acceptance/data.rs b/crates/test/src/acceptance/data.rs new file mode 100644 index 0000000000..6d8ae4dbca --- /dev/null +++ b/crates/test/src/acceptance/data.rs @@ -0,0 +1,130 @@ +use std::{path::Path, sync::Arc}; + +use arrow_array::{Array, RecordBatch}; +use arrow_ord::sort::{lexsort_to_indices, SortColumn}; +use arrow_schema::{DataType, Schema}; +use arrow_select::{concat::concat_batches, take::take}; +use delta_kernel::DeltaResult; +use futures::{stream::TryStreamExt, StreamExt}; +use object_store::{local::LocalFileSystem, ObjectStore}; +use parquet::arrow::async_reader::{ParquetObjectReader, ParquetRecordBatchStreamBuilder}; + +use super::TestCaseInfo; +use crate::TestResult; + +pub async fn read_golden(path: &Path, _version: Option<&str>) -> DeltaResult { + let expected_root = path.join("expected").join("latest").join("table_content"); + let store = Arc::new(LocalFileSystem::new_with_prefix(&expected_root)?); + let files: Vec<_> = store.list(None).try_collect().await?; + let mut batches = vec![]; + let mut schema = None; + for meta in files.into_iter() { + if let Some(ext) = meta.location.extension() { + if ext == "parquet" { + let reader = ParquetObjectReader::new(store.clone(), meta); + let builder = ParquetRecordBatchStreamBuilder::new(reader).await?; + if schema.is_none() { + schema = Some(builder.schema().clone()); + } + let mut stream = builder.build()?; + while let Some(batch) = stream.next().await { + batches.push(batch?); + } + } + } + } + let all_data = concat_batches(&schema.unwrap(), &batches)?; + Ok(all_data) +} + +pub fn sort_record_batch(batch: RecordBatch) -> DeltaResult { + // Sort by all columns + let mut sort_columns = vec![]; + for col in batch.columns() { + match col.data_type() { + DataType::Struct(_) | DataType::List(_) | DataType::Map(_, _) => { + // can't sort structs, lists, or maps + } + _ => sort_columns.push(SortColumn { + values: col.clone(), + options: None, + }), + } + } + let indices = lexsort_to_indices(&sort_columns, None)?; + let columns = batch + .columns() + .iter() + .map(|c| take(c, &indices, None).unwrap()) + .collect(); + Ok(RecordBatch::try_new(batch.schema(), columns)?) +} + +// Ensure that two schema have the same field names, and dict_id/ordering. 
+// We ignore: +// - data type: This is checked already in `assert_columns_match` +// - nullability: parquet marks many things as nullable that we don't in our schema +// - metadata: because that diverges from the real data to the golden tabled data +fn assert_schema_fields_match(schema: &Schema, golden: &Schema) { + for (schema_field, golden_field) in schema.fields.iter().zip(golden.fields.iter()) { + assert!( + schema_field.name() == golden_field.name(), + "Field names don't match" + ); + assert!( + schema_field.dict_id() == golden_field.dict_id(), + "Field dict_id doesn't match" + ); + assert!( + schema_field.dict_is_ordered() == golden_field.dict_is_ordered(), + "Field dict_is_ordered doesn't match" + ); + } +} + +// some things are equivalent, but don't show up as equivalent for `==`, so we normalize here +fn normalize_col(col: Arc) -> Arc { + if let DataType::Timestamp(unit, Some(zone)) = col.data_type() { + if **zone == *"+00:00" { + arrow_cast::cast::cast(&col, &DataType::Timestamp(*unit, Some("UTC".into()))) + .expect("Could not cast to UTC") + } else { + col + } + } else { + col + } +} + +fn assert_columns_match(actual: &[Arc], expected: &[Arc]) { + for (actual, expected) in actual.iter().zip(expected) { + let actual = normalize_col(actual.clone()); + let expected = normalize_col(expected.clone()); + // note that array equality includes data_type equality + // See: https://arrow.apache.org/rust/arrow_data/equal/fn.equal.html + assert_eq!( + &actual, &expected, + "Column data didn't match. Got {actual:?}, expected {expected:?}" + ); + } +} + +pub async fn assert_scan_data( + all_data: Vec, + test_case: &TestCaseInfo, +) -> TestResult<()> { + let all_data = concat_batches(&all_data[0].schema(), all_data.iter()).unwrap(); + let all_data = sort_record_batch(all_data)?; + + let golden = read_golden(test_case.root_dir(), None).await?; + let golden = sort_record_batch(golden)?; + + assert_columns_match(all_data.columns(), golden.columns()); + assert_schema_fields_match(all_data.schema().as_ref(), golden.schema().as_ref()); + assert!( + all_data.num_rows() == golden.num_rows(), + "Didn't have same number of rows" + ); + + Ok(()) +} diff --git a/crates/test/src/acceptance/meta.rs b/crates/test/src/acceptance/meta.rs new file mode 100644 index 0000000000..6a44f2cb69 --- /dev/null +++ b/crates/test/src/acceptance/meta.rs @@ -0,0 +1,73 @@ +use std::collections::HashMap; +use std::fs::File; +use std::path::{Path, PathBuf}; + +use delta_kernel::{Error, Version}; +use serde::{Deserialize, Serialize}; +use url::Url; + +#[derive(Debug, thiserror::Error)] +pub enum AssertionError { + #[error("Invalid test case data")] + InvalidTestCase, + + #[error("Kernel error: {0}")] + KernelError(#[from] Error), +} + +pub type TestResult = std::result::Result; + +#[derive(Serialize, Deserialize, PartialEq, Eq, Debug)] +struct TestCaseInfoJson { + name: String, + description: String, +} + +#[derive(PartialEq, Eq, Debug)] +pub struct TestCaseInfo { + name: String, + description: String, + root_dir: PathBuf, +} + +impl TestCaseInfo { + /// Root path for this test cases Delta table. 
+ pub fn table_root(&self) -> TestResult { + let table_root = self.root_dir.join("delta"); + Url::from_directory_path(table_root).map_err(|_| AssertionError::InvalidTestCase) + } + + pub fn root_dir(&self) -> &PathBuf { + &self.root_dir + } + + pub fn table_summary(&self) -> TestResult { + let info_path = self + .root_dir() + .join("expected/latest/table_version_metadata.json"); + let file = File::open(info_path).map_err(|_| AssertionError::InvalidTestCase)?; + let info: TableVersionMetaData = + serde_json::from_reader(file).map_err(|_| AssertionError::InvalidTestCase)?; + Ok(info) + } +} + +#[derive(Serialize, Deserialize, PartialEq, Eq, Debug)] +pub struct TableVersionMetaData { + pub version: Version, + pub properties: HashMap, + pub min_reader_version: i32, + pub min_writer_version: i32, +} + +pub fn read_dat_case(case_root: impl AsRef) -> TestResult { + let info_path = case_root.as_ref().join("test_case_info.json"); + let file = File::open(info_path).map_err(|_| AssertionError::InvalidTestCase)?; + let info: TestCaseInfoJson = + serde_json::from_reader(file).map_err(|_| AssertionError::InvalidTestCase)?; + Ok(TestCaseInfo { + root_dir: case_root.as_ref().into(), + name: info.name, + description: info.description, + }) +} diff --git a/crates/test/src/acceptance/mod.rs b/crates/test/src/acceptance/mod.rs new file mode 100644 index 0000000000..521fd294ae --- /dev/null +++ b/crates/test/src/acceptance/mod.rs @@ -0,0 +1,5 @@ +pub mod data; +pub mod meta; + +pub use data::*; +pub use meta::*; diff --git a/crates/test/src/lib.rs b/crates/test/src/lib.rs index dd8c2a2951..6220d7ae1d 100644 --- a/crates/test/src/lib.rs +++ b/crates/test/src/lib.rs @@ -14,6 +14,7 @@ use deltalake_core::DeltaTableBuilder; use deltalake_core::{ObjectStore, Path}; use tempfile::TempDir; +pub mod acceptance; pub mod clock; pub mod concurrent; #[cfg(feature = "datafusion")] From 333198c407e848fe0a8fcd871eb23ae8b2e36ff7 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Sat, 11 Jan 2025 15:19:09 +0100 Subject: [PATCH 02/14] feat: file action replay Signed-off-by: Robert Pack --- Cargo.toml | 7 +- crates/core/src/kernel/snapshot/next.rs | 337 +++++++++++++++++++----- crates/core/src/protocol/checkpoints.rs | 9 +- 3 files changed, 279 insertions(+), 74 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8ac14e5209..c1bc6ea502 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,11 +27,14 @@ debug = "line-tables-only" [workspace.dependencies] #delta_kernel = { version = "=0.6.0", features = ["default-engine"] } -#delta_kernel = { path = "../delta-kernel-rs/kernel", features = ["sync-engine"] } -delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "fcc43b50dafdc5e6b84c206492bbde8ed1115529", features = [ +delta_kernel = { path = "../delta-kernel-rs/kernel", features = [ "default-engine", "developer-visibility", ] } +# delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "fcc43b50dafdc5e6b84c206492bbde8ed1115529", features = [ +# "default-engine", +# "developer-visibility", +# ] } # arrow arrow = { version = "53" } diff --git a/crates/core/src/kernel/snapshot/next.rs b/crates/core/src/kernel/snapshot/next.rs index 7189fc75cd..3e7ab01fe2 100644 --- a/crates/core/src/kernel/snapshot/next.rs +++ b/crates/core/src/kernel/snapshot/next.rs @@ -1,31 +1,75 @@ -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::sync::{Arc, LazyLock}; use ::serde::{Deserialize, Serialize}; -use arrow_array::RecordBatch; +use arrow::compute::{concat_batches, filter_record_batch}; 
+use arrow_arith::boolean::{and, is_null, not}; +use arrow_array::{BooleanArray, RecordBatch}; use delta_kernel::actions::{ - get_log_schema, ADD_NAME, CDC_NAME, METADATA_NAME, PROTOCOL_NAME, REMOVE_NAME, - SET_TRANSACTION_NAME, + get_log_add_schema, get_log_schema, Add, ADD_NAME, CDC_NAME, METADATA_NAME, PROTOCOL_NAME, + REMOVE_NAME, SET_TRANSACTION_NAME, }; use delta_kernel::actions::{Metadata, Protocol}; +use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::engine::default::executor::tokio::{ TokioBackgroundExecutor, TokioMultiThreadExecutor, }; use delta_kernel::engine::default::DefaultEngine; use delta_kernel::engine_data::{GetData, RowVisitor, TypedGetData as _}; use delta_kernel::expressions::ColumnName; +use delta_kernel::scan::log_replay::scan_action_iter; use delta_kernel::scan::state::{DvInfo, Stats}; -use delta_kernel::scan::ScanBuilder; -use delta_kernel::schema::{ColumnNamesAndTypes, DataType}; +use delta_kernel::scan::{scan_row_schema, PhysicalPredicate, ScanBuilder, ScanData}; +use delta_kernel::schema::{ColumnNamesAndTypes, DataType, Schema}; use delta_kernel::snapshot::Snapshot as SnapshotInner; -use delta_kernel::{DeltaResult as KernelResult, Engine, Error, Table}; +use delta_kernel::table_properties::TableProperties; +use delta_kernel::{ + DeltaResult as KernelResult, Engine, EngineData, Error, Expression, Table, Version, +}; use futures::{StreamExt, TryStreamExt}; +use itertools::Itertools; use object_store::path::Path; use object_store::ObjectStore; -use tokio::sync::mpsc::channel; +use tracing::warn; +use url::Url; +use crate::kernel::ActionType; use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; +type ReplayIter = Box, Vec)>>>; + +impl ActionType { + pub(self) fn field_name_unckecked(&self) -> &'static str { + match self { + Self::Metadata => METADATA_NAME, + Self::Protocol => PROTOCOL_NAME, + Self::Remove => REMOVE_NAME, + Self::Add => ADD_NAME, + Self::Txn => SET_TRANSACTION_NAME, + Self::Cdc => CDC_NAME, + _ => panic!(), + } + } + + pub(self) fn field_name(&self) -> DeltaResult<&'static str> { + let name = match self { + Self::Metadata => METADATA_NAME, + Self::Protocol => PROTOCOL_NAME, + Self::Remove => REMOVE_NAME, + Self::Add => ADD_NAME, + Self::Txn => SET_TRANSACTION_NAME, + Self::Cdc => CDC_NAME, + _ => { + return Err(DeltaTableError::generic(format!( + "unsupported action type: {self:?}" + ))) + } + }; + Ok(name) + } +} + +#[derive(Clone)] pub struct Snapshot { inner: Arc, engine: Arc, @@ -39,19 +83,39 @@ impl Snapshot { pub async fn try_new( table: Table, store: Arc, - config: DeltaTableConfig, version: Option, ) -> DeltaResult { + // let executor = Arc::new(TokioMultiThreadExecutor::new( + // config + // .io_runtime + // .map(|rt| rt.get_handle()) + // .unwrap_or(tokio::runtime::Handle::current()), + // )); let executor = Arc::new(TokioMultiThreadExecutor::new( tokio::runtime::Handle::current(), )); let table_root = Path::from_url_path(table.location().path())?; let engine = DefaultEngine::new(store, table_root, executor); - let snapshot = table.snapshot(&engine, None)?; - + let snapshot = table.snapshot(&engine, version.map(|v| v as u64))?; Ok(Self::new(Arc::new(snapshot), Arc::new(engine))) } + pub(crate) fn engine_ref(&self) -> &Arc { + &self.engine + } + + pub fn table_root(&self) -> &Url { + &self.inner.table_root() + } + + pub fn version(&self) -> u64 { + self.inner.version() + } + + pub fn schema(&self) -> &Schema { + self.inner.schema() + } + pub fn protocol(&self) -> &Protocol { self.inner.protocol() } @@ -60,72 
+124,190 @@ impl Snapshot { self.inner.metadata() } - pub(crate) fn replay_log(&self) -> DeltaResult<()> { - let log_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; - let actions = self.inner._log_segment().replay( - self.engine.as_ref(), - log_schema.clone(), - log_schema.clone(), - None, - )?; + pub fn table_properties(&self) -> &TableProperties { + &self.inner.table_properties() + } + + /// Get the timestamp of the given version in miliscends since epoch. + /// + /// Extracts the timestamp from the commit file of the given version + /// from the current log segment. If the commit file is not part of the + /// current log segment, `None` is returned. + pub fn version_timestamp(&self, version: Version) -> Option { + self.inner + ._log_segment() + .ascending_commit_files + .iter() + .find(|f| f.version == version) + .map(|f| f.location.last_modified) + } + fn log_data( + &self, + types: &[ActionType], + ) -> DeltaResult>> { + let field_names = types + .iter() + .filter_map(|t| t.field_name().ok()) + .collect::>(); + if field_names.len() != types.len() { + warn!("skipping unsupported action types"); + } + let log_schema = get_log_schema().project(&field_names)?; + Ok(self + .inner + ._log_segment() + .replay( + self.engine.as_ref(), + log_schema.clone(), + log_schema.clone(), + None, + )? + .map_ok(|(d, flag)| { + Ok(( + RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?), + flag, + )) + }) + .flatten()) // let it = scan_action_iter( // engine, // self.replay_for_scan_data(engine)?, // physical_predicate, // ); // Ok(Some(it).into_iter().flatten()) - - Ok(()) } } -enum Action { - Metadata(delta_kernel::actions::Metadata), - Protocol(delta_kernel::actions::Protocol), - Remove(delta_kernel::actions::Remove), - Add(delta_kernel::actions::Add), - SetTransaction(delta_kernel::actions::SetTransaction), - Cdc(delta_kernel::actions::Cdc), +#[derive(Clone)] +pub struct EagerSnapshot { + snapshot: Snapshot, + files: RecordBatch, + actions: Option, } -static NAMES_AND_TYPES: LazyLock = - LazyLock::new(|| get_log_schema().leaves(None)); +impl EagerSnapshot { + /// Create a new [`EagerSnapshot`] instance tracking actions of the given types. + /// + /// Only actions supplied by `tracked_actions` will be loaded into memory. + /// This is useful when only a subset of actions are needed. `Add` and `Remove` actions + /// are treated specially. I.e. `Add` and `Remove` will be loaded as well. + pub async fn try_new_with_actions( + table_root: impl AsRef, + store: Arc, + config: DeltaTableConfig, + version: Option, + tracked_actions: HashSet, + predicate: Option, + ) -> DeltaResult { + let mut replay_actions = Vec::new(); + if config.require_files { + replay_actions.push(ActionType::Add); + replay_actions.push(ActionType::Remove); + } + replay_actions.extend(tracked_actions.into_iter().filter(|it| { + !config.require_files || (it != &ActionType::Add && it != &ActionType::Remove) + })); -struct LogVisitor { - actions: Vec<(Action, usize)>, - offsets: HashMap, - previous_rows_seen: usize, -} + let snapshot = Snapshot::try_new(Table::try_from_uri(table_root)?, store, version).await?; + + let mut replay_data = Vec::new(); + let mut action_data = Vec::new(); + for slice in snapshot.log_data(&replay_actions)? { + let (batch, flag) = slice?; -impl LogVisitor { - fn new() -> LogVisitor { - // Grab the start offset for each top-level column name, then compute the end offset by - // skipping the rest of the leaves for that column. 
- let mut offsets = HashMap::new(); - let mut it = NAMES_AND_TYPES.as_ref().0.iter().enumerate().peekable(); - while let Some((start, col)) = it.next() { - let mut end = start + 1; - while it.next_if(|(_, other)| col[0] == other[0]).is_some() { - end += 1; + let action_projection = replay_actions + .iter() + .filter_map(|t| { + (t != &ActionType::Add && t != &ActionType::Remove) + .then_some( + t.field_name() + .ok() + .and_then(|n| batch.schema_ref().index_of(n).ok()), + ) + .flatten() + }) + .collect_vec(); + + if !action_projection.is_empty() { + action_data.push(batch.project(&action_projection)?); + } + + if config.require_files { + let file_data = batch.project(&[0, 1])?; + let file_data = filter_record_batch( + &file_data, + ¬(&and( + &is_null(batch.column(0))?, + &is_null(batch.column(1))?, + )?)?, + )?; + replay_data.push(Ok(( + Box::new(ArrowEngineData::from(file_data)) as Box, + flag, + ))); } - offsets.insert(col[0].clone(), (start, end)); - } - LogVisitor { - actions: vec![], - offsets, - previous_rows_seen: 0, } + + let files_schema = Arc::new(get_log_add_schema().as_ref().try_into()?); + let scan_schema = Arc::new((&scan_row_schema()).try_into()?); + + let files = if !replay_data.is_empty() { + let (engine, action_iter) = (snapshot.engine_ref().as_ref(), replay_data.into_iter()); + let physical_predicate = + predicate.and_then(|p| PhysicalPredicate::try_new(&p, snapshot.schema()).ok()); + + let it: ReplayIter = match physical_predicate { + Some(PhysicalPredicate::StaticSkipAll) => Box::new(std::iter::empty()), + Some(PhysicalPredicate::Some(p, s)) => { + Box::new(scan_action_iter(engine, action_iter, Some((p, s)))) + } + None | Some(PhysicalPredicate::None) => { + Box::new(scan_action_iter(engine, action_iter, None)) + } + }; + + let mut filtered = Vec::new(); + for res in it { + let (batch, selection) = res?; + let predicate = BooleanArray::from(selection); + let data: RecordBatch = ArrowEngineData::try_from_engine_data(batch)?.into(); + filtered.push(filter_record_batch(&data, &predicate)?); + } + concat_batches(&scan_schema, &filtered)? 
+ } else { + RecordBatch::new_empty(scan_schema.clone()) + }; + + let actions = (!action_data.is_empty()) + .then(|| concat_batches(&files_schema, &action_data).ok()) + .flatten(); + + Ok(Self { + snapshot, + files, + actions, + }) } -} -impl RowVisitor for LogVisitor { - fn selected_column_names_and_types(&self) -> (&'static [ColumnName], &'static [DataType]) { - todo!() + pub fn version(&self) -> u64 { + self.snapshot.version() } - fn visit<'a>(&mut self, row_count: usize, getters: &[&'a dyn GetData<'a>]) -> KernelResult<()> { - todo!() + pub fn schema(&self) -> &Schema { + self.snapshot.schema() + } + + pub fn protocol(&self) -> &Protocol { + self.snapshot.protocol() + } + + pub fn metadata(&self) -> &delta_kernel::actions::Metadata { + self.snapshot.metadata() + } + + pub fn table_properties(&self) -> &TableProperties { + &self.snapshot.table_properties() } } @@ -133,6 +315,7 @@ impl RowVisitor for LogVisitor { mod tests { use super::*; + use arrow_cast::pretty::print_batches; use deltalake_test::acceptance::{read_dat_case, TestCaseInfo}; use deltalake_test::TestResult; use std::path::PathBuf; @@ -152,7 +335,7 @@ mod tests { async fn load_snapshot() -> TestResult<()> { // some comment let mut dat_dir = get_dat_dir(); - dat_dir.push("basic_append"); + dat_dir.push("multi_partitioned"); let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; let table_info = dat_info.table_summary()?; @@ -161,23 +344,47 @@ mod tests { let snapshot = Snapshot::try_new( table, Arc::new(object_store::local::LocalFileSystem::default()), + None, + ) + .await?; + + assert_eq!(snapshot.version(), table_info.version); + assert_eq!( + snapshot.protocol().min_reader_version(), + table_info.min_reader_version + ); + + Ok(()) + } + + #[tokio::test(flavor = "multi_thread")] + async fn load_eager_snapshot() -> TestResult<()> { + // some comment + let mut dat_dir = get_dat_dir(); + dat_dir.push("multi_partitioned"); + let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; + let table_info = dat_info.table_summary()?; + + let table = Table::try_from_uri(dat_info.table_root()?)?; + + let snapshot = EagerSnapshot::try_new_with_actions( + table.location(), + Arc::new(object_store::local::LocalFileSystem::default()), + Default::default(), + None, Default::default(), None, ) .await?; + assert_eq!(snapshot.version(), table_info.version); assert_eq!( snapshot.protocol().min_reader_version(), table_info.min_reader_version ); - // let table_root = object_store::path::Path::new("s3://delta-rs/test"); - // let store = object_store::ObjectStore::new(&table_root).unwrap(); - // let table = delta::DeltaTable::load(&store, &table_root).await.unwrap(); - // let snapshot = delta::Snapshot::try_new(table_root, table, store, Default::default(), None) - // .await - // .unwrap(); - // snapshot.replay_log().unwrap(); + print_batches(&[snapshot.files])?; + Ok(()) } } diff --git a/crates/core/src/protocol/checkpoints.rs b/crates/core/src/protocol/checkpoints.rs index 72dbec7828..1fada05d38 100644 --- a/crates/core/src/protocol/checkpoints.rs +++ b/crates/core/src/protocol/checkpoints.rs @@ -170,12 +170,7 @@ pub async fn create_checkpoint_for( return Err(CheckpointError::StaleTableVersion(version, state.version()).into()); } - // TODO: checkpoints _can_ be multi-part... haven't actually found a good reference for - // an appropriate split point yet though so only writing a single part currently. 
- // See https://github.com/delta-io/delta-rs/issues/288 let last_checkpoint_path = log_store.log_path().child("_last_checkpoint"); - - debug!("Writing parquet bytes to checkpoint buffer."); let tombstones = state .unexpired_tombstones(log_store.object_store(None).clone()) .await @@ -291,9 +286,9 @@ fn parquet_bytes_from_state( // and omit metadata columns if at least one remove action has `extended_file_metadata=false`. // We've added the additional check on `size.is_some` because in delta-spark the primitive long type // is used, hence we want to omit possible errors when `extended_file_metadata=true`, but `size=null` - let use_extended_remove_schema = tombstones + let use_extended_remove_schema = !tombstones .iter() - .all(|r| r.extended_file_metadata == Some(true) && r.size.is_some()); + .any(|r| r.extended_file_metadata == Some(false) || r.size.is_none()); // If use_extended_remove_schema=false for some of the tombstones, then it should be for each. if !use_extended_remove_schema { From 0f2c1c465ec9ab090517c4d7f840c81758f2f712 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Mon, 13 Jan 2025 15:43:52 +0100 Subject: [PATCH 03/14] feat: add objectstore with commit file caching Signed-off-by: Robert Pack --- crates/core/Cargo.toml | 5 + crates/core/src/storage/cache.rs | 186 +++++++++++++++++++++++++++++++ crates/core/src/storage/mod.rs | 2 + 3 files changed, 193 insertions(+) create mode 100644 crates/core/src/storage/cache.rs diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index d7143983a7..0bdadfbdab 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -73,6 +73,9 @@ tokio = { workspace = true, features = [ "parking_lot", ] } +# cahce +quick_cache = { version = "0.6.9", optional = true } + # other deps (these should be organized and pulled into workspace.dependencies as necessary) cfg-if = "1" dashmap = "6" @@ -98,6 +101,7 @@ humantime = { version = "2.1.0" } [dev-dependencies] criterion = "0.5" ctor = "0" +datatest-stable = "0.2" deltalake-test = { path = "../test", features = ["datafusion"] } dotenvy = "0" fs_extra = "1.2.0" @@ -126,3 +130,4 @@ datafusion = [ datafusion-ext = ["datafusion"] json = ["parquet/json"] python = ["arrow/pyarrow"] +log-cache = ["quick_cache"] diff --git a/crates/core/src/storage/cache.rs b/crates/core/src/storage/cache.rs new file mode 100644 index 0000000000..4b44da1a78 --- /dev/null +++ b/crates/core/src/storage/cache.rs @@ -0,0 +1,186 @@ +use std::sync::Arc; + +use bytes::Bytes; +use chrono::{DateTime, Utc}; +use futures::stream::BoxStream; +use futures::StreamExt; +use object_store::path::Path; +use object_store::{ + Attributes, GetOptions, GetResult, GetResultPayload, ListResult, MultipartUpload, ObjectMeta, + ObjectStore, PutMultipartOpts, PutOptions, PutPayload, PutResult, Result as ObjectStoreResult, +}; +use quick_cache::sync::Cache; + +#[derive(Debug, Clone)] +struct Entry { + data: Bytes, + last_modified: DateTime, + attributes: Attributes, + e_tag: Option, +} + +impl Entry { + fn new( + data: Bytes, + last_modified: DateTime, + e_tag: Option, + attributes: Attributes, + ) -> Self { + Self { + data, + last_modified, + e_tag, + attributes, + } + } +} + +/// An object store implementation that conditionally caches file requests. +/// +/// This implementation caches the file requests based on on the evaluation +/// of a condition. The condition is evaluated on the path of the file and +/// can be configured to meet the requirements of the user. 
+/// +/// This is __not__ a general purpose cache and is specifically designed to cache +/// the commit files of a Delta table. E.g. it is assumed that files written to +/// the object store are immutable and no attempt is made to invalidate the cache +/// when files are updated in the remote object store. +#[derive(Clone)] +pub(crate) struct ConditionallyCachedObjectStore { + inner: Arc, + check: Arc bool + Send + Sync>, + cache: Arc>, +} + +impl std::fmt::Debug for ConditionallyCachedObjectStore { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ConditionallyCachedObjectStore") + .field("object_store", &self.inner) + .finish() + } +} + +impl std::fmt::Display for ConditionallyCachedObjectStore { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "ConditionallyCachedObjectStore({})", self.inner) + } +} + +fn cache_json(path: &Path) -> bool { + path.extension() + .map_or(false, |ext| ext.eq_ignore_ascii_case("json")) +} + +impl ConditionallyCachedObjectStore { + /// Create a new conditionally cached object store. + pub fn new(inner: Arc) -> Self { + Self { + inner, + check: Arc::new(cache_json), + cache: Arc::new(Cache::new(100)), + } + } + + async fn get_opts_impl( + &self, + location: &Path, + options: GetOptions, + ) -> ObjectStoreResult { + if options.range.is_some() || !(self.check)(location) || options.head { + return self.inner.get_opts(location, options).await; + } + + let entry = if let Some(entry) = self.cache.get(location) { + entry + } else { + let response = self.inner.get_opts(location, options.clone()).await?; + let attributes = response.attributes.clone(); + let meta = response.meta.clone(); + let data = response.bytes().await?; + let entry = Entry::new(data, meta.last_modified, meta.e_tag, attributes); + self.cache.insert(location.clone(), entry.clone()); + entry + }; + + let meta = ObjectMeta { + location: location.clone(), + last_modified: entry.last_modified, + size: entry.data.len(), + e_tag: entry.e_tag, + version: None, + }; + let (range, data) = (0..entry.data.len(), entry.data); + let stream = futures::stream::once(futures::future::ready(Ok(data))); + Ok(GetResult { + payload: GetResultPayload::Stream(stream.boxed()), + attributes: entry.attributes, + meta, + range, + }) + } +} + +#[async_trait::async_trait] +impl ObjectStore for ConditionallyCachedObjectStore { + async fn put_opts( + &self, + location: &Path, + bytes: PutPayload, + options: PutOptions, + ) -> ObjectStoreResult { + self.inner.put_opts(location, bytes, options).await + } + + async fn get_opts(&self, location: &Path, options: GetOptions) -> ObjectStoreResult { + self.get_opts_impl(location, options).await + } + + async fn head(&self, location: &Path) -> ObjectStoreResult { + self.inner.head(location).await + } + + async fn delete(&self, location: &Path) -> ObjectStoreResult<()> { + self.cache.remove(location); + self.inner.delete(location).await + } + + fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, ObjectStoreResult> { + self.inner.list(prefix) + } + + fn list_with_offset( + &self, + prefix: Option<&Path>, + offset: &Path, + ) -> BoxStream<'_, ObjectStoreResult> { + self.inner.list_with_offset(prefix, offset) + } + + async fn list_with_delimiter(&self, prefix: Option<&Path>) -> ObjectStoreResult { + self.inner.list_with_delimiter(prefix).await + } + + async fn copy(&self, from: &Path, to: &Path) -> ObjectStoreResult<()> { + self.inner.copy(from, to).await + } + + async fn copy_if_not_exists(&self, from: &Path, to: &Path) 
-> ObjectStoreResult<()> { + self.inner.copy_if_not_exists(from, to).await + } + + async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> ObjectStoreResult<()> { + self.inner.rename_if_not_exists(from, to).await + } + + async fn put_multipart(&self, location: &Path) -> ObjectStoreResult> { + self.inner.put_multipart(location).await + } + + async fn put_multipart_opts( + &self, + location: &Path, + options: PutMultipartOpts, + ) -> ObjectStoreResult> { + self.inner.put_multipart_opts(location, options).await + } +} diff --git a/crates/core/src/storage/mod.rs b/crates/core/src/storage/mod.rs index 8361bf138e..f44e86753d 100644 --- a/crates/core/src/storage/mod.rs +++ b/crates/core/src/storage/mod.rs @@ -30,6 +30,8 @@ pub use retry_ext::ObjectStoreRetryExt; use std::ops::Range; pub use utils::*; +#[cfg(feature = "log-cache")] +pub mod cache; pub mod file; pub mod retry_ext; pub mod utils; From 55565aed18868a5d28a7ff637e6ed24d7e313c79 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Mon, 13 Jan 2025 16:59:27 +0100 Subject: [PATCH 04/14] feat: add owned file view Signed-off-by: Robert Pack --- crates/core/Cargo.toml | 3 +- crates/core/src/kernel/snapshot/next.rs | 221 ++++++++++++++++++++---- crates/core/src/storage/cache.rs | 16 +- 3 files changed, 196 insertions(+), 44 deletions(-) diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 0bdadfbdab..232cc5f00d 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -74,7 +74,7 @@ tokio = { workspace = true, features = [ ] } # cahce -quick_cache = { version = "0.6.9", optional = true } +quick_cache = { version = "0.6.9" } # other deps (these should be organized and pulled into workspace.dependencies as necessary) cfg-if = "1" @@ -130,4 +130,3 @@ datafusion = [ datafusion-ext = ["datafusion"] json = ["parquet/json"] python = ["arrow/pyarrow"] -log-cache = ["quick_cache"] diff --git a/crates/core/src/kernel/snapshot/next.rs b/crates/core/src/kernel/snapshot/next.rs index 3e7ab01fe2..7e2acbb1de 100644 --- a/crates/core/src/kernel/snapshot/next.rs +++ b/crates/core/src/kernel/snapshot/next.rs @@ -1,43 +1,46 @@ -use std::collections::{HashMap, HashSet}; -use std::sync::{Arc, LazyLock}; +use std::collections::HashSet; +use std::sync::Arc; -use ::serde::{Deserialize, Serialize}; use arrow::compute::{concat_batches, filter_record_batch}; use arrow_arith::boolean::{and, is_null, not}; -use arrow_array::{BooleanArray, RecordBatch}; +use arrow_array::cast::AsArray; +use arrow_array::types::Int64Type; +use arrow_array::{Array, BooleanArray, RecordBatch}; +use chrono::{DateTime, Utc}; +use delta_kernel::actions::set_transaction::{SetTransactionMap, SetTransactionScanner}; use delta_kernel::actions::{ - get_log_add_schema, get_log_schema, Add, ADD_NAME, CDC_NAME, METADATA_NAME, PROTOCOL_NAME, + get_log_add_schema, get_log_schema, ADD_NAME, CDC_NAME, METADATA_NAME, PROTOCOL_NAME, REMOVE_NAME, SET_TRANSACTION_NAME, }; -use delta_kernel::actions::{Metadata, Protocol}; +use delta_kernel::actions::{Metadata, Protocol, SetTransaction}; use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::engine::default::executor::tokio::{ TokioBackgroundExecutor, TokioMultiThreadExecutor, }; use delta_kernel::engine::default::DefaultEngine; use delta_kernel::engine_data::{GetData, RowVisitor, TypedGetData as _}; -use delta_kernel::expressions::ColumnName; +use delta_kernel::expressions::{Scalar, StructData}; use delta_kernel::scan::log_replay::scan_action_iter; -use delta_kernel::scan::state::{DvInfo, Stats}; -use 
delta_kernel::scan::{scan_row_schema, PhysicalPredicate, ScanBuilder, ScanData}; -use delta_kernel::schema::{ColumnNamesAndTypes, DataType, Schema}; +use delta_kernel::scan::{scan_row_schema, PhysicalPredicate}; +use delta_kernel::schema::Schema; use delta_kernel::snapshot::Snapshot as SnapshotInner; use delta_kernel::table_properties::TableProperties; -use delta_kernel::{ - DeltaResult as KernelResult, Engine, EngineData, Error, Expression, Table, Version, -}; -use futures::{StreamExt, TryStreamExt}; +use delta_kernel::{DeltaResult as KernelResult, Engine, EngineData, Expression, Table, Version}; use itertools::Itertools; use object_store::path::Path; use object_store::ObjectStore; use tracing::warn; use url::Url; +use crate::kernel::scalars::ScalarExt; use crate::kernel::ActionType; +use crate::storage::cache::CommitCacheObjectStore; use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; type ReplayIter = Box, Vec)>>>; +type LocalFileSystem = CommitCacheObjectStore; + impl ActionType { pub(self) fn field_name_unckecked(&self) -> &'static str { match self { @@ -76,28 +79,42 @@ pub struct Snapshot { } impl Snapshot { + /// Create a new [`Snapshot`] instance. pub fn new(inner: Arc, engine: Arc) -> Self { Self { inner, engine } } + /// Create a new [`Snapshot`] instance for a table. pub async fn try_new( table: Table, store: Arc, version: Option, ) -> DeltaResult { - // let executor = Arc::new(TokioMultiThreadExecutor::new( - // config - // .io_runtime - // .map(|rt| rt.get_handle()) - // .unwrap_or(tokio::runtime::Handle::current()), - // )); - let executor = Arc::new(TokioMultiThreadExecutor::new( - tokio::runtime::Handle::current(), - )); + // TODO: how to deal with the dedicated IO runtime? Would this already be covered by the + // object store implementation pass to this? let table_root = Path::from_url_path(table.location().path())?; - let engine = DefaultEngine::new(store, table_root, executor); - let snapshot = table.snapshot(&engine, version.map(|v| v as u64))?; - Ok(Self::new(Arc::new(snapshot), Arc::new(engine))) + let store_str = format!("{}", store); + let is_local = store_str.starts_with("LocalFileSystem"); + let store = Arc::new(CommitCacheObjectStore::new(store)); + let handle = tokio::runtime::Handle::current(); + let engine: Arc = match handle.runtime_flavor() { + tokio::runtime::RuntimeFlavor::MultiThread => Arc::new(DefaultEngine::new_with_opts( + store, + table_root, + Arc::new(TokioMultiThreadExecutor::new(handle)), + !is_local, + )), + tokio::runtime::RuntimeFlavor::CurrentThread => Arc::new(DefaultEngine::new_with_opts( + store, + table_root, + Arc::new(TokioBackgroundExecutor::new()), + !is_local, + )), + _ => return Err(DeltaTableError::generic("unsupported runtime flavor")), + }; + + let snapshot = table.snapshot(engine.as_ref(), version.map(|v| v as u64))?; + Ok(Self::new(Arc::new(snapshot), engine)) } pub(crate) fn engine_ref(&self) -> &Arc { @@ -120,7 +137,7 @@ impl Snapshot { self.inner.protocol() } - pub fn metadata(&self) -> &delta_kernel::actions::Metadata { + pub fn metadata(&self) -> &Metadata { self.inner.metadata() } @@ -142,6 +159,28 @@ impl Snapshot { .map(|f| f.location.last_modified) } + /// Scan the Delta Log to obtain the latest transaction for all applications + /// + /// This method requires a full scan of the log to find all transactions. + /// When a specific application id is requested, it is much more efficient to use + /// [`application_transaction`](Self::application_transaction) instead. 
+ pub fn application_transactions(&self) -> DeltaResult { + let scanner = SetTransactionScanner::new(self.inner.clone()); + Ok(scanner.application_transactions(self.engine.as_ref())?) + } + + /// Scan the Delta Log for the latest transaction entry for a specific application. + /// + /// Initiates a log scan, but terminates as soon as the transaction + /// for the given application is found. + pub fn application_transaction( + &self, + app_id: impl AsRef, + ) -> DeltaResult> { + let scanner = SetTransactionScanner::new(self.inner.clone()); + Ok(scanner.application_transaction(self.engine.as_ref(), app_id.as_ref())?) + } + fn log_data( &self, types: &[ActionType], @@ -170,12 +209,6 @@ impl Snapshot { )) }) .flatten()) - // let it = scan_action_iter( - // engine, - // self.replay_for_scan_data(engine)?, - // physical_predicate, - // ); - // Ok(Some(it).into_iter().flatten()) } } @@ -309,6 +342,109 @@ impl EagerSnapshot { pub fn table_properties(&self) -> &TableProperties { &self.snapshot.table_properties() } + + pub fn files(&self) -> impl Iterator { + LogicalFileView { + files: self.files.clone(), + index: 0, + } + } + + /// Get the number of files in the current snapshot + pub fn files_count(&self) -> usize { + self.files.num_rows() + } +} + +/// Helper trait to extract individual values from a `StructData`. +pub trait StructDataExt { + fn get(&self, key: &str) -> Option<&Scalar>; +} + +impl StructDataExt for StructData { + fn get(&self, key: &str) -> Option<&Scalar> { + self.fields() + .iter() + .zip(self.values().iter()) + .find(|(k, _)| k.name() == key) + .map(|(_, v)| v) + } +} + +#[derive(Clone)] +pub struct LogicalFileView { + files: RecordBatch, + index: usize, +} + +impl LogicalFileView { + /// Path of the file. + pub fn path(&self) -> &str { + self.files.column(0).as_string::().value(self.index) + } + + /// Size of the file in bytes. + pub fn size(&self) -> i64 { + self.files + .column(1) + .as_primitive::() + .value(self.index) + } + + /// Modification time of the file in milliseconds since epoch. + pub fn modification_time(&self) -> i64 { + self.files + .column(2) + .as_primitive::() + .value(self.index) + } + + /// Datetime of the last modification time of the file. 
+ pub fn modification_datetime(&self) -> DeltaResult> { + DateTime::from_timestamp_millis(self.modification_time()).ok_or(DeltaTableError::from( + crate::protocol::ProtocolError::InvalidField(format!( + "invalid modification_time: {:?}", + self.modification_time() + )), + )) + } + + pub fn stats(&self) -> Option<&str> { + let col = self.files.column(3).as_string::(); + col.is_valid(self.index).then(|| col.value(self.index)) + } + + pub fn partition_values(&self) -> Option { + self.files + .column_by_name("fileConstantValues") + .and_then(|col| col.as_struct_opt()) + .and_then(|s| s.column_by_name("partitionValues")) + .and_then(|arr| { + arr.is_valid(self.index) + .then(|| match Scalar::from_array(arr, self.index) { + Some(Scalar::Struct(s)) => Some(s), + _ => None, + }) + .flatten() + }) + } +} + +impl Iterator for LogicalFileView { + type Item = LogicalFileView; + + fn next(&mut self) -> Option { + if self.index < self.files.num_rows() { + let file = LogicalFileView { + files: self.files.clone(), + index: self.index, + }; + self.index += 1; + Some(file) + } else { + None + } + } } #[cfg(test)] @@ -331,11 +467,11 @@ mod tests { rep_root } - #[tokio::test(flavor = "multi_thread")] async fn load_snapshot() -> TestResult<()> { // some comment let mut dat_dir = get_dat_dir(); dat_dir.push("multi_partitioned"); + let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; let table_info = dat_info.table_summary()?; @@ -350,14 +486,27 @@ mod tests { assert_eq!(snapshot.version(), table_info.version); assert_eq!( - snapshot.protocol().min_reader_version(), - table_info.min_reader_version + ( + snapshot.protocol().min_reader_version(), + snapshot.protocol().min_writer_version() + ), + (table_info.min_reader_version, table_info.min_writer_version) ); Ok(()) } #[tokio::test(flavor = "multi_thread")] + async fn load_snapshot_multi() -> TestResult<()> { + load_snapshot().await + } + + #[tokio::test(flavor = "current_thread")] + async fn load_snapshot_current() -> TestResult<()> { + load_snapshot().await + } + + #[tokio::test] async fn load_eager_snapshot() -> TestResult<()> { // some comment let mut dat_dir = get_dat_dir(); diff --git a/crates/core/src/storage/cache.rs b/crates/core/src/storage/cache.rs index 4b44da1a78..eb6b5bd785 100644 --- a/crates/core/src/storage/cache.rs +++ b/crates/core/src/storage/cache.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use bytes::Bytes; use chrono::{DateTime, Utc}; use futures::stream::BoxStream; -use futures::StreamExt; +use futures::{StreamExt, TryStreamExt}; use object_store::path::Path; use object_store::{ Attributes, GetOptions, GetResult, GetResultPayload, ListResult, MultipartUpload, ObjectMeta, @@ -46,13 +46,14 @@ impl Entry { /// the object store are immutable and no attempt is made to invalidate the cache /// when files are updated in the remote object store. 
#[derive(Clone)] -pub(crate) struct ConditionallyCachedObjectStore { +pub(crate) struct CommitCacheObjectStore { inner: Arc, check: Arc bool + Send + Sync>, cache: Arc>, + has_ordered_listing: bool, } -impl std::fmt::Debug for ConditionallyCachedObjectStore { +impl std::fmt::Debug for CommitCacheObjectStore { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("ConditionallyCachedObjectStore") .field("object_store", &self.inner) @@ -60,7 +61,7 @@ impl std::fmt::Debug for ConditionallyCachedObjectStore { } } -impl std::fmt::Display for ConditionallyCachedObjectStore { +impl std::fmt::Display for CommitCacheObjectStore { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "ConditionallyCachedObjectStore({})", self.inner) } @@ -71,13 +72,16 @@ fn cache_json(path: &Path) -> bool { .map_or(false, |ext| ext.eq_ignore_ascii_case("json")) } -impl ConditionallyCachedObjectStore { +impl CommitCacheObjectStore { /// Create a new conditionally cached object store. pub fn new(inner: Arc) -> Self { + let store_str = format!("{}", inner); + let is_local = store_str.starts_with("LocalFileSystem"); Self { inner, check: Arc::new(cache_json), cache: Arc::new(Cache::new(100)), + has_ordered_listing: !is_local, } } @@ -121,7 +125,7 @@ impl ConditionallyCachedObjectStore { } #[async_trait::async_trait] -impl ObjectStore for ConditionallyCachedObjectStore { +impl ObjectStore for CommitCacheObjectStore { async fn put_opts( &self, location: &Path, From 3d6d263ed3b7925384f7ad526ba10bc484beedd3 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Thu, 16 Jan 2025 01:58:37 +0100 Subject: [PATCH 05/14] feat: basic updates of file state Signed-off-by: Robert Pack --- crates/core/src/kernel/snapshot/next.rs | 371 +++++++++++++++--------- crates/core/src/operations/vacuum.rs | 1 + crates/core/src/protocol/checkpoints.rs | 2 +- crates/core/src/storage/mod.rs | 3 +- 4 files changed, 231 insertions(+), 146 deletions(-) diff --git a/crates/core/src/kernel/snapshot/next.rs b/crates/core/src/kernel/snapshot/next.rs index 7e2acbb1de..56127d21c0 100644 --- a/crates/core/src/kernel/snapshot/next.rs +++ b/crates/core/src/kernel/snapshot/next.rs @@ -1,13 +1,19 @@ +//! Snapshot of a Delta Table at a specific version. +//! 
use std::collections::HashSet; -use std::sync::Arc; +use std::sync::{Arc, LazyLock}; use arrow::compute::{concat_batches, filter_record_batch}; use arrow_arith::boolean::{and, is_null, not}; use arrow_array::cast::AsArray; use arrow_array::types::Int64Type; use arrow_array::{Array, BooleanArray, RecordBatch}; +use arrow_cast::pretty::print_batches; use chrono::{DateTime, Utc}; use delta_kernel::actions::set_transaction::{SetTransactionMap, SetTransactionScanner}; +use delta_kernel::actions::visitors::{ + AddVisitor, CdcVisitor, MetadataVisitor, ProtocolVisitor, RemoveVisitor, SetTransactionVisitor, +}; use delta_kernel::actions::{ get_log_add_schema, get_log_schema, ADD_NAME, CDC_NAME, METADATA_NAME, PROTOCOL_NAME, REMOVE_NAME, SET_TRANSACTION_NAME, @@ -20,12 +26,16 @@ use delta_kernel::engine::default::executor::tokio::{ use delta_kernel::engine::default::DefaultEngine; use delta_kernel::engine_data::{GetData, RowVisitor, TypedGetData as _}; use delta_kernel::expressions::{Scalar, StructData}; +use delta_kernel::log_segment::LogSegment; use delta_kernel::scan::log_replay::scan_action_iter; -use delta_kernel::scan::{scan_row_schema, PhysicalPredicate}; -use delta_kernel::schema::Schema; +use delta_kernel::scan::scan_row_schema; +use delta_kernel::schema::{DataType, Schema, StructField, StructType}; use delta_kernel::snapshot::Snapshot as SnapshotInner; use delta_kernel::table_properties::TableProperties; -use delta_kernel::{DeltaResult as KernelResult, Engine, EngineData, Expression, Table, Version}; +use delta_kernel::{ + DeltaResult as KernelResult, Engine, EngineData, Expression, ExpressionHandler, ExpressionRef, + Table, Version, +}; use itertools::Itertools; use object_store::path::Path; use object_store::ObjectStore; @@ -33,7 +43,7 @@ use tracing::warn; use url::Url; use crate::kernel::scalars::ScalarExt; -use crate::kernel::ActionType; +use crate::kernel::{ActionType, ARROW_HANDLER}; use crate::storage::cache::CommitCacheObjectStore; use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; @@ -41,6 +51,26 @@ type ReplayIter = Box, Vec type LocalFileSystem = CommitCacheObjectStore; +#[derive(thiserror::Error, Debug)] +enum SnapshotError { + #[error("Snapshot not initialized for action type: {0}")] + MissingData(String), +} + +impl SnapshotError { + fn missing_data(action: ActionType) -> Self { + Self::MissingData(action.field_name_unckecked().to_string()) + } +} + +impl From for DeltaTableError { + fn from(e: SnapshotError) -> Self { + match &e { + SnapshotError::MissingData(_) => DeltaTableError::generic(e), + } + } +} + impl ActionType { pub(self) fn field_name_unckecked(&self) -> &'static str { match self { @@ -88,7 +118,7 @@ impl Snapshot { pub async fn try_new( table: Table, store: Arc, - version: Option, + version: impl Into>, ) -> DeltaResult { // TODO: how to deal with the dedicated IO runtime? Would this already be covered by the // object store implementation pass to this? 
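// A minimal usage sketch (not from the patch above, mirroring the tests further down):
// `Snapshot::try_new` now takes `impl Into<Option<Version>>` and chooses a
// `TokioMultiThreadExecutor` or `TokioBackgroundExecutor` from the ambient tokio runtime
// flavor, so the same call is expected to work under both multi-thread and current-thread
// runtimes. The helper name, `table_uri` parameter, and the local-filesystem store are
// assumptions for illustration only.
async fn open_latest_snapshot(table_uri: &str) -> DeltaResult<Snapshot> {
    let table = Table::try_from_uri(table_uri)?;
    let store = Arc::new(object_store::local::LocalFileSystem::default());
    // `None` resolves to the latest version; `Some(1)` would pin version 1.
    Snapshot::try_new(table, store, None).await
}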
@@ -113,7 +143,7 @@ impl Snapshot { _ => return Err(DeltaTableError::generic("unsupported runtime flavor")), }; - let snapshot = table.snapshot(engine.as_ref(), version.map(|v| v as u64))?; + let snapshot = table.snapshot(engine.as_ref(), version.into())?; Ok(Self::new(Arc::new(snapshot), engine)) } @@ -125,7 +155,7 @@ impl Snapshot { &self.inner.table_root() } - pub fn version(&self) -> u64 { + pub fn version(&self) -> Version { self.inner.version() } @@ -159,6 +189,49 @@ impl Snapshot { .map(|f| f.location.last_modified) } + /// read all active files from the log + pub(crate) fn files( + &self, + predicate: Option>, + ) -> DeltaResult>> { + let scan = self + .inner + .clone() + .scan_builder() + .with_predicate(predicate) + .build()?; + Ok(scan.scan_data(self.engine.as_ref())?.map(|res| { + res.and_then(|(data, mut predicate)| { + let batch: RecordBatch = ArrowEngineData::try_from_engine_data(data)?.into(); + if predicate.len() < batch.num_rows() { + predicate + .extend(std::iter::repeat(true).take(batch.num_rows() - predicate.len())); + } + Ok(filter_record_batch(&batch, &BooleanArray::from(predicate))?) + }) + })) + } + + pub(crate) fn tombstones(&self) -> DeltaResult>> { + static META_PREDICATE: LazyLock> = LazyLock::new(|| { + Some(Arc::new( + Expression::column([REMOVE_NAME, "path"]).is_not_null(), + )) + }); + let read_schema = get_log_schema().project(&[REMOVE_NAME])?; + Ok(self + .inner + ._log_segment() + .replay( + self.engine.as_ref(), + read_schema.clone(), + read_schema, + META_PREDICATE.clone(), + )? + .map_ok(|(d, _)| Ok(RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?))) + .flatten()) + } + /// Scan the Delta Log to obtain the latest transaction for all applications /// /// This method requires a full scan of the log to find all transactions. @@ -180,43 +253,13 @@ impl Snapshot { let scanner = SetTransactionScanner::new(self.inner.clone()); Ok(scanner.application_transaction(self.engine.as_ref(), app_id.as_ref())?) } - - fn log_data( - &self, - types: &[ActionType], - ) -> DeltaResult>> { - let field_names = types - .iter() - .filter_map(|t| t.field_name().ok()) - .collect::>(); - if field_names.len() != types.len() { - warn!("skipping unsupported action types"); - } - let log_schema = get_log_schema().project(&field_names)?; - Ok(self - .inner - ._log_segment() - .replay( - self.engine.as_ref(), - log_schema.clone(), - log_schema.clone(), - None, - )? - .map_ok(|(d, flag)| { - Ok(( - RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?), - flag, - )) - }) - .flatten()) - } } #[derive(Clone)] pub struct EagerSnapshot { snapshot: Snapshot, - files: RecordBatch, - actions: Option, + files: Option, + predicate: Option>, } impl EagerSnapshot { @@ -229,101 +272,23 @@ impl EagerSnapshot { table_root: impl AsRef, store: Arc, config: DeltaTableConfig, - version: Option, + version: impl Into>, tracked_actions: HashSet, - predicate: Option, + predicate: Option>, ) -> DeltaResult { - let mut replay_actions = Vec::new(); - if config.require_files { - replay_actions.push(ActionType::Add); - replay_actions.push(ActionType::Remove); - } - replay_actions.extend(tracked_actions.into_iter().filter(|it| { - !config.require_files || (it != &ActionType::Add && it != &ActionType::Remove) - })); - let snapshot = Snapshot::try_new(Table::try_from_uri(table_root)?, store, version).await?; - - let mut replay_data = Vec::new(); - let mut action_data = Vec::new(); - for slice in snapshot.log_data(&replay_actions)? 
{ - let (batch, flag) = slice?; - - let action_projection = replay_actions - .iter() - .filter_map(|t| { - (t != &ActionType::Add && t != &ActionType::Remove) - .then_some( - t.field_name() - .ok() - .and_then(|n| batch.schema_ref().index_of(n).ok()), - ) - .flatten() - }) - .collect_vec(); - - if !action_projection.is_empty() { - action_data.push(batch.project(&action_projection)?); - } - - if config.require_files { - let file_data = batch.project(&[0, 1])?; - let file_data = filter_record_batch( - &file_data, - ¬(&and( - &is_null(batch.column(0))?, - &is_null(batch.column(1))?, - )?)?, - )?; - replay_data.push(Ok(( - Box::new(ArrowEngineData::from(file_data)) as Box, - flag, - ))); - } - } - - let files_schema = Arc::new(get_log_add_schema().as_ref().try_into()?); - let scan_schema = Arc::new((&scan_row_schema()).try_into()?); - - let files = if !replay_data.is_empty() { - let (engine, action_iter) = (snapshot.engine_ref().as_ref(), replay_data.into_iter()); - let physical_predicate = - predicate.and_then(|p| PhysicalPredicate::try_new(&p, snapshot.schema()).ok()); - - let it: ReplayIter = match physical_predicate { - Some(PhysicalPredicate::StaticSkipAll) => Box::new(std::iter::empty()), - Some(PhysicalPredicate::Some(p, s)) => { - Box::new(scan_action_iter(engine, action_iter, Some((p, s)))) - } - None | Some(PhysicalPredicate::None) => { - Box::new(scan_action_iter(engine, action_iter, None)) - } - }; - - let mut filtered = Vec::new(); - for res in it { - let (batch, selection) = res?; - let predicate = BooleanArray::from(selection); - let data: RecordBatch = ArrowEngineData::try_from_engine_data(batch)?.into(); - filtered.push(filter_record_batch(&data, &predicate)?); - } - concat_batches(&scan_schema, &filtered)? - } else { - RecordBatch::new_empty(scan_schema.clone()) - }; - - let actions = (!action_data.is_empty()) - .then(|| concat_batches(&files_schema, &action_data).ok()) - .flatten(); - + let files = config + .require_files + .then(|| -> DeltaResult<_> { Ok(replay_file_actions(&snapshot)?) }) + .transpose()?; Ok(Self { snapshot, files, - actions, + predicate, }) } - pub fn version(&self) -> u64 { + pub fn version(&self) -> Version { self.snapshot.version() } @@ -335,7 +300,7 @@ impl EagerSnapshot { self.snapshot.protocol() } - pub fn metadata(&self) -> &delta_kernel::actions::Metadata { + pub fn metadata(&self) -> &Metadata { self.snapshot.metadata() } @@ -343,17 +308,138 @@ impl EagerSnapshot { &self.snapshot.table_properties() } - pub fn files(&self) -> impl Iterator { - LogicalFileView { - files: self.files.clone(), + pub fn files(&self) -> DeltaResult> { + Ok(LogicalFileView { + files: self + .files + .clone() + .ok_or_else(|| SnapshotError::missing_data(ActionType::Add))?, index: 0, - } + }) } /// Get the number of files in the current snapshot - pub fn files_count(&self) -> usize { - self.files.num_rows() + pub fn files_count(&self) -> DeltaResult { + Ok(self + .files + .as_ref() + .map(|f| f.num_rows()) + .ok_or_else(|| SnapshotError::missing_data(ActionType::Add))?) } + + pub fn tombstones(&self) -> DeltaResult>> { + self.snapshot.tombstones() + } + + /// Scan the Delta Log to obtain the latest transaction for all applications + /// + /// This method requires a full scan of the log to find all transactions. + /// When a specific application id is requested, it is much more efficient to use + /// [`application_transaction`](Self::application_transaction) instead. 
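    // --- Illustrative sketch, not part of this patch ---
    // Shows how the two transaction lookups on this snapshot are intended to be used.
    // It assumes `SetTransactionMap` is keyed by application id (as the doc comment
    // describes); the `"my-writer-app"` id is a made-up example.
    fn print_latest_transactions(snapshot: &EagerSnapshot) -> DeltaResult<()> {
        // Full log scan: latest transaction for every application id.
        for (app_id, _txn) in snapshot.application_transactions()? {
            println!("found latest transaction for app '{app_id}'");
        }
        // Targeted scan: terminates as soon as the requested id is found.
        if snapshot.application_transaction("my-writer-app")?.is_some() {
            println!("'my-writer-app' has committed to this table");
        }
        Ok(())
    }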
+ pub fn application_transactions(&self) -> DeltaResult { + self.snapshot.application_transactions() + } + + /// Scan the Delta Log for the latest transaction entry for a specific application. + /// + /// Initiates a log scan, but terminates as soon as the transaction + /// for the given application is found. + pub fn application_transaction( + &self, + app_id: impl AsRef, + ) -> DeltaResult> { + self.snapshot.application_transaction(app_id) + } + + pub(crate) fn update(&mut self) -> DeltaResult<()> { + let state = self + .files + .as_ref() + .ok_or(SnapshotError::missing_data(ActionType::Add))? + .clone(); + + let log_root = self.snapshot.table_root().join("_delta_log/").unwrap(); + let fs_client = self.snapshot.engine.get_file_system_client(); + let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; + let checkpoint_read_schema = get_log_add_schema().clone(); + + let segment = + LogSegment::for_table_changes(fs_client.as_ref(), log_root, self.version() + 1, None)?; + let slice_iter = segment + .replay( + self.snapshot.engine.as_ref(), + commit_read_schema, + checkpoint_read_schema, + None, + )? + .chain(std::iter::once(Ok(( + Box::new(ArrowEngineData::from(state)) as Box, + false, + )))); + + let res = scan_action_iter(self.snapshot.engine.as_ref(), slice_iter, None) + .map(|res| { + res.and_then(|(d, sel)| { + let batch = RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?); + Ok(filter_record_batch(&batch, &BooleanArray::from(sel))?) + }) + }) + .collect::, _>>()?; + + self.files = Some(concat_batches(res[0].schema_ref(), &res)?); + + Ok(()) + } +} + +fn replay_file_actions(snapshot: &Snapshot) -> DeltaResult { + let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; + let checkpoint_read_schema = get_log_add_schema().clone(); + + let curr_data = snapshot + .inner + ._log_segment() + .replay( + snapshot.engine.as_ref(), + commit_read_schema.clone(), + checkpoint_read_schema.clone(), + None, + )? + .map_ok( + |(data, flag)| -> Result<(RecordBatch, bool), delta_kernel::Error> { + Ok((ArrowEngineData::try_from_engine_data(data)?.into(), flag)) + }, + ) + .flatten() + .collect::, _>>()?; + + let scan_iter = curr_data.clone().into_iter().map(|(data, flag)| { + Ok(( + Box::new(ArrowEngineData::new(data.clone())) as Box, + flag, + )) + }); + + let res = scan_action_iter(snapshot.engine.as_ref(), scan_iter, None) + .map(|res| { + res.and_then(|(d, selection)| { + Ok(( + RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?), + selection, + )) + }) + }) + .zip(curr_data.into_iter()) + .map(|(scan_res, (data_raw, _))| match scan_res { + Ok((_, selection)) => { + let data = filter_record_batch(&data_raw, &BooleanArray::from(selection))?; + Ok(data.project(&[0])?) + } + Err(e) => Err(e), + }) + .collect::, _>>()?; + + Ok(concat_batches(res[0].schema_ref(), &res)?) } /// Helper trait to extract individual values from a `StructData`. 
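// --- Illustrative sketch, not part of this diff ---
// How the `StructDataExt` helper named just above combines with
// `LogicalFileView::partition_values` to read a single partition value. The `"year"`
// partition column is a made-up example, and the scalar is rendered with its `Debug`
// form since partition values are kernel `Scalar` types.
fn partition_year(file: &LogicalFileView) -> Option<String> {
    file.partition_values()
        .and_then(|values| values.get("year").map(|v| format!("{v:?}")))
}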
@@ -451,7 +537,6 @@ impl Iterator for LogicalFileView { mod tests { use super::*; - use arrow_cast::pretty::print_batches; use deltalake_test::acceptance::{read_dat_case, TestCaseInfo}; use deltalake_test::TestResult; use std::path::PathBuf; @@ -508,31 +593,31 @@ mod tests { #[tokio::test] async fn load_eager_snapshot() -> TestResult<()> { - // some comment let mut dat_dir = get_dat_dir(); dat_dir.push("multi_partitioned"); + let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; let table_info = dat_info.table_summary()?; let table = Table::try_from_uri(dat_info.table_root()?)?; - let snapshot = EagerSnapshot::try_new_with_actions( + let mut snapshot = EagerSnapshot::try_new_with_actions( table.location(), Arc::new(object_store::local::LocalFileSystem::default()), Default::default(), - None, + Some(1), Default::default(), None, ) .await?; - assert_eq!(snapshot.version(), table_info.version); - assert_eq!( - snapshot.protocol().min_reader_version(), - table_info.min_reader_version - ); + // assert_eq!(snapshot.version(), table_info.version); + // assert_eq!( + // snapshot.protocol().min_reader_version(), + // table_info.min_reader_version + // ); - print_batches(&[snapshot.files])?; + snapshot.update()?; Ok(()) } diff --git a/crates/core/src/operations/vacuum.rs b/crates/core/src/operations/vacuum.rs index 4e5c46589f..9fd614e83e 100644 --- a/crates/core/src/operations/vacuum.rs +++ b/crates/core/src/operations/vacuum.rs @@ -217,6 +217,7 @@ impl VacuumBuilder { self.log_store.object_store(None).clone(), ) .await?; + let valid_files = self.snapshot.file_paths_iter().collect::>(); let mut files_to_delete = vec![]; diff --git a/crates/core/src/protocol/checkpoints.rs b/crates/core/src/protocol/checkpoints.rs index 1fada05d38..f6ebacff99 100644 --- a/crates/core/src/protocol/checkpoints.rs +++ b/crates/core/src/protocol/checkpoints.rs @@ -174,7 +174,7 @@ pub async fn create_checkpoint_for( let tombstones = state .unexpired_tombstones(log_store.object_store(None).clone()) .await - .map_err(|_| ProtocolError::Generic("filed to get tombstones".into()))? + .map_err(|_| ProtocolError::Generic("failed to get tombstones".into()))? 
.collect::>(); let (checkpoint, parquet_bytes) = parquet_bytes_from_state(state, tombstones)?; diff --git a/crates/core/src/storage/mod.rs b/crates/core/src/storage/mod.rs index f44e86753d..31f4f60c77 100644 --- a/crates/core/src/storage/mod.rs +++ b/crates/core/src/storage/mod.rs @@ -30,8 +30,7 @@ pub use retry_ext::ObjectStoreRetryExt; use std::ops::Range; pub use utils::*; -#[cfg(feature = "log-cache")] -pub mod cache; +pub(crate) mod cache; pub mod file; pub mod retry_ext; pub mod utils; From a5672b58e55b2f907cc78ef4f7dd21e6b2023315 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Thu, 16 Jan 2025 14:17:43 +0100 Subject: [PATCH 06/14] feat: introduce snapshot trait Signed-off-by: Robert Pack --- crates/core/src/kernel/mod.rs | 1 + crates/core/src/kernel/snapshot/mod.rs | 1 - crates/core/src/kernel/snapshot/next.rs | 624 ------------------ .../snapshot_next}/cache.rs | 8 +- crates/core/src/kernel/snapshot_next/eager.rs | 263 ++++++++ .../src/kernel/snapshot_next/iterators.rs | 310 +++++++++ crates/core/src/kernel/snapshot_next/lazy.rs | 229 +++++++ crates/core/src/kernel/snapshot_next/mod.rs | 161 +++++ crates/core/src/storage/mod.rs | 1 - 9 files changed, 966 insertions(+), 632 deletions(-) delete mode 100644 crates/core/src/kernel/snapshot/next.rs rename crates/core/src/{storage => kernel/snapshot_next}/cache.rs (95%) create mode 100644 crates/core/src/kernel/snapshot_next/eager.rs create mode 100644 crates/core/src/kernel/snapshot_next/iterators.rs create mode 100644 crates/core/src/kernel/snapshot_next/lazy.rs create mode 100644 crates/core/src/kernel/snapshot_next/mod.rs diff --git a/crates/core/src/kernel/mod.rs b/crates/core/src/kernel/mod.rs index b2fcd71634..0a51630ee5 100644 --- a/crates/core/src/kernel/mod.rs +++ b/crates/core/src/kernel/mod.rs @@ -10,6 +10,7 @@ pub mod error; pub mod models; pub mod scalars; mod snapshot; +pub mod snapshot_next; pub use error::*; pub use models::*; diff --git a/crates/core/src/kernel/snapshot/mod.rs b/crates/core/src/kernel/snapshot/mod.rs index 38d1dc570d..2938b3d3db 100644 --- a/crates/core/src/kernel/snapshot/mod.rs +++ b/crates/core/src/kernel/snapshot/mod.rs @@ -45,7 +45,6 @@ pub use self::log_data::*; mod log_data; pub(crate) mod log_segment; -mod next; pub(crate) mod parse; mod replay; mod serde; diff --git a/crates/core/src/kernel/snapshot/next.rs b/crates/core/src/kernel/snapshot/next.rs deleted file mode 100644 index 56127d21c0..0000000000 --- a/crates/core/src/kernel/snapshot/next.rs +++ /dev/null @@ -1,624 +0,0 @@ -//! Snapshot of a Delta Table at a specific version. -//! 
-use std::collections::HashSet; -use std::sync::{Arc, LazyLock}; - -use arrow::compute::{concat_batches, filter_record_batch}; -use arrow_arith::boolean::{and, is_null, not}; -use arrow_array::cast::AsArray; -use arrow_array::types::Int64Type; -use arrow_array::{Array, BooleanArray, RecordBatch}; -use arrow_cast::pretty::print_batches; -use chrono::{DateTime, Utc}; -use delta_kernel::actions::set_transaction::{SetTransactionMap, SetTransactionScanner}; -use delta_kernel::actions::visitors::{ - AddVisitor, CdcVisitor, MetadataVisitor, ProtocolVisitor, RemoveVisitor, SetTransactionVisitor, -}; -use delta_kernel::actions::{ - get_log_add_schema, get_log_schema, ADD_NAME, CDC_NAME, METADATA_NAME, PROTOCOL_NAME, - REMOVE_NAME, SET_TRANSACTION_NAME, -}; -use delta_kernel::actions::{Metadata, Protocol, SetTransaction}; -use delta_kernel::engine::arrow_data::ArrowEngineData; -use delta_kernel::engine::default::executor::tokio::{ - TokioBackgroundExecutor, TokioMultiThreadExecutor, -}; -use delta_kernel::engine::default::DefaultEngine; -use delta_kernel::engine_data::{GetData, RowVisitor, TypedGetData as _}; -use delta_kernel::expressions::{Scalar, StructData}; -use delta_kernel::log_segment::LogSegment; -use delta_kernel::scan::log_replay::scan_action_iter; -use delta_kernel::scan::scan_row_schema; -use delta_kernel::schema::{DataType, Schema, StructField, StructType}; -use delta_kernel::snapshot::Snapshot as SnapshotInner; -use delta_kernel::table_properties::TableProperties; -use delta_kernel::{ - DeltaResult as KernelResult, Engine, EngineData, Expression, ExpressionHandler, ExpressionRef, - Table, Version, -}; -use itertools::Itertools; -use object_store::path::Path; -use object_store::ObjectStore; -use tracing::warn; -use url::Url; - -use crate::kernel::scalars::ScalarExt; -use crate::kernel::{ActionType, ARROW_HANDLER}; -use crate::storage::cache::CommitCacheObjectStore; -use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; - -type ReplayIter = Box, Vec)>>>; - -type LocalFileSystem = CommitCacheObjectStore; - -#[derive(thiserror::Error, Debug)] -enum SnapshotError { - #[error("Snapshot not initialized for action type: {0}")] - MissingData(String), -} - -impl SnapshotError { - fn missing_data(action: ActionType) -> Self { - Self::MissingData(action.field_name_unckecked().to_string()) - } -} - -impl From for DeltaTableError { - fn from(e: SnapshotError) -> Self { - match &e { - SnapshotError::MissingData(_) => DeltaTableError::generic(e), - } - } -} - -impl ActionType { - pub(self) fn field_name_unckecked(&self) -> &'static str { - match self { - Self::Metadata => METADATA_NAME, - Self::Protocol => PROTOCOL_NAME, - Self::Remove => REMOVE_NAME, - Self::Add => ADD_NAME, - Self::Txn => SET_TRANSACTION_NAME, - Self::Cdc => CDC_NAME, - _ => panic!(), - } - } - - pub(self) fn field_name(&self) -> DeltaResult<&'static str> { - let name = match self { - Self::Metadata => METADATA_NAME, - Self::Protocol => PROTOCOL_NAME, - Self::Remove => REMOVE_NAME, - Self::Add => ADD_NAME, - Self::Txn => SET_TRANSACTION_NAME, - Self::Cdc => CDC_NAME, - _ => { - return Err(DeltaTableError::generic(format!( - "unsupported action type: {self:?}" - ))) - } - }; - Ok(name) - } -} - -#[derive(Clone)] -pub struct Snapshot { - inner: Arc, - engine: Arc, -} - -impl Snapshot { - /// Create a new [`Snapshot`] instance. - pub fn new(inner: Arc, engine: Arc) -> Self { - Self { inner, engine } - } - - /// Create a new [`Snapshot`] instance for a table. 
- pub async fn try_new( - table: Table, - store: Arc, - version: impl Into>, - ) -> DeltaResult { - // TODO: how to deal with the dedicated IO runtime? Would this already be covered by the - // object store implementation pass to this? - let table_root = Path::from_url_path(table.location().path())?; - let store_str = format!("{}", store); - let is_local = store_str.starts_with("LocalFileSystem"); - let store = Arc::new(CommitCacheObjectStore::new(store)); - let handle = tokio::runtime::Handle::current(); - let engine: Arc = match handle.runtime_flavor() { - tokio::runtime::RuntimeFlavor::MultiThread => Arc::new(DefaultEngine::new_with_opts( - store, - table_root, - Arc::new(TokioMultiThreadExecutor::new(handle)), - !is_local, - )), - tokio::runtime::RuntimeFlavor::CurrentThread => Arc::new(DefaultEngine::new_with_opts( - store, - table_root, - Arc::new(TokioBackgroundExecutor::new()), - !is_local, - )), - _ => return Err(DeltaTableError::generic("unsupported runtime flavor")), - }; - - let snapshot = table.snapshot(engine.as_ref(), version.into())?; - Ok(Self::new(Arc::new(snapshot), engine)) - } - - pub(crate) fn engine_ref(&self) -> &Arc { - &self.engine - } - - pub fn table_root(&self) -> &Url { - &self.inner.table_root() - } - - pub fn version(&self) -> Version { - self.inner.version() - } - - pub fn schema(&self) -> &Schema { - self.inner.schema() - } - - pub fn protocol(&self) -> &Protocol { - self.inner.protocol() - } - - pub fn metadata(&self) -> &Metadata { - self.inner.metadata() - } - - pub fn table_properties(&self) -> &TableProperties { - &self.inner.table_properties() - } - - /// Get the timestamp of the given version in miliscends since epoch. - /// - /// Extracts the timestamp from the commit file of the given version - /// from the current log segment. If the commit file is not part of the - /// current log segment, `None` is returned. - pub fn version_timestamp(&self, version: Version) -> Option { - self.inner - ._log_segment() - .ascending_commit_files - .iter() - .find(|f| f.version == version) - .map(|f| f.location.last_modified) - } - - /// read all active files from the log - pub(crate) fn files( - &self, - predicate: Option>, - ) -> DeltaResult>> { - let scan = self - .inner - .clone() - .scan_builder() - .with_predicate(predicate) - .build()?; - Ok(scan.scan_data(self.engine.as_ref())?.map(|res| { - res.and_then(|(data, mut predicate)| { - let batch: RecordBatch = ArrowEngineData::try_from_engine_data(data)?.into(); - if predicate.len() < batch.num_rows() { - predicate - .extend(std::iter::repeat(true).take(batch.num_rows() - predicate.len())); - } - Ok(filter_record_batch(&batch, &BooleanArray::from(predicate))?) - }) - })) - } - - pub(crate) fn tombstones(&self) -> DeltaResult>> { - static META_PREDICATE: LazyLock> = LazyLock::new(|| { - Some(Arc::new( - Expression::column([REMOVE_NAME, "path"]).is_not_null(), - )) - }); - let read_schema = get_log_schema().project(&[REMOVE_NAME])?; - Ok(self - .inner - ._log_segment() - .replay( - self.engine.as_ref(), - read_schema.clone(), - read_schema, - META_PREDICATE.clone(), - )? - .map_ok(|(d, _)| Ok(RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?))) - .flatten()) - } - - /// Scan the Delta Log to obtain the latest transaction for all applications - /// - /// This method requires a full scan of the log to find all transactions. - /// When a specific application id is requested, it is much more efficient to use - /// [`application_transaction`](Self::application_transaction) instead. 
- pub fn application_transactions(&self) -> DeltaResult { - let scanner = SetTransactionScanner::new(self.inner.clone()); - Ok(scanner.application_transactions(self.engine.as_ref())?) - } - - /// Scan the Delta Log for the latest transaction entry for a specific application. - /// - /// Initiates a log scan, but terminates as soon as the transaction - /// for the given application is found. - pub fn application_transaction( - &self, - app_id: impl AsRef, - ) -> DeltaResult> { - let scanner = SetTransactionScanner::new(self.inner.clone()); - Ok(scanner.application_transaction(self.engine.as_ref(), app_id.as_ref())?) - } -} - -#[derive(Clone)] -pub struct EagerSnapshot { - snapshot: Snapshot, - files: Option, - predicate: Option>, -} - -impl EagerSnapshot { - /// Create a new [`EagerSnapshot`] instance tracking actions of the given types. - /// - /// Only actions supplied by `tracked_actions` will be loaded into memory. - /// This is useful when only a subset of actions are needed. `Add` and `Remove` actions - /// are treated specially. I.e. `Add` and `Remove` will be loaded as well. - pub async fn try_new_with_actions( - table_root: impl AsRef, - store: Arc, - config: DeltaTableConfig, - version: impl Into>, - tracked_actions: HashSet, - predicate: Option>, - ) -> DeltaResult { - let snapshot = Snapshot::try_new(Table::try_from_uri(table_root)?, store, version).await?; - let files = config - .require_files - .then(|| -> DeltaResult<_> { Ok(replay_file_actions(&snapshot)?) }) - .transpose()?; - Ok(Self { - snapshot, - files, - predicate, - }) - } - - pub fn version(&self) -> Version { - self.snapshot.version() - } - - pub fn schema(&self) -> &Schema { - self.snapshot.schema() - } - - pub fn protocol(&self) -> &Protocol { - self.snapshot.protocol() - } - - pub fn metadata(&self) -> &Metadata { - self.snapshot.metadata() - } - - pub fn table_properties(&self) -> &TableProperties { - &self.snapshot.table_properties() - } - - pub fn files(&self) -> DeltaResult> { - Ok(LogicalFileView { - files: self - .files - .clone() - .ok_or_else(|| SnapshotError::missing_data(ActionType::Add))?, - index: 0, - }) - } - - /// Get the number of files in the current snapshot - pub fn files_count(&self) -> DeltaResult { - Ok(self - .files - .as_ref() - .map(|f| f.num_rows()) - .ok_or_else(|| SnapshotError::missing_data(ActionType::Add))?) - } - - pub fn tombstones(&self) -> DeltaResult>> { - self.snapshot.tombstones() - } - - /// Scan the Delta Log to obtain the latest transaction for all applications - /// - /// This method requires a full scan of the log to find all transactions. - /// When a specific application id is requested, it is much more efficient to use - /// [`application_transaction`](Self::application_transaction) instead. - pub fn application_transactions(&self) -> DeltaResult { - self.snapshot.application_transactions() - } - - /// Scan the Delta Log for the latest transaction entry for a specific application. - /// - /// Initiates a log scan, but terminates as soon as the transaction - /// for the given application is found. - pub fn application_transaction( - &self, - app_id: impl AsRef, - ) -> DeltaResult> { - self.snapshot.application_transaction(app_id) - } - - pub(crate) fn update(&mut self) -> DeltaResult<()> { - let state = self - .files - .as_ref() - .ok_or(SnapshotError::missing_data(ActionType::Add))? 
- .clone(); - - let log_root = self.snapshot.table_root().join("_delta_log/").unwrap(); - let fs_client = self.snapshot.engine.get_file_system_client(); - let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; - let checkpoint_read_schema = get_log_add_schema().clone(); - - let segment = - LogSegment::for_table_changes(fs_client.as_ref(), log_root, self.version() + 1, None)?; - let slice_iter = segment - .replay( - self.snapshot.engine.as_ref(), - commit_read_schema, - checkpoint_read_schema, - None, - )? - .chain(std::iter::once(Ok(( - Box::new(ArrowEngineData::from(state)) as Box, - false, - )))); - - let res = scan_action_iter(self.snapshot.engine.as_ref(), slice_iter, None) - .map(|res| { - res.and_then(|(d, sel)| { - let batch = RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?); - Ok(filter_record_batch(&batch, &BooleanArray::from(sel))?) - }) - }) - .collect::, _>>()?; - - self.files = Some(concat_batches(res[0].schema_ref(), &res)?); - - Ok(()) - } -} - -fn replay_file_actions(snapshot: &Snapshot) -> DeltaResult { - let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; - let checkpoint_read_schema = get_log_add_schema().clone(); - - let curr_data = snapshot - .inner - ._log_segment() - .replay( - snapshot.engine.as_ref(), - commit_read_schema.clone(), - checkpoint_read_schema.clone(), - None, - )? - .map_ok( - |(data, flag)| -> Result<(RecordBatch, bool), delta_kernel::Error> { - Ok((ArrowEngineData::try_from_engine_data(data)?.into(), flag)) - }, - ) - .flatten() - .collect::, _>>()?; - - let scan_iter = curr_data.clone().into_iter().map(|(data, flag)| { - Ok(( - Box::new(ArrowEngineData::new(data.clone())) as Box, - flag, - )) - }); - - let res = scan_action_iter(snapshot.engine.as_ref(), scan_iter, None) - .map(|res| { - res.and_then(|(d, selection)| { - Ok(( - RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?), - selection, - )) - }) - }) - .zip(curr_data.into_iter()) - .map(|(scan_res, (data_raw, _))| match scan_res { - Ok((_, selection)) => { - let data = filter_record_batch(&data_raw, &BooleanArray::from(selection))?; - Ok(data.project(&[0])?) - } - Err(e) => Err(e), - }) - .collect::, _>>()?; - - Ok(concat_batches(res[0].schema_ref(), &res)?) -} - -/// Helper trait to extract individual values from a `StructData`. -pub trait StructDataExt { - fn get(&self, key: &str) -> Option<&Scalar>; -} - -impl StructDataExt for StructData { - fn get(&self, key: &str) -> Option<&Scalar> { - self.fields() - .iter() - .zip(self.values().iter()) - .find(|(k, _)| k.name() == key) - .map(|(_, v)| v) - } -} - -#[derive(Clone)] -pub struct LogicalFileView { - files: RecordBatch, - index: usize, -} - -impl LogicalFileView { - /// Path of the file. - pub fn path(&self) -> &str { - self.files.column(0).as_string::().value(self.index) - } - - /// Size of the file in bytes. - pub fn size(&self) -> i64 { - self.files - .column(1) - .as_primitive::() - .value(self.index) - } - - /// Modification time of the file in milliseconds since epoch. - pub fn modification_time(&self) -> i64 { - self.files - .column(2) - .as_primitive::() - .value(self.index) - } - - /// Datetime of the last modification time of the file. 
- pub fn modification_datetime(&self) -> DeltaResult> { - DateTime::from_timestamp_millis(self.modification_time()).ok_or(DeltaTableError::from( - crate::protocol::ProtocolError::InvalidField(format!( - "invalid modification_time: {:?}", - self.modification_time() - )), - )) - } - - pub fn stats(&self) -> Option<&str> { - let col = self.files.column(3).as_string::(); - col.is_valid(self.index).then(|| col.value(self.index)) - } - - pub fn partition_values(&self) -> Option { - self.files - .column_by_name("fileConstantValues") - .and_then(|col| col.as_struct_opt()) - .and_then(|s| s.column_by_name("partitionValues")) - .and_then(|arr| { - arr.is_valid(self.index) - .then(|| match Scalar::from_array(arr, self.index) { - Some(Scalar::Struct(s)) => Some(s), - _ => None, - }) - .flatten() - }) - } -} - -impl Iterator for LogicalFileView { - type Item = LogicalFileView; - - fn next(&mut self) -> Option { - if self.index < self.files.num_rows() { - let file = LogicalFileView { - files: self.files.clone(), - index: self.index, - }; - self.index += 1; - Some(file) - } else { - None - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - use deltalake_test::acceptance::{read_dat_case, TestCaseInfo}; - use deltalake_test::TestResult; - use std::path::PathBuf; - - fn get_dat_dir() -> PathBuf { - let d = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - let mut rep_root = d - .parent() - .and_then(|p| p.parent()) - .expect("valid directory") - .to_path_buf(); - rep_root.push("dat/out/reader_tests/generated"); - rep_root - } - - async fn load_snapshot() -> TestResult<()> { - // some comment - let mut dat_dir = get_dat_dir(); - dat_dir.push("multi_partitioned"); - - let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; - let table_info = dat_info.table_summary()?; - - let table = Table::try_from_uri(dat_info.table_root()?)?; - - let snapshot = Snapshot::try_new( - table, - Arc::new(object_store::local::LocalFileSystem::default()), - None, - ) - .await?; - - assert_eq!(snapshot.version(), table_info.version); - assert_eq!( - ( - snapshot.protocol().min_reader_version(), - snapshot.protocol().min_writer_version() - ), - (table_info.min_reader_version, table_info.min_writer_version) - ); - - Ok(()) - } - - #[tokio::test(flavor = "multi_thread")] - async fn load_snapshot_multi() -> TestResult<()> { - load_snapshot().await - } - - #[tokio::test(flavor = "current_thread")] - async fn load_snapshot_current() -> TestResult<()> { - load_snapshot().await - } - - #[tokio::test] - async fn load_eager_snapshot() -> TestResult<()> { - let mut dat_dir = get_dat_dir(); - dat_dir.push("multi_partitioned"); - - let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; - let table_info = dat_info.table_summary()?; - - let table = Table::try_from_uri(dat_info.table_root()?)?; - - let mut snapshot = EagerSnapshot::try_new_with_actions( - table.location(), - Arc::new(object_store::local::LocalFileSystem::default()), - Default::default(), - Some(1), - Default::default(), - None, - ) - .await?; - - // assert_eq!(snapshot.version(), table_info.version); - // assert_eq!( - // snapshot.protocol().min_reader_version(), - // table_info.min_reader_version - // ); - - snapshot.update()?; - - Ok(()) - } -} diff --git a/crates/core/src/storage/cache.rs b/crates/core/src/kernel/snapshot_next/cache.rs similarity index 95% rename from crates/core/src/storage/cache.rs rename to crates/core/src/kernel/snapshot_next/cache.rs index eb6b5bd785..594d599942 100644 --- a/crates/core/src/storage/cache.rs +++ 
b/crates/core/src/kernel/snapshot_next/cache.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use bytes::Bytes; use chrono::{DateTime, Utc}; use futures::stream::BoxStream; -use futures::{StreamExt, TryStreamExt}; +use futures::StreamExt; use object_store::path::Path; use object_store::{ Attributes, GetOptions, GetResult, GetResultPayload, ListResult, MultipartUpload, ObjectMeta, @@ -46,11 +46,10 @@ impl Entry { /// the object store are immutable and no attempt is made to invalidate the cache /// when files are updated in the remote object store. #[derive(Clone)] -pub(crate) struct CommitCacheObjectStore { +pub(super) struct CommitCacheObjectStore { inner: Arc, check: Arc bool + Send + Sync>, cache: Arc>, - has_ordered_listing: bool, } impl std::fmt::Debug for CommitCacheObjectStore { @@ -75,13 +74,10 @@ fn cache_json(path: &Path) -> bool { impl CommitCacheObjectStore { /// Create a new conditionally cached object store. pub fn new(inner: Arc) -> Self { - let store_str = format!("{}", inner); - let is_local = store_str.starts_with("LocalFileSystem"); Self { inner, check: Arc::new(cache_json), cache: Arc::new(Cache::new(100)), - has_ordered_listing: !is_local, } } diff --git a/crates/core/src/kernel/snapshot_next/eager.rs b/crates/core/src/kernel/snapshot_next/eager.rs new file mode 100644 index 0000000000..88306b8e49 --- /dev/null +++ b/crates/core/src/kernel/snapshot_next/eager.rs @@ -0,0 +1,263 @@ +use std::sync::Arc; + +use arrow::compute::{concat_batches, filter_record_batch}; +use arrow_array::{BooleanArray, RecordBatch}; +use chrono::format::Item; +use delta_kernel::actions::set_transaction::SetTransactionMap; +use delta_kernel::actions::{get_log_add_schema, get_log_schema, ADD_NAME, REMOVE_NAME}; +use delta_kernel::actions::{Add, Metadata, Protocol, SetTransaction}; +use delta_kernel::engine::arrow_data::ArrowEngineData; +use delta_kernel::log_segment::LogSegment; +use delta_kernel::scan::log_replay::scan_action_iter; +use delta_kernel::schema::Schema; +use delta_kernel::table_properties::TableProperties; +use delta_kernel::{EngineData, Expression, Table, Version}; +use itertools::Itertools; +use object_store::ObjectStore; +use url::Url; + +use super::iterators::{AddIterator, AddView, AddViewItem}; +use super::lazy::LazySnapshot; +use super::{Snapshot, SnapshotError}; +use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; + +/// An eager snapshot of a Delta Table at a specific version. +/// +/// This snapshot loads some log data eagerly and keeps it in memory. 
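// --- Illustrative usage sketch, not part of this patch ---
// Mirrors the unit test at the bottom of this file: open an eager snapshot of a local
// table, inspect it, then refresh it. The table URI argument and the use of
// `LocalFileSystem` are assumptions for the example; `update()` is crate-internal here.
async fn eager_snapshot_example(table_uri: &str) -> DeltaResult<()> {
    let store = Arc::new(object_store::local::LocalFileSystem::default());
    let mut snapshot =
        EagerSnapshot::try_new(table_uri, store, Default::default(), None, None).await?;
    println!(
        "version {} with {} file actions",
        snapshot.version(),
        snapshot.files_count()?
    );
    // Fold in any commits written after this snapshot was created.
    snapshot.update()?;
    Ok(())
}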
+#[derive(Clone)] +pub struct EagerSnapshot { + snapshot: LazySnapshot, + files: Option, + predicate: Option>, +} + +impl Snapshot for EagerSnapshot { + fn table_root(&self) -> &Url { + self.snapshot.table_root() + } + + fn version(&self) -> Version { + self.snapshot.version() + } + + fn schema(&self) -> &Schema { + self.snapshot.schema() + } + + fn protocol(&self) -> &Protocol { + self.snapshot.protocol() + } + + fn metadata(&self) -> &Metadata { + self.snapshot.metadata() + } + + fn table_properties(&self) -> &TableProperties { + self.snapshot.table_properties() + } + + fn files(&self) -> DeltaResult>> { + Ok(std::iter::once(Ok(self + .files + .clone() + .ok_or(SnapshotError::FilesNotInitialized)?))) + } + + fn tombstones(&self) -> DeltaResult>> { + self.snapshot.tombstones() + } + + fn application_transactions(&self) -> DeltaResult { + self.snapshot.application_transactions() + } + + fn application_transaction( + &self, + app_id: impl AsRef, + ) -> DeltaResult> { + self.snapshot.application_transaction(app_id) + } +} + +impl EagerSnapshot { + /// Create a new [`EagerSnapshot`] instance + pub async fn try_new( + table_root: impl AsRef, + store: Arc, + config: DeltaTableConfig, + version: impl Into>, + predicate: impl Into>>, + ) -> DeltaResult { + let snapshot = + LazySnapshot::try_new(Table::try_from_uri(table_root)?, store, version).await?; + let files = config + .require_files + .then(|| -> DeltaResult<_> { Ok(replay_file_actions(&snapshot)?) }) + .transpose()?; + Ok(Self { + snapshot, + files, + predicate: predicate.into(), + }) + } + + pub fn file_data(&self) -> DeltaResult<&RecordBatch> { + Ok(self + .files + .as_ref() + .ok_or(SnapshotError::FilesNotInitialized)?) + } + + pub fn files(&self) -> DeltaResult> { + AddView::try_new(self.file_data()?.clone()) + } + + pub fn file_actions(&self) -> DeltaResult> + '_> { + AddIterator::try_new(self.file_data()?) + } + + /// Get the number of files in the current snapshot + pub fn files_count(&self) -> DeltaResult { + Ok(self + .files + .as_ref() + .map(|f| f.num_rows()) + .ok_or_else(|| SnapshotError::FilesNotInitialized)?) + } + + pub(crate) fn update(&mut self) -> DeltaResult<()> { + let log_root = self.snapshot.table_root().join("_delta_log/").unwrap(); + let fs_client = self.snapshot.engine_ref().get_file_system_client(); + let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; + let checkpoint_read_schema = get_log_add_schema().clone(); + + let segment = + LogSegment::for_table_changes(fs_client.as_ref(), log_root, self.version() + 1, None)?; + let mut slice_iter = segment + .replay( + self.snapshot.engine_ref().as_ref(), + commit_read_schema, + checkpoint_read_schema, + None, + )? + .map_ok( + |(data, flag)| -> Result<(RecordBatch, bool), delta_kernel::Error> { + Ok((ArrowEngineData::try_from_engine_data(data)?.into(), flag)) + }, + ) + .flatten() + .collect::, _>>()?; + + slice_iter.push(( + self.files + .as_ref() + .ok_or(SnapshotError::FilesNotInitialized)? + .clone(), + false, + )); + + self.files = Some(scan_as_log_data(&self.snapshot, slice_iter)?); + + Ok(()) + } +} + +fn replay_file_actions(snapshot: &LazySnapshot) -> DeltaResult { + let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; + let checkpoint_read_schema = get_log_add_schema().clone(); + + let curr_data = snapshot + .inner + ._log_segment() + .replay( + snapshot.engine_ref().as_ref(), + commit_read_schema.clone(), + checkpoint_read_schema.clone(), + None, + )? 
+ .map_ok( + |(data, flag)| -> Result<(RecordBatch, bool), delta_kernel::Error> { + Ok((ArrowEngineData::try_from_engine_data(data)?.into(), flag)) + }, + ) + .flatten() + .collect::, _>>()?; + + scan_as_log_data(snapshot, curr_data) +} + +fn scan_as_log_data( + snapshot: &LazySnapshot, + curr_data: Vec<(RecordBatch, bool)>, +) -> Result { + let scan_iter = curr_data.clone().into_iter().map(|(data, flag)| { + Ok(( + Box::new(ArrowEngineData::new(data.clone())) as Box, + flag, + )) + }); + + let res = scan_action_iter(snapshot.engine_ref().as_ref(), scan_iter, None) + .map(|res| { + res.and_then(|(d, selection)| { + Ok(( + RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?), + selection, + )) + }) + }) + .zip(curr_data.into_iter()) + .map(|(scan_res, (data_raw, _))| match scan_res { + Ok((_, selection)) => { + let data = filter_record_batch(&data_raw, &BooleanArray::from(selection))?; + Ok(data.project(&[0])?) + } + Err(e) => Err(e), + }) + .collect::, _>>()?; + + Ok(concat_batches(res[0].schema_ref(), &res)?) +} + +#[cfg(test)] +mod tests { + use deltalake_test::acceptance::{read_dat_case, TestCaseInfo}; + use deltalake_test::TestResult; + + use super::super::tests::get_dat_dir; + use super::*; + + #[tokio::test] + async fn load_eager_snapshot() -> TestResult<()> { + let mut dat_dir = get_dat_dir(); + dat_dir.push("multi_partitioned"); + + let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; + let table_info = dat_info.table_summary()?; + + let table = Table::try_from_uri(dat_info.table_root()?)?; + + let mut snapshot = EagerSnapshot::try_new( + table.location(), + Arc::new(object_store::local::LocalFileSystem::default()), + Default::default(), + Some(1), + None, + ) + .await?; + + // assert_eq!(snapshot.version(), table_info.version); + // assert_eq!( + // snapshot.protocol().min_reader_version(), + // table_info.min_reader_version + // ); + + snapshot.update()?; + + for file in snapshot.file_actions()? 
{ + println!("file: {:#?}", file.unwrap()); + } + + Ok(()) + } +} diff --git a/crates/core/src/kernel/snapshot_next/iterators.rs b/crates/core/src/kernel/snapshot_next/iterators.rs new file mode 100644 index 0000000000..4700cb9da3 --- /dev/null +++ b/crates/core/src/kernel/snapshot_next/iterators.rs @@ -0,0 +1,310 @@ +use std::collections::HashSet; +use std::sync::Arc; + +use arrow_array::cast::AsArray; +use arrow_array::types::Int64Type; +use arrow_array::{ + Array, ArrayRef, BooleanArray, Int64Array, RecordBatch, StringArray, StructArray, +}; +use chrono::{DateTime, Utc}; +use delta_kernel::actions::visitors::AddVisitor; +use delta_kernel::actions::Add; +use delta_kernel::actions::ADD_NAME; +use delta_kernel::engine::arrow_data::ArrowEngineData; +use delta_kernel::engine::arrow_expression::ProvidesColumnByName; +use delta_kernel::engine_data::{GetData, RowVisitor}; +use delta_kernel::expressions::{Scalar, StructData}; + +use crate::kernel::scalars::ScalarExt; +use crate::{DeltaResult, DeltaTableError}; + +pub struct AddIterator<'a> { + paths: &'a StringArray, + getters: Arc>>, + index: usize, +} + +impl AddIterator<'_> { + pub fn try_new<'a>(actions: &'a RecordBatch) -> DeltaResult> { + validate_column::(actions, &[ADD_NAME, "path"])?; + validate_column::(actions, &[ADD_NAME, "size"])?; + validate_column::(actions, &[ADD_NAME, "modificationTime"])?; + validate_column::(actions, &[ADD_NAME, "dataChange"])?; + + let visitor = AddVisitor::new(); + let fields = visitor.selected_column_names_and_types(); + + let mut mask = HashSet::new(); + for column in fields.0 { + for i in 0..column.len() { + mask.insert(&column[..i + 1]); + } + } + + let mut getters = vec![]; + ArrowEngineData::extract_columns(&mut vec![], &mut getters, fields.1, &mask, actions)?; + + let paths = extract_column(actions, &[ADD_NAME, "path"])?.as_string::(); + + Ok(AddIterator { + paths, + getters: Arc::new(getters), + index: 0, + }) + } +} + +impl Iterator for AddIterator<'_> { + type Item = DeltaResult; + + fn next(&mut self) -> Option { + if self.index < self.paths.len() { + let path = self.paths.value(self.index).to_string(); + let add = AddVisitor::visit_add(self.index, path, self.getters.as_slice()) + .map_err(DeltaTableError::from); + self.index += 1; + Some(add) + } else { + None + } + } +} + +pub struct AddView { + actions: RecordBatch, + index: usize, +} + +impl AddView { + pub fn try_new(actions: RecordBatch) -> DeltaResult { + validate_column::(&actions, &[ADD_NAME, "path"])?; + validate_column::(&actions, &[ADD_NAME, "size"])?; + validate_column::(&actions, &[ADD_NAME, "modificationTime"])?; + validate_column::(&actions, &[ADD_NAME, "dataChange"])?; + Ok(Self { actions, index: 0 }) + } +} + +impl Iterator for AddView { + type Item = AddViewItem; + + fn next(&mut self) -> Option { + if self.index < self.actions.num_rows() { + let add = AddViewItem { + actions: self.actions.clone(), + index: self.index, + }; + self.index += 1; + Some(add) + } else { + None + } + } +} + +pub struct AddViewItem { + actions: RecordBatch, + index: usize, +} + +impl AddViewItem { + pub fn path(&self) -> &str { + extract_column(&self.actions, &[ADD_NAME, "path"]) + .unwrap() + .as_string::() + .value(self.index) + } + + pub fn size(&self) -> i64 { + extract_column(&self.actions, &[ADD_NAME, "size"]) + .unwrap() + .as_primitive::() + .value(self.index) + } + + pub fn modification_time(&self) -> i64 { + extract_column(&self.actions, &[ADD_NAME, "modificationTime"]) + .unwrap() + .as_primitive::() + .value(self.index) + } + + /// Datetime 
of the last modification time of the file. + pub fn modification_datetime(&self) -> DeltaResult> { + DateTime::from_timestamp_millis(self.modification_time()).ok_or(DeltaTableError::from( + crate::protocol::ProtocolError::InvalidField(format!( + "invalid modification_time: {:?}", + self.modification_time() + )), + )) + } + + pub fn data_change(&self) -> bool { + extract_column(&self.actions, &[ADD_NAME, "dataChange"]) + .unwrap() + .as_boolean() + .value(self.index) + } + + pub fn stats(&self) -> Option<&str> { + extract_column(&self.actions, &[ADD_NAME, "stats"]) + .ok() + .and_then(|c| c.as_string_opt::().map(|v| v.value(self.index))) + } + + pub fn base_row_id(&self) -> Option { + extract_column(&self.actions, &[ADD_NAME, "baseRowId"]) + .ok() + .and_then(|c| { + c.as_primitive_opt::() + .map(|v| v.value(self.index)) + }) + } + + pub fn default_row_commit_version(&self) -> Option { + extract_column(&self.actions, &[ADD_NAME, "defaultRowCommitVersion"]) + .ok() + .and_then(|c| { + c.as_primitive_opt::() + .map(|v| v.value(self.index)) + }) + } + + pub fn clustering_provider(&self) -> Option<&str> { + extract_column(&self.actions, &[ADD_NAME, "clusteringProvider"]) + .ok() + .and_then(|c| c.as_string_opt::().map(|v| v.value(self.index))) + } +} + +#[derive(Clone)] +pub struct LogicalFileView { + files: RecordBatch, + index: usize, +} + +impl LogicalFileView { + /// Path of the file. + pub fn path(&self) -> &str { + self.files.column(0).as_string::().value(self.index) + } + + /// Size of the file in bytes. + pub fn size(&self) -> i64 { + self.files + .column(1) + .as_primitive::() + .value(self.index) + } + + /// Modification time of the file in milliseconds since epoch. + pub fn modification_time(&self) -> i64 { + self.files + .column(2) + .as_primitive::() + .value(self.index) + } + + /// Datetime of the last modification time of the file. 
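    // --- Illustrative sketch, not part of this patch ---
    // The row-backed views in this module expose typed accessors, so callers can fold
    // over file metadata without materializing `Add` structs. `AddView` and
    // `AddViewItem::size` are defined above; the aggregation itself is only an example.
    fn total_add_size(files: AddView) -> i64 {
        files.map(|item| item.size()).sum()
    }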
+ pub fn modification_datetime(&self) -> DeltaResult> { + DateTime::from_timestamp_millis(self.modification_time()).ok_or(DeltaTableError::from( + crate::protocol::ProtocolError::InvalidField(format!( + "invalid modification_time: {:?}", + self.modification_time() + )), + )) + } + + pub fn stats(&self) -> Option<&str> { + let col = self.files.column(3).as_string::(); + col.is_valid(self.index).then(|| col.value(self.index)) + } + + pub fn partition_values(&self) -> Option { + self.files + .column_by_name("fileConstantValues") + .and_then(|col| col.as_struct_opt()) + .and_then(|s| s.column_by_name("partitionValues")) + .and_then(|arr| { + arr.is_valid(self.index) + .then(|| match Scalar::from_array(arr, self.index) { + Some(Scalar::Struct(s)) => Some(s), + _ => None, + }) + .flatten() + }) + } +} + +impl Iterator for LogicalFileView { + type Item = LogicalFileView; + + fn next(&mut self) -> Option { + if self.index < self.files.num_rows() { + let file = LogicalFileView { + files: self.files.clone(), + index: self.index, + }; + self.index += 1; + Some(file) + } else { + None + } + } +} + +fn validate_column<'a, T: Array + 'static>( + actions: &'a RecordBatch, + col: &'a [impl AsRef], +) -> DeltaResult<()> { + if let Ok(arr) = extract_column(actions, col) { + if arr.as_any().downcast_ref::().is_none() { + return Err(DeltaTableError::from( + crate::protocol::ProtocolError::InvalidField(format!("Invalid column: {:?}", arr)), + )); + } + if arr.null_count() > 0 { + return Err(DeltaTableError::from( + crate::protocol::ProtocolError::InvalidField(format!( + "Column has null values: {:?}", + arr + )), + )); + } + } else { + return Err(DeltaTableError::from( + crate::protocol::ProtocolError::InvalidField(format!("Column not found",)), + )); + } + Ok(()) +} + +fn extract_column<'a>( + mut parent: &'a dyn ProvidesColumnByName, + col: &[impl AsRef], +) -> DeltaResult<&'a ArrayRef> { + let mut field_names = col.iter(); + let Some(mut field_name) = field_names.next() else { + return Err(arrow_schema::ArrowError::SchemaError( + "Empty column path".to_string(), + ))?; + }; + loop { + let child = parent.column_by_name(field_name.as_ref()).ok_or_else(|| { + arrow_schema::ArrowError::SchemaError(format!("No such field: {}", field_name.as_ref())) + })?; + field_name = match field_names.next() { + Some(name) => name, + None => return Ok(child), + }; + parent = child + .as_any() + .downcast_ref::() + .ok_or_else(|| { + arrow_schema::ArrowError::SchemaError(format!( + "Not a struct: {}", + field_name.as_ref() + )) + })?; + } +} diff --git a/crates/core/src/kernel/snapshot_next/lazy.rs b/crates/core/src/kernel/snapshot_next/lazy.rs new file mode 100644 index 0000000000..386d0f63d6 --- /dev/null +++ b/crates/core/src/kernel/snapshot_next/lazy.rs @@ -0,0 +1,229 @@ +//! Snapshot of a Delta Table at a specific version. +//! 
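// --- Illustrative sketch, not part of this patch ---
// Mirrors the test at the bottom of this file: open a lazy snapshot against a local
// object store. Passing `None` resolves the latest version; `Some(v)` pins a version.
// The URI argument and `LocalFileSystem` are assumptions for the example.
async fn open_lazy_snapshot(table_uri: &str) -> DeltaResult<LazySnapshot> {
    let table = Table::try_from_uri(table_uri)?;
    let store = Arc::new(object_store::local::LocalFileSystem::default());
    LazySnapshot::try_new(table, store, None).await
}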
+use std::sync::{Arc, LazyLock}; + +use arrow::compute::filter_record_batch; +use arrow_array::{BooleanArray, RecordBatch}; +use delta_kernel::actions::set_transaction::{SetTransactionMap, SetTransactionScanner}; +use delta_kernel::actions::{get_log_schema, REMOVE_NAME}; +use delta_kernel::actions::{Metadata, Protocol, SetTransaction}; +use delta_kernel::engine::arrow_data::ArrowEngineData; +use delta_kernel::engine::default::executor::tokio::{ + TokioBackgroundExecutor, TokioMultiThreadExecutor, +}; +use delta_kernel::engine::default::DefaultEngine; +use delta_kernel::schema::Schema; +use delta_kernel::snapshot::Snapshot as SnapshotInner; +use delta_kernel::table_properties::TableProperties; +use delta_kernel::{Engine, Expression, ExpressionRef, Table, Version}; +use itertools::Itertools; +use object_store::path::Path; +use object_store::ObjectStore; +use url::Url; + +use super::cache::CommitCacheObjectStore; +use super::Snapshot; +use crate::{DeltaResult, DeltaTableError}; + +// TODO: avoid repetitive parsing of json stats + +#[derive(Clone)] +pub struct LazySnapshot { + pub(super) inner: Arc, + engine: Arc, +} + +impl Snapshot for LazySnapshot { + fn table_root(&self) -> &Url { + &self.inner.table_root() + } + + fn version(&self) -> Version { + self.inner.version() + } + + fn schema(&self) -> &Schema { + self.inner.schema() + } + + fn protocol(&self) -> &Protocol { + self.inner.protocol() + } + + fn metadata(&self) -> &Metadata { + self.inner.metadata() + } + + fn table_properties(&self) -> &TableProperties { + &self.inner.table_properties() + } + + fn files(&self) -> DeltaResult>> { + Ok(self + .files_impl(None)? + .map(|batch| batch.map_err(|e| e.into()))) + } + + fn tombstones(&self) -> DeltaResult>> { + static META_PREDICATE: LazyLock> = LazyLock::new(|| { + Some(Arc::new( + Expression::column([REMOVE_NAME, "path"]).is_not_null(), + )) + }); + let read_schema = get_log_schema().project(&[REMOVE_NAME])?; + Ok(self + .inner + ._log_segment() + .replay( + self.engine.as_ref(), + read_schema.clone(), + read_schema, + META_PREDICATE.clone(), + )? + .map_ok(|(d, _)| Ok(RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?))) + .flatten()) + } + + fn application_transactions(&self) -> DeltaResult { + let scanner = SetTransactionScanner::new(self.inner.clone()); + Ok(scanner.application_transactions(self.engine.as_ref())?) + } + + fn application_transaction( + &self, + app_id: impl AsRef, + ) -> DeltaResult> { + let scanner = SetTransactionScanner::new(self.inner.clone()); + Ok(scanner.application_transaction(self.engine.as_ref(), app_id.as_ref())?) + } +} + +impl LazySnapshot { + /// Create a new [`Snapshot`] instance. + pub fn new(inner: Arc, engine: Arc) -> Self { + Self { inner, engine } + } + + /// Create a new [`Snapshot`] instance for a table. + pub async fn try_new( + table: Table, + store: Arc, + version: impl Into>, + ) -> DeltaResult { + // TODO: how to deal with the dedicated IO runtime? Would this already be covered by the + // object store implementation pass to this? 
+ let table_root = Path::from_url_path(table.location().path())?; + let store_str = format!("{}", store); + let is_local = store_str.starts_with("LocalFileSystem"); + let store = Arc::new(CommitCacheObjectStore::new(store)); + let handle = tokio::runtime::Handle::current(); + let engine: Arc = match handle.runtime_flavor() { + tokio::runtime::RuntimeFlavor::MultiThread => Arc::new(DefaultEngine::new_with_opts( + store, + table_root, + Arc::new(TokioMultiThreadExecutor::new(handle)), + !is_local, + )), + tokio::runtime::RuntimeFlavor::CurrentThread => Arc::new(DefaultEngine::new_with_opts( + store, + table_root, + Arc::new(TokioBackgroundExecutor::new()), + !is_local, + )), + _ => return Err(DeltaTableError::generic("unsupported runtime flavor")), + }; + + let snapshot = table.snapshot(engine.as_ref(), version.into())?; + Ok(Self::new(Arc::new(snapshot), engine)) + } + + /// A shared reference to the engine used for interacting with the Delta Table. + pub(super) fn engine_ref(&self) -> &Arc { + &self.engine + } + + /// Get the timestamp of the given version in miliscends since epoch. + /// + /// Extracts the timestamp from the commit file of the given version + /// from the current log segment. If the commit file is not part of the + /// current log segment, `None` is returned. + pub fn version_timestamp(&self, version: Version) -> Option { + self.inner + ._log_segment() + .ascending_commit_files + .iter() + .find(|f| f.version == version) + .map(|f| f.location.last_modified) + } + + /// read all active files from the log + fn files_impl( + &self, + predicate: impl Into>>, + ) -> DeltaResult>> { + let scan = self + .inner + .clone() + .scan_builder() + .with_predicate(predicate) + .build()?; + Ok(scan.scan_data(self.engine.as_ref())?.map(|res| { + res.and_then(|(data, mut predicate)| { + let batch: RecordBatch = ArrowEngineData::try_from_engine_data(data)?.into(); + if predicate.len() < batch.num_rows() { + predicate + .extend(std::iter::repeat(true).take(batch.num_rows() - predicate.len())); + } + Ok(filter_record_batch(&batch, &BooleanArray::from(predicate))?) + }) + })) + } +} + +#[cfg(test)] +mod tests { + use deltalake_test::acceptance::{read_dat_case, TestCaseInfo}; + use deltalake_test::TestResult; + + use super::super::tests::get_dat_dir; + use super::*; + + async fn load_snapshot() -> TestResult<()> { + // some comment + let mut dat_dir = get_dat_dir(); + dat_dir.push("multi_partitioned"); + + let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; + let table_info = dat_info.table_summary()?; + + let table = Table::try_from_uri(dat_info.table_root()?)?; + + let snapshot = LazySnapshot::try_new( + table, + Arc::new(object_store::local::LocalFileSystem::default()), + None, + ) + .await?; + + assert_eq!(snapshot.version(), table_info.version); + assert_eq!( + ( + snapshot.protocol().min_reader_version(), + snapshot.protocol().min_writer_version() + ), + (table_info.min_reader_version, table_info.min_writer_version) + ); + + Ok(()) + } + + #[tokio::test(flavor = "multi_thread")] + async fn load_snapshot_multi() -> TestResult<()> { + load_snapshot().await + } + + #[tokio::test(flavor = "current_thread")] + async fn load_snapshot_current() -> TestResult<()> { + load_snapshot().await + } +} diff --git a/crates/core/src/kernel/snapshot_next/mod.rs b/crates/core/src/kernel/snapshot_next/mod.rs new file mode 100644 index 0000000000..879ef2824a --- /dev/null +++ b/crates/core/src/kernel/snapshot_next/mod.rs @@ -0,0 +1,161 @@ +//! Snapshot of a Delta Table at a specific version. +//! 
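// --- Illustrative sketch, not part of this patch ---
// The `Snapshot` trait declared below lets callers stay generic over the lazy and eager
// implementations. A small helper like this is only an example of that intent.
fn describe_snapshot(snapshot: &impl Snapshot) {
    println!(
        "table {} at version {} (min reader version {})",
        snapshot.table_root(),
        snapshot.version(),
        snapshot.protocol().min_reader_version()
    );
}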
+use std::sync::Arc; + +use arrow_array::RecordBatch; +use delta_kernel::actions::visitors::SetTransactionMap; +use delta_kernel::actions::{Add, Metadata, Protocol, SetTransaction}; +use delta_kernel::expressions::{Scalar, StructData}; +use delta_kernel::schema::Schema; +use delta_kernel::table_properties::TableProperties; +use delta_kernel::Version; +use iterators::{AddIterator, AddView, AddViewItem}; +use url::Url; + +use crate::{DeltaResult, DeltaTableError}; + +pub use eager::EagerSnapshot; +pub use lazy::LazySnapshot; + +mod cache; +mod eager; +mod iterators; +mod lazy; + +// TODO: avoid repetitive parsing of json stats + +#[derive(thiserror::Error, Debug)] +enum SnapshotError { + #[error("Tried accessing file data at snapshot initialized with no files.")] + FilesNotInitialized, +} + +impl From for DeltaTableError { + fn from(e: SnapshotError) -> Self { + match &e { + SnapshotError::FilesNotInitialized => DeltaTableError::generic(e), + } + } +} + +/// Helper trait to extract individual values from a `StructData`. +pub trait StructDataExt { + fn get(&self, key: &str) -> Option<&Scalar>; +} + +impl StructDataExt for StructData { + fn get(&self, key: &str) -> Option<&Scalar> { + self.fields() + .iter() + .zip(self.values().iter()) + .find(|(k, _)| k.name() == key) + .map(|(_, v)| v) + } +} + +pub trait Snapshot { + /// Location where the Delta Table (metadata) is stored. + fn table_root(&self) -> &Url; + + /// Version of this `Snapshot` in the table. + fn version(&self) -> Version; + + /// Table [`Schema`] at this `Snapshot`s version. + fn schema(&self) -> &Schema; + + /// Table [`Metadata`] at this `Snapshot`s version. + fn metadata(&self) -> &Metadata; + + /// Table [`Protocol`] at this `Snapshot`s version. + fn protocol(&self) -> &Protocol; + + /// Get the [`TableProperties`] for this [`Snapshot`]. + fn table_properties(&self) -> &TableProperties; + + fn files(&self) -> DeltaResult>>; + + fn files_view( + &self, + ) -> DeltaResult>>> { + Ok(self.files()?.map(|r| r.and_then(|b| AddView::try_new(b)))) + } + + fn tombstones(&self) -> DeltaResult>>; + + /// Scan the Delta Log to obtain the latest transaction for all applications + /// + /// This method requires a full scan of the log to find all transactions. + /// When a specific application id is requested, it is much more efficient to use + /// [`application_transaction`](Self::application_transaction) instead. + fn application_transactions(&self) -> DeltaResult; + + /// Scan the Delta Log for the latest transaction entry for a specific application. + /// + /// Initiates a log scan, but terminates as soon as the transaction + /// for the given application is found. 
+ fn application_transaction( + &self, + app_id: impl AsRef, + ) -> DeltaResult>; +} + +impl Snapshot for Arc { + fn table_root(&self) -> &Url { + self.as_ref().table_root() + } + + fn version(&self) -> Version { + self.as_ref().version() + } + + fn schema(&self) -> &Schema { + self.as_ref().schema() + } + + fn metadata(&self) -> &Metadata { + self.as_ref().metadata() + } + + fn protocol(&self) -> &Protocol { + self.as_ref().protocol() + } + + fn table_properties(&self) -> &TableProperties { + self.as_ref().table_properties() + } + + fn files(&self) -> DeltaResult>> { + self.as_ref().files() + } + + fn tombstones(&self) -> DeltaResult>> { + self.as_ref().tombstones() + } + + fn application_transactions(&self) -> DeltaResult { + self.as_ref().application_transactions() + } + + fn application_transaction( + &self, + app_id: impl AsRef, + ) -> DeltaResult> { + self.as_ref().application_transaction(app_id) + } +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + pub(super) fn get_dat_dir() -> PathBuf { + let d = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let mut rep_root = d + .parent() + .and_then(|p| p.parent()) + .expect("valid directory") + .to_path_buf(); + rep_root.push("dat/out/reader_tests/generated"); + rep_root + } +} diff --git a/crates/core/src/storage/mod.rs b/crates/core/src/storage/mod.rs index 31f4f60c77..8361bf138e 100644 --- a/crates/core/src/storage/mod.rs +++ b/crates/core/src/storage/mod.rs @@ -30,7 +30,6 @@ pub use retry_ext::ObjectStoreRetryExt; use std::ops::Range; pub use utils::*; -pub(crate) mod cache; pub mod file; pub mod retry_ext; pub mod utils; From b0f794f85379064be8412a2ea2f0f67107ac5215 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Fri, 17 Jan 2025 00:13:49 +0100 Subject: [PATCH 07/14] test: run some more dat tests Signed-off-by: Robert Pack --- .../{load_dat => load-dat}/action.yaml | 6 +- .github/workflows/build.yml | 17 ++-- Cargo.toml | 14 +-- crates/core/Cargo.toml | 6 +- crates/core/tests/dat.rs | 99 +++++++++++++++++++ 5 files changed, 125 insertions(+), 17 deletions(-) rename .github/actions/{load_dat => load-dat}/action.yaml (81%) create mode 100644 crates/core/tests/dat.rs diff --git a/.github/actions/load_dat/action.yaml b/.github/actions/load-dat/action.yaml similarity index 81% rename from .github/actions/load_dat/action.yaml rename to .github/actions/load-dat/action.yaml index 071db58ba0..6d40707b3c 100644 --- a/.github/actions/load_dat/action.yaml +++ b/.github/actions/load-dat/action.yaml @@ -19,8 +19,8 @@ runs: - name: load DAT shell: bash run: | - rm -rf {{ inputs.target-directory }} + rm -rf ${{ inputs.target-directory }} curl -OL https://github.com/delta-incubator/dat/releases/download/v${{ inputs.version }}/deltalake-dat-v${{ inputs.version }}.tar.gz - mkdir -p {{ inputs.target-directory }} - tar --no-same-permissions -xzf deltalake-dat-v${{ inputs.version }}.tar.gz --directory {{ inputs.target-directory }} + mkdir -p ${{ inputs.target-directory }} + tar --no-same-permissions -xzf deltalake-dat-v${{ inputs.version }}.tar.gz --directory ${{ inputs.target-directory }} rm deltalake-dat-v${{ inputs.version }}.tar.gz diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 578ae305ea..823d10ff0c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -20,7 +20,7 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: '1.81' + toolchain: "1.81" override: true - name: Format @@ -42,7 +42,7 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: '1.81' + 
toolchain: "1.81" override: true - name: build and lint with clippy @@ -79,9 +79,12 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: '1.81' + toolchain: "1.81" override: true + - name: Load DAT data + uses: ./.github/actions/load-dat + - name: Run tests run: cargo test --verbose --features ${{ env.DEFAULT_FEATURES }} @@ -114,7 +117,7 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: '1.81' + toolchain: "1.81" override: true # Install Java and Hadoop for HDFS integration tests @@ -129,6 +132,9 @@ jobs: tar -xf hadoop-3.4.0.tar.gz -C $GITHUB_WORKSPACE echo "$GITHUB_WORKSPACE/hadoop-3.4.0/bin" >> $GITHUB_PATH + - name: Load DAT data + uses: ./.github/actions/load-dat + - name: Start emulated services run: docker compose up -d @@ -160,7 +166,7 @@ jobs: uses: actions-rs/toolchain@v1 with: profile: default - toolchain: '1.81' + toolchain: "1.81" override: true - name: Download Lakectl @@ -175,4 +181,3 @@ jobs: - name: Run tests with rustls (default) run: | cargo test --features integration_test_lakefs,lakefs,datafusion - diff --git a/Cargo.toml b/Cargo.toml index c1bc6ea502..c3e53c69af 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,15 +26,15 @@ debug = true debug = "line-tables-only" [workspace.dependencies] -#delta_kernel = { version = "=0.6.0", features = ["default-engine"] } -delta_kernel = { path = "../delta-kernel-rs/kernel", features = [ - "default-engine", - "developer-visibility", -] } -# delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "fcc43b50dafdc5e6b84c206492bbde8ed1115529", features = [ +# delta_kernel = { version = "=0.6.0", features = ["default-engine"] } +# delta_kernel = { path = "../delta-kernel-rs/kernel", features = [ # "default-engine", # "developer-visibility", # ] } +delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "caeb70ab78e4d5f3b56b5105fd3587c1046d1e1b", features = [ + "default-engine", + "developer-visibility", +] } # arrow arrow = { version = "53" } @@ -48,7 +48,7 @@ arrow-ord = { version = "53" } arrow-row = { version = "53" } arrow-schema = { version = "53" } arrow-select = { version = "53" } -object_store = { version = "0.11.2" , features = ["cloud"]} +object_store = { version = "0.11.2", features = ["cloud"] } parquet = { version = "53" } # datafusion diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 232cc5f00d..6571371451 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -46,7 +46,7 @@ datafusion-functions-aggregate = { workspace = true, optional = true } # serde serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } -strum = { workspace = true} +strum = { workspace = true } # "stdlib" bytes = { workspace = true } @@ -130,3 +130,7 @@ datafusion = [ datafusion-ext = ["datafusion"] json = ["parquet/json"] python = ["arrow/pyarrow"] + +[[test]] +name = "dat" +harness = false diff --git a/crates/core/tests/dat.rs b/crates/core/tests/dat.rs new file mode 100644 index 0000000000..82daf5c20e --- /dev/null +++ b/crates/core/tests/dat.rs @@ -0,0 +1,99 @@ +use std::path::Path; +use std::sync::Arc; + +use delta_kernel::Table; +use deltalake_core::kernel::snapshot_next::{LazySnapshot, Snapshot}; +use deltalake_test::acceptance::read_dat_case; + +static SKIPPED_TESTS: &[&str; 1] = &["iceberg_compat_v1"]; + +fn reader_test_lazy(path: &Path) -> datatest_stable::Result<()> { + let root_dir = format!( + "{}/{}", + env!["CARGO_MANIFEST_DIR"], + path.parent().unwrap().to_str().unwrap() + ); + for skipped in 
SKIPPED_TESTS { + if root_dir.ends_with(skipped) { + println!("Skipping test: {}", skipped); + return Ok(()); + } + } + + tokio::runtime::Builder::new_current_thread() + .enable_all() + .build()? + .block_on(async { + let case = read_dat_case(root_dir).unwrap(); + + let table = Table::try_from_uri(case.table_root().unwrap()).expect("table"); + let snapshot = LazySnapshot::try_new( + table, + Arc::new(object_store::local::LocalFileSystem::default()), + None, + ) + .await + .unwrap(); + + let table_info = case.table_summary().expect("load summary"); + assert_eq!(snapshot.version(), table_info.version); + assert_eq!( + ( + snapshot.protocol().min_reader_version(), + snapshot.protocol().min_writer_version() + ), + (table_info.min_reader_version, table_info.min_writer_version) + ); + }); + Ok(()) +} + +fn reader_test_eager(path: &Path) -> datatest_stable::Result<()> { + let root_dir = format!( + "{}/{}", + env!["CARGO_MANIFEST_DIR"], + path.parent().unwrap().to_str().unwrap() + ); + for skipped in SKIPPED_TESTS { + if root_dir.ends_with(skipped) { + println!("Skipping test: {}", skipped); + return Ok(()); + } + } + + tokio::runtime::Builder::new_current_thread() + .enable_all() + .build()? + .block_on(async { + let case = read_dat_case(root_dir).unwrap(); + + let table = Table::try_from_uri(case.table_root().unwrap()).expect("table"); + let snapshot = LazySnapshot::try_new( + table, + Arc::new(object_store::local::LocalFileSystem::default()), + None, + ) + .await + .unwrap(); + + let table_info = case.table_summary().expect("load summary"); + assert_eq!(snapshot.version(), table_info.version); + assert_eq!( + ( + snapshot.protocol().min_reader_version(), + snapshot.protocol().min_writer_version() + ), + (table_info.min_reader_version, table_info.min_writer_version) + ); + }); + Ok(()) +} + +datatest_stable::harness!( + reader_test_lazy, + "../../dat/out/reader_tests/generated/", + r"test_case_info\.json", + reader_test_eager, + "../../dat/out/reader_tests/generated/", + r"test_case_info\.json" +); From 7a559ac0f637854de46475a6ed183827ad762cb6 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Sun, 19 Jan 2025 00:48:34 +0100 Subject: [PATCH 08/14] feat: add commit infos apis to new snapshots Signed-off-by: Robert Pack --- Cargo.toml | 2 +- crates/core/src/kernel/snapshot_next/eager.rs | 20 +++- .../src/kernel/snapshot_next/iterators.rs | 6 +- crates/core/src/kernel/snapshot_next/lazy.rs | 61 +++++++++++- crates/core/src/kernel/snapshot_next/mod.rs | 98 ++++++++++++++++++- 5 files changed, 173 insertions(+), 14 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c3e53c69af..3935efd8c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,7 +31,7 @@ debug = "line-tables-only" # "default-engine", # "developer-visibility", # ] } -delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "caeb70ab78e4d5f3b56b5105fd3587c1046d1e1b", features = [ +delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "023abf1ee604b77bbaa5efec97e043fc4bdf220b", features = [ "default-engine", "developer-visibility", ] } diff --git a/crates/core/src/kernel/snapshot_next/eager.rs b/crates/core/src/kernel/snapshot_next/eager.rs index 88306b8e49..a1b0c0d4ca 100644 --- a/crates/core/src/kernel/snapshot_next/eager.rs +++ b/crates/core/src/kernel/snapshot_next/eager.rs @@ -2,7 +2,6 @@ use std::sync::Arc; use arrow::compute::{concat_batches, filter_record_batch}; use arrow_array::{BooleanArray, RecordBatch}; -use chrono::format::Item; use delta_kernel::actions::set_transaction::SetTransactionMap; use 
delta_kernel::actions::{get_log_add_schema, get_log_schema, ADD_NAME, REMOVE_NAME}; use delta_kernel::actions::{Add, Metadata, Protocol, SetTransaction}; @@ -11,7 +10,7 @@ use delta_kernel::log_segment::LogSegment; use delta_kernel::scan::log_replay::scan_action_iter; use delta_kernel::schema::Schema; use delta_kernel::table_properties::TableProperties; -use delta_kernel::{EngineData, Expression, Table, Version}; +use delta_kernel::{Engine, EngineData, Expression, Table, Version}; use itertools::Itertools; use object_store::ObjectStore; use url::Url; @@ -19,6 +18,7 @@ use url::Url; use super::iterators::{AddIterator, AddView, AddViewItem}; use super::lazy::LazySnapshot; use super::{Snapshot, SnapshotError}; +use crate::kernel::CommitInfo; use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; /// An eager snapshot of a Delta Table at a specific version. @@ -77,6 +77,14 @@ impl Snapshot for EagerSnapshot { ) -> DeltaResult> { self.snapshot.application_transaction(app_id) } + + fn commit_infos( + &self, + start_version: impl Into>, + limit: impl Into>, + ) -> DeltaResult> { + self.snapshot.commit_infos(start_version, limit) + } } impl EagerSnapshot { @@ -92,7 +100,7 @@ impl EagerSnapshot { LazySnapshot::try_new(Table::try_from_uri(table_root)?, store, version).await?; let files = config .require_files - .then(|| -> DeltaResult<_> { Ok(replay_file_actions(&snapshot)?) }) + .then(|| -> DeltaResult<_> { replay_file_actions(&snapshot) }) .transpose()?; Ok(Self { snapshot, @@ -101,6 +109,10 @@ impl EagerSnapshot { }) } + pub(crate) fn engine_ref(&self) -> &Arc { + self.snapshot.engine_ref() + } + pub fn file_data(&self) -> DeltaResult<&RecordBatch> { Ok(self .files @@ -122,7 +134,7 @@ impl EagerSnapshot { .files .as_ref() .map(|f| f.num_rows()) - .ok_or_else(|| SnapshotError::FilesNotInitialized)?) + .ok_or(SnapshotError::FilesNotInitialized)?) } pub(crate) fn update(&mut self) -> DeltaResult<()> { diff --git a/crates/core/src/kernel/snapshot_next/iterators.rs b/crates/core/src/kernel/snapshot_next/iterators.rs index 4700cb9da3..375fc0061e 100644 --- a/crates/core/src/kernel/snapshot_next/iterators.rs +++ b/crates/core/src/kernel/snapshot_next/iterators.rs @@ -25,7 +25,7 @@ pub struct AddIterator<'a> { } impl AddIterator<'_> { - pub fn try_new<'a>(actions: &'a RecordBatch) -> DeltaResult> { + pub fn try_new(actions: &RecordBatch) -> DeltaResult> { validate_column::(actions, &[ADD_NAME, "path"])?; validate_column::(actions, &[ADD_NAME, "size"])?; validate_column::(actions, &[ADD_NAME, "modificationTime"])?; @@ -108,7 +108,7 @@ pub struct AddViewItem { } impl AddViewItem { - pub fn path(&self) -> &str { + pub fn path(&self) -> &str { extract_column(&self.actions, &[ADD_NAME, "path"]) .unwrap() .as_string::() @@ -273,7 +273,7 @@ fn validate_column<'a, T: Array + 'static>( } } else { return Err(DeltaTableError::from( - crate::protocol::ProtocolError::InvalidField(format!("Column not found",)), + crate::protocol::ProtocolError::InvalidField("Column not found".to_string()), )); } Ok(()) diff --git a/crates/core/src/kernel/snapshot_next/lazy.rs b/crates/core/src/kernel/snapshot_next/lazy.rs index 386d0f63d6..2125b70d93 100644 --- a/crates/core/src/kernel/snapshot_next/lazy.rs +++ b/crates/core/src/kernel/snapshot_next/lazy.rs @@ -1,5 +1,6 @@ //! Snapshot of a Delta Table at a specific version. //! 
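// Usage sketch for the eager file APIs touched above: `file_actions` yields
// parsed `Add` actions, so summing their sizes gives the total active data
// size of the table. The helper is illustrative, not part of the patch.
fn total_active_bytes(snapshot: &EagerSnapshot) -> DeltaResult<i64> {
    let mut total = 0i64;
    for add in snapshot.file_actions()? {
        total += add?.size;
    }
    Ok(total)
}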
+use std::io::{BufRead, BufReader, Cursor}; use std::sync::{Arc, LazyLock}; use arrow::compute::filter_record_batch; @@ -12,6 +13,7 @@ use delta_kernel::engine::default::executor::tokio::{ TokioBackgroundExecutor, TokioMultiThreadExecutor, }; use delta_kernel::engine::default::DefaultEngine; +use delta_kernel::log_segment::LogSegment; use delta_kernel::schema::Schema; use delta_kernel::snapshot::Snapshot as SnapshotInner; use delta_kernel::table_properties::TableProperties; @@ -23,6 +25,7 @@ use url::Url; use super::cache::CommitCacheObjectStore; use super::Snapshot; +use crate::kernel::{Action, CommitInfo}; use crate::{DeltaResult, DeltaTableError}; // TODO: avoid repetitive parsing of json stats @@ -35,7 +38,7 @@ pub struct LazySnapshot { impl Snapshot for LazySnapshot { fn table_root(&self) -> &Url { - &self.inner.table_root() + self.inner.table_root() } fn version(&self) -> Version { @@ -55,7 +58,7 @@ impl Snapshot for LazySnapshot { } fn table_properties(&self) -> &TableProperties { - &self.inner.table_properties() + self.inner.table_properties() } fn files(&self) -> DeltaResult>> { @@ -96,6 +99,58 @@ impl Snapshot for LazySnapshot { let scanner = SetTransactionScanner::new(self.inner.clone()); Ok(scanner.application_transaction(self.engine.as_ref(), app_id.as_ref())?) } + + fn commit_infos( + &self, + start_version: impl Into>, + limit: impl Into>, + ) -> DeltaResult> { + // let start_version = start_version.into(); + let fs_client = self.engine.get_file_system_client(); + let end_version = start_version.into().unwrap_or_else(|| self.version()); + let start_version = limit + .into() + .and_then(|limit| { + if limit == 0 { + Some(end_version) + } else { + Some(end_version.saturating_sub(limit as u64 - 1)) + } + }) + .unwrap_or(0); + let log_root = self.inner.table_root().join("_delta_log").unwrap(); + let mut log_segment = LogSegment::for_table_changes( + fs_client.as_ref(), + log_root, + start_version, + end_version, + )?; + log_segment.ascending_commit_files.reverse(); + let files = log_segment + .ascending_commit_files + .iter() + .map(|commit_file| (commit_file.location.location.clone(), None)) + .collect_vec(); + + Ok(fs_client + .read_files(files)? + .zip(log_segment.ascending_commit_files.into_iter()) + .filter_map(|(data, path)| { + data.ok().and_then(|d| { + let reader = BufReader::new(Cursor::new(d)); + for line in reader.lines() { + match line.and_then(|l| Ok(serde_json::from_str::(&l)?)) { + Ok(Action::CommitInfo(commit_info)) => { + return Some((path.version, commit_info)) + } + Err(e) => return None, + _ => continue, + }; + } + None + }) + })) + } } impl LazySnapshot { @@ -138,7 +193,7 @@ impl LazySnapshot { } /// A shared reference to the engine used for interacting with the Delta Table. 
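// A small worked example of the version-range arithmetic used by
// `commit_infos` above (values are illustrative): with the snapshot at
// version 10 and `limit = Some(3)`, commits 10, 9 and 8 are read, because
// start = 10.saturating_sub(3 - 1) = 8; `limit = Some(0)` degenerates to just
// version 10; no limit starts the range at version 0.
fn commit_range(end_version: u64, limit: Option<usize>) -> (u64, u64) {
    let start = limit
        .map(|l| {
            if l == 0 {
                end_version
            } else {
                end_version.saturating_sub(l as u64 - 1)
            }
        })
        .unwrap_or(0);
    (start, end_version)
}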
-    pub(super) fn engine_ref(&self) -> &Arc {
+    pub(crate) fn engine_ref(&self) -> &Arc {
         &self.engine
     }
 
diff --git a/crates/core/src/kernel/snapshot_next/mod.rs b/crates/core/src/kernel/snapshot_next/mod.rs
index 879ef2824a..b02367c3d0 100644
--- a/crates/core/src/kernel/snapshot_next/mod.rs
+++ b/crates/core/src/kernel/snapshot_next/mod.rs
@@ -4,14 +4,15 @@ use std::sync::Arc;
 
 use arrow_array::RecordBatch;
 use delta_kernel::actions::visitors::SetTransactionMap;
-use delta_kernel::actions::{Add, Metadata, Protocol, SetTransaction};
+use delta_kernel::actions::{Metadata, Protocol, SetTransaction};
 use delta_kernel::expressions::{Scalar, StructData};
 use delta_kernel::schema::Schema;
 use delta_kernel::table_properties::TableProperties;
 use delta_kernel::Version;
-use iterators::{AddIterator, AddView, AddViewItem};
+use iterators::{AddView, AddViewItem};
 use url::Url;
 
+use crate::kernel::actions::CommitInfo;
 use crate::{DeltaResult, DeltaTableError};
 
 pub use eager::EagerSnapshot;
@@ -77,7 +78,7 @@ pub trait Snapshot {
     fn files_view(
         &self,
-    ) -> DeltaResult>>> {
-        Ok(self.files()?.map(|r| r.and_then(|b| AddView::try_new(b))))
+    ) -> DeltaResult>>> {
+        Ok(self.files()?.map(|r| r.and_then(AddView::try_new)))
     }
 
     fn tombstones(&self) -> DeltaResult>>;
@@ -93,10 +94,40 @@ pub trait Snapshot {
     ///
     /// Initiates a log scan, but terminates as soon as the transaction
     /// for the given application is found.
+    ///
+    /// # Parameters
+    /// - `app_id`: The application id for which to fetch the transaction.
+    ///
+    /// # Returns
+    /// The latest transaction for the given application id, if it exists.
     fn application_transaction(
         &self,
         app_id: impl AsRef,
     ) -> DeltaResult>;
+
+    /// Get commit info for the table.
+    ///
+    /// The [`CommitInfo`]s are returned in descending order of version
+    /// with the most recent commit first starting from the `start_version`.
+    ///
+    /// [`CommitInfo`]s are read on a best-effort basis. If the action
+    /// for a version is not available or cannot be parsed, it is skipped.
+    ///
+    /// # Parameters
+    /// - `start_version`: The version from which to start fetching commit info.
+    ///   Defaults to the latest version.
+    /// - `limit`: The maximum number of commit infos to fetch.
+    ///
+    /// # Returns
+    /// An iterator of commit info tuples. The first element of the tuple is the version
+    /// of the commit, the second element is the corresponding commit info.
+    // TODO(roeap): this is currently using our commit info, we should be using
+    // the definition from kernel, once handling over there has matured.
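// Usage sketch for the commit info API documented above (the loop body and
// the choice of a 5-commit window are illustrative; `CommitInfo` is the
// crate's own action type, per the TODO note):
fn print_recent_commits(snapshot: &impl Snapshot) -> DeltaResult<()> {
    for (version, info) in snapshot.commit_infos(None, Some(5))? {
        println!("version {version}: timestamp {:?}", info.timestamp);
    }
    Ok(())
}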
+ fn commit_infos( + &self, + start_version: impl Into>, + limit: impl Into>, + ) -> DeltaResult>; } impl Snapshot for Arc { @@ -142,6 +173,67 @@ impl Snapshot for Arc { ) -> DeltaResult> { self.as_ref().application_transaction(app_id) } + + fn commit_infos( + &self, + start_version: impl Into>, + limit: impl Into>, + ) -> DeltaResult> { + self.as_ref().commit_infos(start_version, limit) + } +} + +impl Snapshot for Box { + fn table_root(&self) -> &Url { + self.as_ref().table_root() + } + + fn version(&self) -> Version { + self.as_ref().version() + } + + fn schema(&self) -> &Schema { + self.as_ref().schema() + } + + fn metadata(&self) -> &Metadata { + self.as_ref().metadata() + } + + fn protocol(&self) -> &Protocol { + self.as_ref().protocol() + } + + fn table_properties(&self) -> &TableProperties { + self.as_ref().table_properties() + } + + fn files(&self) -> DeltaResult>> { + self.as_ref().files() + } + + fn tombstones(&self) -> DeltaResult>> { + self.as_ref().tombstones() + } + + fn application_transactions(&self) -> DeltaResult { + self.as_ref().application_transactions() + } + + fn application_transaction( + &self, + app_id: impl AsRef, + ) -> DeltaResult> { + self.as_ref().application_transaction(app_id) + } + + fn commit_infos( + &self, + start_version: impl Into>, + limit: impl Into>, + ) -> DeltaResult> { + self.as_ref().commit_infos(start_version, limit) + } } #[cfg(test)] From adb9df8e3ae73aabafc317fe05f6f89be55f43d3 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Mon, 20 Jan 2025 00:48:02 +0100 Subject: [PATCH 09/14] feat: snapshot updates and improved file data iterators Signed-off-by: Robert Pack --- .github/workflows/codecov.yml | 10 +- Cargo.toml | 2 +- crates/core/src/kernel/snapshot_next/eager.rs | 118 +++++++++++------- .../src/kernel/snapshot_next/iterators.rs | 106 +++++++++++----- crates/core/src/kernel/snapshot_next/lazy.rs | 51 ++++---- crates/core/src/kernel/snapshot_next/mod.rs | 116 ++++++++--------- 6 files changed, 234 insertions(+), 169 deletions(-) diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml index 69212c55b0..dbb6fbd0ad 100644 --- a/.github/workflows/codecov.yml +++ b/.github/workflows/codecov.yml @@ -16,17 +16,25 @@ jobs: CARGO_TERM_COLOR: always steps: - uses: actions/checkout@v4 + - name: Install rust uses: actions-rs/toolchain@v1 with: profile: default - toolchain: '1.81' + toolchain: "1.81" override: true + - name: Install cargo-llvm-cov uses: taiki-e/install-action@cargo-llvm-cov + - uses: Swatinem/rust-cache@v2 + + - name: Load DAT data + uses: ./.github/actions/load-dat + - name: Generate code coverage run: cargo llvm-cov --features ${DEFAULT_FEATURES} --workspace --codecov --output-path codecov.json -- --skip read_table_version_hdfs --skip test_read_tables_lakefs + - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 with: diff --git a/Cargo.toml b/Cargo.toml index 3935efd8c3..3981d0f095 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,7 +31,7 @@ debug = "line-tables-only" # "default-engine", # "developer-visibility", # ] } -delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "023abf1ee604b77bbaa5efec97e043fc4bdf220b", features = [ +delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "2e09bfcc0447283a3acc320ad2350f4075dba83e", features = [ "default-engine", "developer-visibility", ] } diff --git a/crates/core/src/kernel/snapshot_next/eager.rs b/crates/core/src/kernel/snapshot_next/eager.rs index a1b0c0d4ca..16e5073e40 100644 --- 
a/crates/core/src/kernel/snapshot_next/eager.rs +++ b/crates/core/src/kernel/snapshot_next/eager.rs @@ -10,12 +10,12 @@ use delta_kernel::log_segment::LogSegment; use delta_kernel::scan::log_replay::scan_action_iter; use delta_kernel::schema::Schema; use delta_kernel::table_properties::TableProperties; -use delta_kernel::{Engine, EngineData, Expression, Table, Version}; +use delta_kernel::{Engine, EngineData, Expression, ExpressionRef, Table, Version}; use itertools::Itertools; use object_store::ObjectStore; use url::Url; -use super::iterators::{AddIterator, AddView, AddViewItem}; +use super::iterators::AddIterator; use super::lazy::LazySnapshot; use super::{Snapshot, SnapshotError}; use crate::kernel::CommitInfo; @@ -28,7 +28,6 @@ use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; pub struct EagerSnapshot { snapshot: LazySnapshot, files: Option, - predicate: Option>, } impl Snapshot for EagerSnapshot { @@ -56,11 +55,15 @@ impl Snapshot for EagerSnapshot { self.snapshot.table_properties() } - fn files(&self) -> DeltaResult>> { - Ok(std::iter::once(Ok(self - .files - .clone() - .ok_or(SnapshotError::FilesNotInitialized)?))) + fn files( + &self, + predicate: impl Into>, + ) -> DeltaResult>> { + Ok(std::iter::once(scan_as_log_data( + &self.snapshot, + vec![(self.file_data()?.clone(), false)], + predicate, + ))) } fn tombstones(&self) -> DeltaResult>> { @@ -85,6 +88,10 @@ impl Snapshot for EagerSnapshot { ) -> DeltaResult> { self.snapshot.commit_infos(start_version, limit) } + + fn update(&mut self, target_version: impl Into>) -> DeltaResult { + self.update_impl(target_version) + } } impl EagerSnapshot { @@ -94,7 +101,6 @@ impl EagerSnapshot { store: Arc, config: DeltaTableConfig, version: impl Into>, - predicate: impl Into>>, ) -> DeltaResult { let snapshot = LazySnapshot::try_new(Table::try_from_uri(table_root)?, store, version).await?; @@ -102,11 +108,7 @@ impl EagerSnapshot { .require_files .then(|| -> DeltaResult<_> { replay_file_actions(&snapshot) }) .transpose()?; - Ok(Self { - snapshot, - files, - predicate: predicate.into(), - }) + Ok(Self { snapshot, files }) } pub(crate) fn engine_ref(&self) -> &Arc { @@ -120,10 +122,6 @@ impl EagerSnapshot { .ok_or(SnapshotError::FilesNotInitialized)?) } - pub fn files(&self) -> DeltaResult> { - AddView::try_new(self.file_data()?.clone()) - } - pub fn file_actions(&self) -> DeltaResult> + '_> { AddIterator::try_new(self.file_data()?) } @@ -137,14 +135,28 @@ impl EagerSnapshot { .ok_or(SnapshotError::FilesNotInitialized)?) } - pub(crate) fn update(&mut self) -> DeltaResult<()> { - let log_root = self.snapshot.table_root().join("_delta_log/").unwrap(); - let fs_client = self.snapshot.engine_ref().get_file_system_client(); + pub(crate) fn update_impl( + &mut self, + target_version: impl Into>, + ) -> DeltaResult { + let target_version = target_version.into(); + + let mut snapshot = self.snapshot.clone(); + if !snapshot.update(target_version.clone())? 
{ + return Ok(false); + } + + let log_root = snapshot.table_root().join("_delta_log/").unwrap(); + let fs_client = snapshot.engine_ref().get_file_system_client(); let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; let checkpoint_read_schema = get_log_add_schema().clone(); - let segment = - LogSegment::for_table_changes(fs_client.as_ref(), log_root, self.version() + 1, None)?; + let segment = LogSegment::for_table_changes( + fs_client.as_ref(), + log_root, + self.snapshot.version() + 1, + snapshot.version(), + )?; let mut slice_iter = segment .replay( self.snapshot.engine_ref().as_ref(), @@ -168,9 +180,9 @@ impl EagerSnapshot { false, )); - self.files = Some(scan_as_log_data(&self.snapshot, slice_iter)?); + self.files = Some(scan_as_log_data(&self.snapshot, slice_iter, None)?); - Ok(()) + Ok(true) } } @@ -195,12 +207,13 @@ fn replay_file_actions(snapshot: &LazySnapshot) -> DeltaResult { .flatten() .collect::, _>>()?; - scan_as_log_data(snapshot, curr_data) + scan_as_log_data(snapshot, curr_data, None) } fn scan_as_log_data( snapshot: &LazySnapshot, curr_data: Vec<(RecordBatch, bool)>, + predicate: impl Into>, ) -> Result { let scan_iter = curr_data.clone().into_iter().map(|(data, flag)| { Ok(( @@ -209,24 +222,36 @@ fn scan_as_log_data( )) }); - let res = scan_action_iter(snapshot.engine_ref().as_ref(), scan_iter, None) - .map(|res| { - res.and_then(|(d, selection)| { - Ok(( - RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?), - selection, - )) - }) - }) - .zip(curr_data.into_iter()) - .map(|(scan_res, (data_raw, _))| match scan_res { - Ok((_, selection)) => { - let data = filter_record_batch(&data_raw, &BooleanArray::from(selection))?; - Ok(data.project(&[0])?) - } - Err(e) => Err(e), + let scan = snapshot + .inner + .clone() + .scan_builder() + .with_predicate(predicate) + .build()?; + + let res = scan_action_iter( + snapshot.engine_ref().as_ref(), + scan_iter, + scan.physical_predicate() + .map(|p| (p, scan.schema().clone())), + ) + .map(|res| { + res.and_then(|(d, selection)| { + Ok(( + RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?), + selection, + )) }) - .collect::, _>>()?; + }) + .zip(curr_data.into_iter()) + .map(|(scan_res, (data_raw, _))| match scan_res { + Ok((_, selection)) => { + let data = filter_record_batch(&data_raw, &BooleanArray::from(selection))?; + Ok(data.project(&[0])?) + } + Err(e) => Err(e), + }) + .collect::, _>>()?; Ok(concat_batches(res[0].schema_ref(), &res)?) } @@ -253,18 +278,19 @@ mod tests { table.location(), Arc::new(object_store::local::LocalFileSystem::default()), Default::default(), - Some(1), - None, + 0, ) .await?; + println!("before update"); + // assert_eq!(snapshot.version(), table_info.version); // assert_eq!( // snapshot.protocol().min_reader_version(), // table_info.min_reader_version // ); - snapshot.update()?; + snapshot.update(None)?; for file in snapshot.file_actions()? 
{ println!("file: {:#?}", file.unwrap()); diff --git a/crates/core/src/kernel/snapshot_next/iterators.rs b/crates/core/src/kernel/snapshot_next/iterators.rs index 375fc0061e..9a01e24254 100644 --- a/crates/core/src/kernel/snapshot_next/iterators.rs +++ b/crates/core/src/kernel/snapshot_next/iterators.rs @@ -4,7 +4,8 @@ use std::sync::Arc; use arrow_array::cast::AsArray; use arrow_array::types::Int64Type; use arrow_array::{ - Array, ArrayRef, BooleanArray, Int64Array, RecordBatch, StringArray, StructArray, + Array, ArrayRef, BooleanArray, Int64Array, RecordBatch, RecordBatchReader, StringArray, + StructArray, }; use chrono::{DateTime, Utc}; use delta_kernel::actions::visitors::AddVisitor; @@ -76,38 +77,6 @@ pub struct AddView { } impl AddView { - pub fn try_new(actions: RecordBatch) -> DeltaResult { - validate_column::(&actions, &[ADD_NAME, "path"])?; - validate_column::(&actions, &[ADD_NAME, "size"])?; - validate_column::(&actions, &[ADD_NAME, "modificationTime"])?; - validate_column::(&actions, &[ADD_NAME, "dataChange"])?; - Ok(Self { actions, index: 0 }) - } -} - -impl Iterator for AddView { - type Item = AddViewItem; - - fn next(&mut self) -> Option { - if self.index < self.actions.num_rows() { - let add = AddViewItem { - actions: self.actions.clone(), - index: self.index, - }; - self.index += 1; - Some(add) - } else { - None - } - } -} - -pub struct AddViewItem { - actions: RecordBatch, - index: usize, -} - -impl AddViewItem { pub fn path(&self) -> &str { extract_column(&self.actions, &[ADD_NAME, "path"]) .unwrap() @@ -253,6 +222,77 @@ impl Iterator for LogicalFileView { } } +pub struct AddViewIterator +where + I: IntoIterator>, +{ + inner: I::IntoIter, + batch: Option, + current: usize, +} + +impl AddViewIterator +where + I: IntoIterator>, +{ + /// Create a new [AddViewIterator]. + /// + /// If `iter` is an infallible iterator, use `.map(Ok)`. + pub fn new(iter: I) -> Self { + Self { + inner: iter.into_iter(), + batch: None, + current: 0, + } + } +} + +impl Iterator for AddViewIterator +where + I: IntoIterator>, +{ + type Item = DeltaResult; + + fn next(&mut self) -> Option { + if let Some(batch) = &self.batch { + if self.current < batch.num_rows() { + let item = AddView { + actions: batch.clone(), + index: self.current, + }; + self.current += 1; + return Some(Ok(item)); + } + } + match self.inner.next() { + Some(Ok(batch)) => { + if validate_add(&batch).is_err() { + return Some(Err(DeltaTableError::generic( + "Invalid add action data encountered.", + ))); + } + self.batch = Some(batch); + self.current = 0; + self.next() + } + Some(Err(e)) => Some(Err(e)), + None => None, + } + } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} + +pub(crate) fn validate_add(batch: &RecordBatch) -> DeltaResult<()> { + validate_column::(batch, &[ADD_NAME, "path"])?; + validate_column::(batch, &[ADD_NAME, "size"])?; + validate_column::(batch, &[ADD_NAME, "modificationTime"])?; + validate_column::(batch, &[ADD_NAME, "dataChange"])?; + Ok(()) +} + fn validate_column<'a, T: Array + 'static>( actions: &'a RecordBatch, col: &'a [impl AsRef], diff --git a/crates/core/src/kernel/snapshot_next/lazy.rs b/crates/core/src/kernel/snapshot_next/lazy.rs index 2125b70d93..1a360d8371 100644 --- a/crates/core/src/kernel/snapshot_next/lazy.rs +++ b/crates/core/src/kernel/snapshot_next/lazy.rs @@ -61,9 +61,24 @@ impl Snapshot for LazySnapshot { self.inner.table_properties() } - fn files(&self) -> DeltaResult>> { - Ok(self - .files_impl(None)? 
+ fn files( + &self, + predicate: impl Into>>, + ) -> DeltaResult>> { + let scan = self + .inner + .clone() + .scan_builder() + .with_predicate(predicate) + .build()?; + Ok(scan + .scan_data(self.engine.as_ref())? + .map(|res| { + res.and_then(|(data, predicate)| { + let batch: RecordBatch = ArrowEngineData::try_from_engine_data(data)?.into(); + Ok(filter_record_batch(&batch, &BooleanArray::from(predicate))?) + }) + }) .map(|batch| batch.map_err(|e| e.into()))) } @@ -151,6 +166,13 @@ impl Snapshot for LazySnapshot { }) })) } + + fn update(&mut self, target_version: impl Into>) -> DeltaResult { + let mut snapshot = self.inner.as_ref().clone(); + let did_update = snapshot.update(target_version, self.engine_ref().as_ref())?; + self.inner = Arc::new(snapshot); + Ok(did_update) + } } impl LazySnapshot { @@ -210,29 +232,6 @@ impl LazySnapshot { .find(|f| f.version == version) .map(|f| f.location.last_modified) } - - /// read all active files from the log - fn files_impl( - &self, - predicate: impl Into>>, - ) -> DeltaResult>> { - let scan = self - .inner - .clone() - .scan_builder() - .with_predicate(predicate) - .build()?; - Ok(scan.scan_data(self.engine.as_ref())?.map(|res| { - res.and_then(|(data, mut predicate)| { - let batch: RecordBatch = ArrowEngineData::try_from_engine_data(data)?.into(); - if predicate.len() < batch.num_rows() { - predicate - .extend(std::iter::repeat(true).take(batch.num_rows() - predicate.len())); - } - Ok(filter_record_batch(&batch, &BooleanArray::from(predicate))?) - }) - })) - } } #[cfg(test)] diff --git a/crates/core/src/kernel/snapshot_next/mod.rs b/crates/core/src/kernel/snapshot_next/mod.rs index b02367c3d0..9acd55494d 100644 --- a/crates/core/src/kernel/snapshot_next/mod.rs +++ b/crates/core/src/kernel/snapshot_next/mod.rs @@ -1,15 +1,13 @@ //! Snapshot of a Delta Table at a specific version. -//! -use std::sync::Arc; use arrow_array::RecordBatch; use delta_kernel::actions::visitors::SetTransactionMap; -use delta_kernel::actions::{Metadata, Protocol, SetTransaction}; +use delta_kernel::actions::{Add, Metadata, Protocol, SetTransaction}; use delta_kernel::expressions::{Scalar, StructData}; use delta_kernel::schema::Schema; use delta_kernel::table_properties::TableProperties; -use delta_kernel::Version; -use iterators::{AddView, AddViewItem}; +use delta_kernel::{ExpressionRef, Version}; +use iterators::{AddView, AddViewIterator}; use url::Url; use crate::kernel::actions::CommitInfo; @@ -54,6 +52,10 @@ impl StructDataExt for StructData { } } +/// In-memory representation of a specific snapshot of a Delta table. While a `DeltaTable` exists +/// throughout time, `Snapshot`s represent a view of a table at a specific point in time; they +/// have a defined schema (which may change over time for any given table), specific version, and +/// frozen log segment. pub trait Snapshot { /// Location where the Delta Table (metadata) is stored. fn table_root(&self) -> &Url; @@ -65,22 +67,47 @@ pub trait Snapshot { fn schema(&self) -> &Schema; /// Table [`Metadata`] at this `Snapshot`s version. + /// + /// Metadata contains information about the table, such as the table name, + /// the schema, the partition columns, the configuration, etc. fn metadata(&self) -> &Metadata; /// Table [`Protocol`] at this `Snapshot`s version. + /// + /// The protocol indicates the min reader / writer version required to + /// read / write the table. For modern readers / writers, the reader / + /// writer features active in the table are also available. 
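// Sketch of the kind of gating the protocol information above enables. The
// threshold of 3 is an arbitrary example of what an engine might support,
// not a rule taken from this patch.
fn check_can_read(protocol: &Protocol) -> Result<(), String> {
    if protocol.min_reader_version() > 3 {
        Err(format!(
            "table requires reader version {}, this reader supports up to 3",
            protocol.min_reader_version()
        ))
    } else {
        Ok(())
    }
}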
     fn protocol(&self) -> &Protocol;
 
     /// Get the [`TableProperties`] for this [`Snapshot`].
     fn table_properties(&self) -> &TableProperties;
 
-    fn files(&self) -> DeltaResult>>;
+    /// Get all currently active files in the table.
+    ///
+    /// # Parameters
+    /// - `predicate`: An optional predicate to filter the files based on file statistics.
+    ///
+    /// # Returns
+    /// An iterator of [`RecordBatch`]es, where each batch contains add action data.
+    fn files(
+        &self,
+        predicate: impl Into>,
+    ) -> DeltaResult>>;
 
     fn files_view(
         &self,
-    ) -> DeltaResult>>> {
-        Ok(self.files()?.map(|r| r.and_then(AddView::try_new)))
+        predicate: impl Into>,
+    ) -> DeltaResult>> {
+        Ok(AddViewIterator::new(self.files(predicate)?))
     }
 
+    /// Get all tombstones in the table.
+    ///
+    /// Remove Actions (tombstones) are records that indicate that a file has been deleted.
+    /// They are returned mostly for the purposes of VACUUM operations.
+    ///
+    /// # Returns
+    /// An iterator of [`RecordBatch`]es, where each batch contains remove action data.
     fn tombstones(&self) -> DeltaResult>>;
 
     /// Scan the Delta Log to obtain the latest transaction for all applications
@@ -128,59 +155,17 @@ pub trait Snapshot {
         start_version: impl Into>,
         limit: impl Into>,
     ) -> DeltaResult>;
-}
-
-impl Snapshot for Arc {
-    fn table_root(&self) -> &Url {
-        self.as_ref().table_root()
-    }
-
-    fn version(&self) -> Version {
-        self.as_ref().version()
-    }
-
-    fn schema(&self) -> &Schema {
-        self.as_ref().schema()
-    }
-
-    fn metadata(&self) -> &Metadata {
-        self.as_ref().metadata()
-    }
-
-    fn protocol(&self) -> &Protocol {
-        self.as_ref().protocol()
-    }
-
-    fn table_properties(&self) -> &TableProperties {
-        self.as_ref().table_properties()
-    }
-
-    fn files(&self) -> DeltaResult>> {
-        self.as_ref().files()
-    }
-
-    fn tombstones(&self) -> DeltaResult>> {
-        self.as_ref().tombstones()
-    }
-
-    fn application_transactions(&self) -> DeltaResult {
-        self.as_ref().application_transactions()
-    }
-
-    fn application_transaction(
-        &self,
-        app_id: impl AsRef,
-    ) -> DeltaResult> {
-        self.as_ref().application_transaction(app_id)
-    }
-
-    fn commit_infos(
-        &self,
-        start_version: impl Into>,
-        limit: impl Into>,
-    ) -> DeltaResult> {
-        self.as_ref().commit_infos(start_version, limit)
-    }
+    /// Update the snapshot to a specific version.
+    ///
+    /// The target version must be greater than the current version of the snapshot.
+    ///
+    /// # Parameters
+    /// - `target_version`: The version to update the snapshot to. Defaults to latest.
+    ///
+    /// # Returns
+    /// A boolean indicating if the snapshot was updated.
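// Intended calling pattern for `update` as documented above (sketch only;
// `snapshot` may be any implementation of the trait): advance to the latest
// version and re-read file metadata only when something actually changed.
fn refresh(snapshot: &mut impl Snapshot) -> DeltaResult<bool> {
    let changed = snapshot.update(None)?;
    if changed {
        for batch in snapshot.files(None)? {
            let _batch = batch?; // e.g. hand the add-action batches to a planner
        }
    }
    Ok(changed)
}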
+ fn update(&mut self, target_version: impl Into>) -> DeltaResult; } impl Snapshot for Box { @@ -208,8 +193,11 @@ impl Snapshot for Box { self.as_ref().table_properties() } - fn files(&self) -> DeltaResult>> { - self.as_ref().files() + fn files( + &self, + predicate: impl Into>, + ) -> DeltaResult>> { + self.as_ref().files(predicate) } fn tombstones(&self) -> DeltaResult>> { @@ -234,6 +222,10 @@ impl Snapshot for Box { ) -> DeltaResult> { self.as_ref().commit_infos(start_version, limit) } + + fn update(&mut self, target_version: impl Into>) -> DeltaResult { + self.as_mut().update(target_version) + } } #[cfg(test)] From f3b0edb08389dc4814670be2364ccc27b3a91f6d Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Tue, 21 Jan 2025 15:02:11 +0100 Subject: [PATCH 10/14] fix: cocnsistent schemas in file replay and object safe snapshot trait Signed-off-by: Robert Pack --- Cargo.toml | 10 +- crates/core/src/kernel/snapshot_next/eager.rs | 171 ++---------- .../src/kernel/snapshot_next/iterators.rs | 8 +- crates/core/src/kernel/snapshot_next/lazy.rs | 117 ++++---- crates/core/src/kernel/snapshot_next/mod.rs | 259 ++++++++++++++++-- 5 files changed, 327 insertions(+), 238 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3981d0f095..0bbe1e07ab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,14 +27,14 @@ debug = "line-tables-only" [workspace.dependencies] # delta_kernel = { version = "=0.6.0", features = ["default-engine"] } -# delta_kernel = { path = "../delta-kernel-rs/kernel", features = [ -# "default-engine", -# "developer-visibility", -# ] } -delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "2e09bfcc0447283a3acc320ad2350f4075dba83e", features = [ +delta_kernel = { path = "../delta-kernel-rs/kernel", features = [ "default-engine", "developer-visibility", ] } +# delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "2e09bfcc0447283a3acc320ad2350f4075dba83e", features = [ +# "default-engine", +# "developer-visibility", +# ] } # arrow arrow = { version = "53" } diff --git a/crates/core/src/kernel/snapshot_next/eager.rs b/crates/core/src/kernel/snapshot_next/eager.rs index 16e5073e40..83e0a00863 100644 --- a/crates/core/src/kernel/snapshot_next/eager.rs +++ b/crates/core/src/kernel/snapshot_next/eager.rs @@ -1,23 +1,21 @@ use std::sync::Arc; -use arrow::compute::{concat_batches, filter_record_batch}; -use arrow_array::{BooleanArray, RecordBatch}; +use arrow_array::RecordBatch; use delta_kernel::actions::set_transaction::SetTransactionMap; use delta_kernel::actions::{get_log_add_schema, get_log_schema, ADD_NAME, REMOVE_NAME}; use delta_kernel::actions::{Add, Metadata, Protocol, SetTransaction}; use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::log_segment::LogSegment; -use delta_kernel::scan::log_replay::scan_action_iter; use delta_kernel::schema::Schema; use delta_kernel::table_properties::TableProperties; -use delta_kernel::{Engine, EngineData, Expression, ExpressionRef, Table, Version}; +use delta_kernel::{ExpressionRef, Table, Version}; use itertools::Itertools; use object_store::ObjectStore; use url::Url; use super::iterators::AddIterator; use super::lazy::LazySnapshot; -use super::{Snapshot, SnapshotError}; +use super::{replay_file_actions, scan_as_log_data, Snapshot, SnapshotError}; use crate::kernel::CommitInfo; use crate::{DeltaResult, DeltaTableConfig, DeltaTableError}; @@ -55,18 +53,25 @@ impl Snapshot for EagerSnapshot { self.snapshot.table_properties() } + fn logical_files( + &self, + _predicate: Option, + ) -> 
DeltaResult>>> { + todo!() + } + fn files( &self, - predicate: impl Into>, - ) -> DeltaResult>> { - Ok(std::iter::once(scan_as_log_data( + predicate: Option, + ) -> DeltaResult>>> { + Ok(Box::new(std::iter::once(scan_as_log_data( &self.snapshot, vec![(self.file_data()?.clone(), false)], predicate, - ))) + )))) } - fn tombstones(&self) -> DeltaResult>> { + fn tombstones(&self) -> DeltaResult>>> { self.snapshot.tombstones() } @@ -74,22 +79,19 @@ impl Snapshot for EagerSnapshot { self.snapshot.application_transactions() } - fn application_transaction( - &self, - app_id: impl AsRef, - ) -> DeltaResult> { + fn application_transaction(&self, app_id: &str) -> DeltaResult> { self.snapshot.application_transaction(app_id) } fn commit_infos( &self, - start_version: impl Into>, - limit: impl Into>, - ) -> DeltaResult> { + start_version: Option, + limit: Option, + ) -> DeltaResult>> { self.snapshot.commit_infos(start_version, limit) } - fn update(&mut self, target_version: impl Into>) -> DeltaResult { + fn update(&mut self, target_version: Option) -> DeltaResult { self.update_impl(target_version) } } @@ -106,15 +108,11 @@ impl EagerSnapshot { LazySnapshot::try_new(Table::try_from_uri(table_root)?, store, version).await?; let files = config .require_files - .then(|| -> DeltaResult<_> { replay_file_actions(&snapshot) }) + .then(|| -> DeltaResult<_> { replay_file_actions(&snapshot, None) }) .transpose()?; Ok(Self { snapshot, files }) } - pub(crate) fn engine_ref(&self) -> &Arc { - self.snapshot.engine_ref() - } - pub fn file_data(&self) -> DeltaResult<&RecordBatch> { Ok(self .files @@ -135,18 +133,16 @@ impl EagerSnapshot { .ok_or(SnapshotError::FilesNotInitialized)?) } - pub(crate) fn update_impl( - &mut self, - target_version: impl Into>, - ) -> DeltaResult { - let target_version = target_version.into(); - + fn update_impl(&mut self, target_version: Option) -> DeltaResult { let mut snapshot = self.snapshot.clone(); if !snapshot.update(target_version.clone())? { return Ok(false); } - let log_root = snapshot.table_root().join("_delta_log/").unwrap(); + let log_root = snapshot + .table_root() + .join("_delta_log/") + .map_err(|e| DeltaTableError::generic(e))?; let fs_client = snapshot.engine_ref().get_file_system_client(); let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; let checkpoint_read_schema = get_log_add_schema().clone(); @@ -181,121 +177,8 @@ impl EagerSnapshot { )); self.files = Some(scan_as_log_data(&self.snapshot, slice_iter, None)?); + self.snapshot = snapshot; Ok(true) } } - -fn replay_file_actions(snapshot: &LazySnapshot) -> DeltaResult { - let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?; - let checkpoint_read_schema = get_log_add_schema().clone(); - - let curr_data = snapshot - .inner - ._log_segment() - .replay( - snapshot.engine_ref().as_ref(), - commit_read_schema.clone(), - checkpoint_read_schema.clone(), - None, - )? 
- .map_ok( - |(data, flag)| -> Result<(RecordBatch, bool), delta_kernel::Error> { - Ok((ArrowEngineData::try_from_engine_data(data)?.into(), flag)) - }, - ) - .flatten() - .collect::, _>>()?; - - scan_as_log_data(snapshot, curr_data, None) -} - -fn scan_as_log_data( - snapshot: &LazySnapshot, - curr_data: Vec<(RecordBatch, bool)>, - predicate: impl Into>, -) -> Result { - let scan_iter = curr_data.clone().into_iter().map(|(data, flag)| { - Ok(( - Box::new(ArrowEngineData::new(data.clone())) as Box, - flag, - )) - }); - - let scan = snapshot - .inner - .clone() - .scan_builder() - .with_predicate(predicate) - .build()?; - - let res = scan_action_iter( - snapshot.engine_ref().as_ref(), - scan_iter, - scan.physical_predicate() - .map(|p| (p, scan.schema().clone())), - ) - .map(|res| { - res.and_then(|(d, selection)| { - Ok(( - RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?), - selection, - )) - }) - }) - .zip(curr_data.into_iter()) - .map(|(scan_res, (data_raw, _))| match scan_res { - Ok((_, selection)) => { - let data = filter_record_batch(&data_raw, &BooleanArray::from(selection))?; - Ok(data.project(&[0])?) - } - Err(e) => Err(e), - }) - .collect::, _>>()?; - - Ok(concat_batches(res[0].schema_ref(), &res)?) -} - -#[cfg(test)] -mod tests { - use deltalake_test::acceptance::{read_dat_case, TestCaseInfo}; - use deltalake_test::TestResult; - - use super::super::tests::get_dat_dir; - use super::*; - - #[tokio::test] - async fn load_eager_snapshot() -> TestResult<()> { - let mut dat_dir = get_dat_dir(); - dat_dir.push("multi_partitioned"); - - let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; - let table_info = dat_info.table_summary()?; - - let table = Table::try_from_uri(dat_info.table_root()?)?; - - let mut snapshot = EagerSnapshot::try_new( - table.location(), - Arc::new(object_store::local::LocalFileSystem::default()), - Default::default(), - 0, - ) - .await?; - - println!("before update"); - - // assert_eq!(snapshot.version(), table_info.version); - // assert_eq!( - // snapshot.protocol().min_reader_version(), - // table_info.min_reader_version - // ); - - snapshot.update(None)?; - - for file in snapshot.file_actions()? 
{ - println!("file: {:#?}", file.unwrap()); - } - - Ok(()) - } -} diff --git a/crates/core/src/kernel/snapshot_next/iterators.rs b/crates/core/src/kernel/snapshot_next/iterators.rs index 9a01e24254..38aa0e1c2f 100644 --- a/crates/core/src/kernel/snapshot_next/iterators.rs +++ b/crates/core/src/kernel/snapshot_next/iterators.rs @@ -4,8 +4,7 @@ use std::sync::Arc; use arrow_array::cast::AsArray; use arrow_array::types::Int64Type; use arrow_array::{ - Array, ArrayRef, BooleanArray, Int64Array, RecordBatch, RecordBatchReader, StringArray, - StructArray, + Array, ArrayRef, BooleanArray, Int64Array, RecordBatch, StringArray, StructArray, }; use chrono::{DateTime, Utc}; use delta_kernel::actions::visitors::AddVisitor; @@ -27,10 +26,7 @@ pub struct AddIterator<'a> { impl AddIterator<'_> { pub fn try_new(actions: &RecordBatch) -> DeltaResult> { - validate_column::(actions, &[ADD_NAME, "path"])?; - validate_column::(actions, &[ADD_NAME, "size"])?; - validate_column::(actions, &[ADD_NAME, "modificationTime"])?; - validate_column::(actions, &[ADD_NAME, "dataChange"])?; + validate_add(&actions)?; let visitor = AddVisitor::new(); let fields = visitor.selected_column_names_and_types(); diff --git a/crates/core/src/kernel/snapshot_next/lazy.rs b/crates/core/src/kernel/snapshot_next/lazy.rs index 1a360d8371..572b4ead37 100644 --- a/crates/core/src/kernel/snapshot_next/lazy.rs +++ b/crates/core/src/kernel/snapshot_next/lazy.rs @@ -24,7 +24,7 @@ use object_store::ObjectStore; use url::Url; use super::cache::CommitCacheObjectStore; -use super::Snapshot; +use super::{replay_file_actions, Snapshot}; use crate::kernel::{Action, CommitInfo}; use crate::{DeltaResult, DeltaTableError}; @@ -61,45 +61,57 @@ impl Snapshot for LazySnapshot { self.inner.table_properties() } - fn files( + fn logical_files( &self, - predicate: impl Into>>, - ) -> DeltaResult>> { + predicate: Option, + ) -> DeltaResult>>> { let scan = self .inner .clone() .scan_builder() .with_predicate(predicate) .build()?; - Ok(scan - .scan_data(self.engine.as_ref())? - .map(|res| { - res.and_then(|(data, predicate)| { - let batch: RecordBatch = ArrowEngineData::try_from_engine_data(data)?.into(); - Ok(filter_record_batch(&batch, &BooleanArray::from(predicate))?) + Ok(Box::new( + scan.scan_data(self.engine.as_ref())? + .map(|res| { + res.and_then(|(data, predicate)| { + let batch: RecordBatch = + ArrowEngineData::try_from_engine_data(data)?.into(); + Ok(filter_record_batch(&batch, &BooleanArray::from(predicate))?) + }) }) - }) - .map(|batch| batch.map_err(|e| e.into()))) + .map(|batch| batch.map_err(|e| e.into())), + )) + } + + fn files( + &self, + predicate: Option>, + ) -> DeltaResult>>> { + Ok(Box::new(std::iter::once(replay_file_actions( + &self, predicate, + )))) } - fn tombstones(&self) -> DeltaResult>> { + fn tombstones(&self) -> DeltaResult>>> { static META_PREDICATE: LazyLock> = LazyLock::new(|| { Some(Arc::new( Expression::column([REMOVE_NAME, "path"]).is_not_null(), )) }); let read_schema = get_log_schema().project(&[REMOVE_NAME])?; - Ok(self - .inner - ._log_segment() - .replay( - self.engine.as_ref(), - read_schema.clone(), - read_schema, - META_PREDICATE.clone(), - )? - .map_ok(|(d, _)| Ok(RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?))) - .flatten()) + Ok(Box::new( + self.inner + ._log_segment() + .replay( + self.engine.as_ref(), + read_schema.clone(), + read_schema, + META_PREDICATE.clone(), + )? 
+ .map_ok(|(d, _)| Ok(RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?))) + .flatten(), + )) } fn application_transactions(&self) -> DeltaResult { @@ -107,24 +119,20 @@ impl Snapshot for LazySnapshot { Ok(scanner.application_transactions(self.engine.as_ref())?) } - fn application_transaction( - &self, - app_id: impl AsRef, - ) -> DeltaResult> { + fn application_transaction(&self, app_id: &str) -> DeltaResult> { let scanner = SetTransactionScanner::new(self.inner.clone()); - Ok(scanner.application_transaction(self.engine.as_ref(), app_id.as_ref())?) + Ok(scanner.application_transaction(self.engine.as_ref(), app_id)?) } fn commit_infos( &self, - start_version: impl Into>, - limit: impl Into>, - ) -> DeltaResult> { + start_version: Option, + limit: Option, + ) -> DeltaResult>> { // let start_version = start_version.into(); let fs_client = self.engine.get_file_system_client(); - let end_version = start_version.into().unwrap_or_else(|| self.version()); + let end_version = start_version.unwrap_or_else(|| self.version()); let start_version = limit - .into() .and_then(|limit| { if limit == 0 { Some(end_version) @@ -133,6 +141,7 @@ impl Snapshot for LazySnapshot { } }) .unwrap_or(0); + let log_root = self.inner.table_root().join("_delta_log").unwrap(); let mut log_segment = LogSegment::for_table_changes( fs_client.as_ref(), @@ -147,27 +156,29 @@ impl Snapshot for LazySnapshot { .map(|commit_file| (commit_file.location.location.clone(), None)) .collect_vec(); - Ok(fs_client - .read_files(files)? - .zip(log_segment.ascending_commit_files.into_iter()) - .filter_map(|(data, path)| { - data.ok().and_then(|d| { - let reader = BufReader::new(Cursor::new(d)); - for line in reader.lines() { - match line.and_then(|l| Ok(serde_json::from_str::(&l)?)) { - Ok(Action::CommitInfo(commit_info)) => { - return Some((path.version, commit_info)) - } - Err(e) => return None, - _ => continue, - }; - } - None - }) - })) + Ok(Box::new( + fs_client + .read_files(files)? + .zip(log_segment.ascending_commit_files.into_iter()) + .filter_map(|(data, path)| { + data.ok().and_then(|d| { + let reader = BufReader::new(Cursor::new(d)); + for line in reader.lines() { + match line.and_then(|l| Ok(serde_json::from_str::(&l)?)) { + Ok(Action::CommitInfo(commit_info)) => { + return Some((path.version, commit_info)) + } + Err(_) => return None, + _ => continue, + }; + } + None + }) + }), + )) } - fn update(&mut self, target_version: impl Into>) -> DeltaResult { + fn update(&mut self, target_version: Option) -> DeltaResult { let mut snapshot = self.inner.as_ref().clone(); let did_update = snapshot.update(target_version, self.engine_ref().as_ref())?; self.inner = Arc::new(snapshot); diff --git a/crates/core/src/kernel/snapshot_next/mod.rs b/crates/core/src/kernel/snapshot_next/mod.rs index 9acd55494d..817d94e477 100644 --- a/crates/core/src/kernel/snapshot_next/mod.rs +++ b/crates/core/src/kernel/snapshot_next/mod.rs @@ -1,13 +1,23 @@ //! Snapshot of a Delta Table at a specific version. 
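// Sketch of building a predicate for the file-listing APIs above, using the
// same expression helpers as META_PREDICATE. The column name "year" is a
// made-up partition column, and the call site is only indicative:
// `snapshot.logical_files(Some(year_not_null()))`.
fn year_not_null() -> ExpressionRef {
    Arc::new(Expression::column(["year"]).is_not_null())
}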
-use arrow_array::RecordBatch; +use std::sync::Arc; + +use arrow_array::{BooleanArray, RecordBatch, StructArray}; +use arrow_select::concat::concat_batches; +use arrow_select::filter::filter_record_batch; use delta_kernel::actions::visitors::SetTransactionMap; -use delta_kernel::actions::{Add, Metadata, Protocol, SetTransaction}; +use delta_kernel::actions::{ + get_log_add_schema, get_log_schema, Metadata, Protocol, SetTransaction, ADD_NAME, REMOVE_NAME, +}; +use delta_kernel::engine::arrow_data::ArrowEngineData; +use delta_kernel::engine::arrow_expression::apply_schema; use delta_kernel::expressions::{Scalar, StructData}; -use delta_kernel::schema::Schema; +use delta_kernel::scan::log_replay::scan_action_iter; +use delta_kernel::schema::{DataType, Schema}; use delta_kernel::table_properties::TableProperties; -use delta_kernel::{ExpressionRef, Version}; +use delta_kernel::{EngineData, ExpressionRef, Version}; use iterators::{AddView, AddViewIterator}; +use itertools::Itertools; use url::Url; use crate::kernel::actions::CommitInfo; @@ -91,14 +101,19 @@ pub trait Snapshot { /// An iterator of [`RecordBatch`]es, where each batch contains add action data. fn files( &self, - predicate: impl Into>, - ) -> DeltaResult>>; + predicate: Option, + ) -> DeltaResult>>>; + + fn logical_files( + &self, + predicate: Option, + ) -> DeltaResult>>>; fn files_view( &self, - predicate: impl Into>, - ) -> DeltaResult>> { - Ok(AddViewIterator::new(self.files(predicate)?)) + predicate: Option, + ) -> DeltaResult>>> { + Ok(Box::new(AddViewIterator::new(self.files(predicate)?))) } /// Get all tombstones in the table. @@ -108,7 +123,7 @@ pub trait Snapshot { /// /// # Returns /// An iterator of [`RecordBatch`]es, where each batch contains remove action data. - fn tombstones(&self) -> DeltaResult>>; + fn tombstones(&self) -> DeltaResult>>>; /// Scan the Delta Log to obtain the latest transaction for all applications /// @@ -127,10 +142,7 @@ pub trait Snapshot { /// /// # Returns /// The latest transaction for the given application id, if it exists. - fn application_transaction( - &self, - app_id: impl AsRef, - ) -> DeltaResult>; + fn application_transaction(&self, app_id: &str) -> DeltaResult>; /// Get commit info for the table. /// @@ -152,9 +164,9 @@ pub trait Snapshot { // the definition form kernel, once handling over there matured. fn commit_infos( &self, - start_version: impl Into>, - limit: impl Into>, - ) -> DeltaResult>; + start_version: Option, + limit: Option, + ) -> DeltaResult>>; /// Update the snapshot to a specific version. /// @@ -165,7 +177,7 @@ pub trait Snapshot { /// /// # Returns /// A boolean indicating if the snapshot was updated. 
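// With the concrete `Option<..>` and `&str` parameters introduced in this
// patch the trait becomes object safe, so snapshots can be passed around as
// trait objects. A minimal sketch (construction of the snapshot is elided;
// the tests below use the same idea via `Box<dyn Snapshot>`):
fn describe(snapshot: &dyn Snapshot) -> String {
    format!(
        "table {} at version {}",
        snapshot.table_root(),
        snapshot.version()
    )
}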
-    fn update(&mut self, target_version: impl Into>) -> DeltaResult;
+    fn update(&mut self, target_version: Option) -> DeltaResult;
 }
 
 impl Snapshot for Box {
@@ -193,14 +205,21 @@ impl Snapshot for Box {
         self.as_ref().table_properties()
     }
 
+    fn logical_files(
+        &self,
+        predicate: Option,
+    ) -> DeltaResult>>> {
+        self.as_ref().logical_files(predicate)
+    }
+
     fn files(
         &self,
-        predicate: impl Into>,
-    ) -> DeltaResult>> {
+        predicate: Option,
+    ) -> DeltaResult>>> {
         self.as_ref().files(predicate)
     }
 
-    fn tombstones(&self) -> DeltaResult>> {
+    fn tombstones(&self) -> DeltaResult>>> {
         self.as_ref().tombstones()
     }
 
@@ -208,29 +227,114 @@ impl Snapshot for Box {
         self.as_ref().application_transactions()
     }
 
-    fn application_transaction(
-        &self,
-        app_id: impl AsRef,
-    ) -> DeltaResult> {
+    fn application_transaction(&self, app_id: &str) -> DeltaResult> {
        self.as_ref().application_transaction(app_id)
     }
 
     fn commit_infos(
         &self,
-        start_version: impl Into>,
-        limit: impl Into>,
-    ) -> DeltaResult> {
+        start_version: Option,
+        limit: Option,
+    ) -> DeltaResult>> {
         self.as_ref().commit_infos(start_version, limit)
     }
 
-    fn update(&mut self, target_version: impl Into>) -> DeltaResult {
+    fn update(&mut self, target_version: Option) -> DeltaResult {
         self.as_mut().update(target_version)
     }
 }
 
+fn replay_file_actions(
+    snapshot: &LazySnapshot,
+    predicate: impl Into>,
+) -> DeltaResult {
+    let commit_read_schema = get_log_schema().project(&[ADD_NAME, REMOVE_NAME])?;
+    let checkpoint_read_schema = get_log_add_schema().clone();
+
+    let curr_data = snapshot
+        .inner
+        ._log_segment()
+        .replay(
+            snapshot.engine_ref().as_ref(),
+            commit_read_schema.clone(),
+            checkpoint_read_schema.clone(),
+            None,
+        )?
+        .map_ok(
+            |(data, flag)| -> Result<(RecordBatch, bool), delta_kernel::Error> {
+                Ok((ArrowEngineData::try_from_engine_data(data)?.into(), flag))
+            },
+        )
+        .flatten()
+        .collect::, _>>()?;
+
+    scan_as_log_data(snapshot, curr_data, predicate)
+}
+
+// Helper function to replay log data as stored, using kernel log replay.
+// The kernel replay emits tuples of (data, selection), where the data is a
+// re-ordered subset of the full data in the log that is relevant to the
+// engine. This function leverages the replay, but applies the selection to
+// the original data to get the final data.
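// A self-contained illustration of the selection-vector step described in the
// comment above, using the same arrow primitives as `scan_as_log_data`: rows
// whose selection flag is `true` are kept, all others are dropped. The helper
// itself is illustrative and not part of the patch.
fn keep_selected(
    batch: &RecordBatch,
    selection: Vec<bool>,
) -> Result<RecordBatch, arrow_schema::ArrowError> {
    filter_record_batch(batch, &BooleanArray::from(selection))
}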
+fn scan_as_log_data( + snapshot: &LazySnapshot, + curr_data: impl IntoIterator, + predicate: impl Into>, +) -> Result { + let curr_data = curr_data.into_iter().collect::>(); + let scan_iter = curr_data.clone().into_iter().map(|(data, flag)| { + Ok(( + Box::new(ArrowEngineData::new(data.clone())) as Box, + flag, + )) + }); + + let scan = snapshot + .inner + .as_ref() + .clone() + .into_scan_builder() + .with_predicate(predicate) + .build()?; + + let res = scan_action_iter( + snapshot.engine_ref().as_ref(), + scan_iter, + scan.physical_predicate() + .map(|p| (p, scan.schema().clone())), + ) + .map(|res| { + res.and_then(|(d, selection)| { + Ok(( + RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?), + selection, + )) + }) + }) + .zip(curr_data.into_iter()) + .map(|(scan_res, (data_raw, _))| match scan_res { + Ok((_, selection)) => { + let data = filter_record_batch(&data_raw, &BooleanArray::from(selection))?; + let dt: DataType = get_log_add_schema().as_ref().clone().into(); + let data: StructArray = data.project(&[0])?.into(); + apply_schema(&data, &dt) + } + Err(e) => Err(e), + }) + .collect::, _>>()?; + + let schema_ref = Arc::new(get_log_add_schema().as_ref().try_into()?); + Ok(concat_batches(&schema_ref, &res)?) +} + #[cfg(test)] mod tests { - use std::path::PathBuf; + use std::{future::Future, path::PathBuf, pin::Pin}; + + use delta_kernel::Table; + use deltalake_test::utils::*; + + use super::*; pub(super) fn get_dat_dir() -> PathBuf { let d = PathBuf::from(env!("CARGO_MANIFEST_DIR")); @@ -242,4 +346,99 @@ mod tests { rep_root.push("dat/out/reader_tests/generated"); rep_root } + + fn get_lazy( + ctx: &IntegrationContext, + table: TestTables, + version: Option, + ) -> TestResult>>>>> { + let store = ctx.table_builder(table).build_storage()?.object_store(None); + let table = Table::try_from_uri("memory:///")?; + Ok(Box::pin(async move { + Ok(Box::new(LazySnapshot::try_new(table, store, version).await?) as Box) + })) + } + + fn get_eager( + ctx: &IntegrationContext, + table: TestTables, + version: Option, + ) -> TestResult>>>>> { + let store = ctx.table_builder(table).build_storage()?.object_store(None); + let config = Default::default(); + Ok(Box::pin(async move { + Ok( + Box::new(EagerSnapshot::try_new("memory:///", store, config, version).await?) 
+ as Box, + ) + })) + } + + #[tokio::test] + async fn test_snapshots() -> TestResult { + let context = IntegrationContext::new(Box::::default())?; + context.load_table(TestTables::Checkpoints).await?; + context.load_table(TestTables::Simple).await?; + context.load_table(TestTables::SimpleWithCheckpoint).await?; + context.load_table(TestTables::WithDvSmall).await?; + + test_snapshot(&context, get_lazy).await?; + test_snapshot(&context, get_eager).await?; + + Ok(()) + } + + // NOTE: test needs to be async, so that we can pick up the runtime from the context + async fn test_snapshot(ctx: &IntegrationContext, get_snapshot: F) -> TestResult<()> + where + F: Fn( + &IntegrationContext, + TestTables, + Option, + ) -> TestResult>>>>>, + { + for version in 0..=12 { + let snapshot = get_snapshot(ctx, TestTables::Checkpoints, Some(version))?.await?; + assert_eq!(snapshot.version(), version); + + test_files(snapshot.as_ref())?; + test_files_view(snapshot.as_ref())?; + test_commit_infos(snapshot.as_ref())?; + } + + let mut snapshot = get_snapshot(ctx, TestTables::Checkpoints, Some(0))?.await?; + for version in 1..=12 { + snapshot.update(Some(version))?; + assert_eq!(snapshot.version(), version); + + test_files(snapshot.as_ref())?; + test_files_view(snapshot.as_ref())?; + test_commit_infos(snapshot.as_ref())?; + } + + Ok(()) + } + + fn test_files(snapshot: &dyn Snapshot) -> TestResult<()> { + let batches = snapshot.files(None)?.collect::, _>>()?; + let num_files = batches.iter().map(|b| b.num_rows() as i64).sum::(); + assert_eq!((num_files as u64), snapshot.version()); + Ok(()) + } + + fn test_commit_infos(snapshot: &dyn Snapshot) -> TestResult<()> { + let commit_infos = snapshot.commit_infos(None, Some(100))?.collect::>(); + assert_eq!((commit_infos.len() as u64), snapshot.version() + 1); + assert_eq!(commit_infos.first().unwrap().0, snapshot.version()); + Ok(()) + } + + fn test_files_view(snapshot: &dyn Snapshot) -> TestResult<()> { + let num_files_view = snapshot + .files_view(None)? 
+ .map(|f| f.unwrap().path().to_string()) + .count() as u64; + assert_eq!(num_files_view, snapshot.version()); + Ok(()) + } } From e83c3caaf68aae00d27d719c0005b681c9f8a492 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Tue, 21 Jan 2025 17:28:00 +0100 Subject: [PATCH 11/14] test: more snapshot tests Signed-off-by: Robert Pack --- Cargo.toml | 10 +-- crates/core/src/kernel/snapshot_next/eager.rs | 37 +++++++++-- .../src/kernel/snapshot_next/iterators.rs | 66 ++++++++++++++++++- crates/core/src/kernel/snapshot_next/lazy.rs | 2 +- crates/core/src/kernel/snapshot_next/mod.rs | 55 ++++++++++++---- crates/core/src/operations/transaction/mod.rs | 30 +++++++-- 6 files changed, 171 insertions(+), 29 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0bbe1e07ab..00c970fe8e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,14 +27,14 @@ debug = "line-tables-only" [workspace.dependencies] # delta_kernel = { version = "=0.6.0", features = ["default-engine"] } -delta_kernel = { path = "../delta-kernel-rs/kernel", features = [ - "default-engine", - "developer-visibility", -] } -# delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "2e09bfcc0447283a3acc320ad2350f4075dba83e", features = [ +# delta_kernel = { path = "../delta-kernel-rs/kernel", features = [ # "default-engine", # "developer-visibility", # ] } +delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "50c1c023b7e9d60df69f6e592b91e4cc06a5a0b1", features = [ + "default-engine", + "developer-visibility", +] } # arrow arrow = { version = "53" } diff --git a/crates/core/src/kernel/snapshot_next/eager.rs b/crates/core/src/kernel/snapshot_next/eager.rs index 83e0a00863..2e8ff24059 100644 --- a/crates/core/src/kernel/snapshot_next/eager.rs +++ b/crates/core/src/kernel/snapshot_next/eager.rs @@ -1,14 +1,16 @@ use std::sync::Arc; -use arrow_array::RecordBatch; +use arrow_array::{BooleanArray, RecordBatch}; +use arrow_select::filter::filter_record_batch; use delta_kernel::actions::set_transaction::SetTransactionMap; use delta_kernel::actions::{get_log_add_schema, get_log_schema, ADD_NAME, REMOVE_NAME}; use delta_kernel::actions::{Add, Metadata, Protocol, SetTransaction}; use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::log_segment::LogSegment; +use delta_kernel::scan::log_replay::scan_action_iter; use delta_kernel::schema::Schema; use delta_kernel::table_properties::TableProperties; -use delta_kernel::{ExpressionRef, Table, Version}; +use delta_kernel::{EngineData, ExpressionRef, Table, Version}; use itertools::Itertools; use object_store::ObjectStore; use url::Url; @@ -55,9 +57,36 @@ impl Snapshot for EagerSnapshot { fn logical_files( &self, - _predicate: Option, + predicate: Option, ) -> DeltaResult>>> { - todo!() + let scan = self + .snapshot + .inner + .as_ref() + .clone() + .into_scan_builder() + .with_predicate(predicate) + .build()?; + + let iter = scan_action_iter( + self.snapshot.engine_ref().as_ref(), + vec![Ok(( + Box::new(ArrowEngineData::new(self.file_data()?.clone())) as Box, + false, + ))] + .into_iter(), + scan.physical_predicate() + .map(|p| (p, scan.schema().clone())), + ) + .map(|res| { + res.and_then(|(data, predicate)| { + let batch: RecordBatch = ArrowEngineData::try_from_engine_data(data)?.into(); + Ok(filter_record_batch(&batch, &BooleanArray::from(predicate))?) 
+ }) + }) + .map(|batch| batch.map_err(|e| e.into())); + + Ok(Box::new(iter)) } fn files( diff --git a/crates/core/src/kernel/snapshot_next/iterators.rs b/crates/core/src/kernel/snapshot_next/iterators.rs index 38aa0e1c2f..c353881e2c 100644 --- a/crates/core/src/kernel/snapshot_next/iterators.rs +++ b/crates/core/src/kernel/snapshot_next/iterators.rs @@ -1,5 +1,5 @@ use std::collections::HashSet; -use std::sync::Arc; +use std::sync::{Arc, LazyLock}; use arrow_array::cast::AsArray; use arrow_array::types::Int64Type; @@ -14,6 +14,7 @@ use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::engine::arrow_expression::ProvidesColumnByName; use delta_kernel::engine_data::{GetData, RowVisitor}; use delta_kernel::expressions::{Scalar, StructData}; +use delta_kernel::scan::scan_row_schema; use crate::kernel::scalars::ScalarExt; use crate::{DeltaResult, DeltaTableError}; @@ -218,6 +219,69 @@ impl Iterator for LogicalFileView { } } +pub struct LogicalFileViewIterator +where + I: IntoIterator>, +{ + inner: I::IntoIter, + batch: Option, + current: usize, +} + +impl LogicalFileViewIterator +where + I: IntoIterator>, +{ + /// Create a new [LogicalFileViewIterator]. + /// + /// If `iter` is an infallible iterator, use `.map(Ok)`. + pub fn new(iter: I) -> Self { + Self { + inner: iter.into_iter(), + batch: None, + current: 0, + } + } +} + +// impl Iterator for LogicalFileViewIterator +// where +// I: IntoIterator>, +// { +// type Item = DeltaResult; +// +// fn next(&mut self) -> Option { +// if let Some(batch) = &self.batch { +// if self.current < batch.num_rows() { +// let item = LogicalFileView { +// files: batch.clone(), +// index: self.current, +// }; +// self.current += 1; +// return Some(Ok(item)); +// } +// } +// match self.inner.next() { +// Some(Ok(batch)) => { +// if validate_logical_file(&batch).is_err() { +// return Some(Err(DeltaTableError::generic( +// "Invalid logical file data encountered.", +// ))); +// } +// self.batch = Some(batch); +// self.current = 0; +// self.next() +// } +// Some(Err(e)) => Some(Err(e)), +// None => None, +// } +// } +// +// fn size_hint(&self) -> (usize, Option) { +// self.inner.size_hint() +// } +// } + pub struct AddViewIterator where I: IntoIterator>, diff --git a/crates/core/src/kernel/snapshot_next/lazy.rs b/crates/core/src/kernel/snapshot_next/lazy.rs index 572b4ead37..20ccfb7031 100644 --- a/crates/core/src/kernel/snapshot_next/lazy.rs +++ b/crates/core/src/kernel/snapshot_next/lazy.rs @@ -3,8 +3,8 @@ use std::io::{BufRead, BufReader, Cursor}; use std::sync::{Arc, LazyLock}; -use arrow::compute::filter_record_batch; use arrow_array::{BooleanArray, RecordBatch}; +use arrow_select::filter::filter_record_batch; use delta_kernel::actions::set_transaction::{SetTransactionMap, SetTransactionScanner}; use delta_kernel::actions::{get_log_schema, REMOVE_NAME}; use delta_kernel::actions::{Metadata, Protocol, SetTransaction}; diff --git a/crates/core/src/kernel/snapshot_next/mod.rs b/crates/core/src/kernel/snapshot_next/mod.rs index 817d94e477..0023f4236d 100644 --- a/crates/core/src/kernel/snapshot_next/mod.rs +++ b/crates/core/src/kernel/snapshot_next/mod.rs @@ -13,6 +13,7 @@ use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::engine::arrow_expression::apply_schema; use delta_kernel::expressions::{Scalar, StructData}; use delta_kernel::scan::log_replay::scan_action_iter; +use delta_kernel::scan::scan_row_schema; use delta_kernel::schema::{DataType, Schema}; use delta_kernel::table_properties::TableProperties; use 
delta_kernel::{EngineData, ExpressionRef, Version}; @@ -92,19 +93,34 @@ pub trait Snapshot { /// Get the [`TableProperties`] for this [`Snapshot`]. fn table_properties(&self) -> &TableProperties; - /// Get all currently active files in the table. + fn logical_file_schema(&self) -> &'static Schema { + scan_row_schema() + } + + /// Get all logical files present in the current snapshot. /// /// # Parameters /// - `predicate`: An optional predicate to filter the files based on file statistics. /// /// # Returns - /// An iterator of [`RecordBatch`]es, where each batch contains add action data. - fn files( + /// An iterator of [`RecordBatch`]es, where each batch contains logical file data. + fn logical_files( &self, predicate: Option, ) -> DeltaResult>>>; - fn logical_files( + /// Get all currently active files in the table. + /// + /// # Parameters + /// - `predicate`: An optional predicate to filter the files based on file statistics. + /// + /// # Returns + /// An iterator of [`RecordBatch`]es, where each batch contains add action data. + #[deprecated( + since = "0.25.0", + note = "Use `logical_files` instead, which returns a more focussed dataset and avoids computational overhead." + )] + fn files( &self, predicate: Option, ) -> DeltaResult>>>; @@ -113,6 +129,7 @@ pub trait Snapshot { &self, predicate: Option, ) -> DeltaResult>>> { + #[allow(deprecated)] Ok(Box::new(AddViewIterator::new(self.files(predicate)?))) } @@ -216,6 +233,7 @@ impl Snapshot for Box { &self, predicate: Option, ) -> DeltaResult>>> { + #[allow(deprecated)] self.as_ref().files(predicate) } @@ -404,6 +422,7 @@ mod tests { test_files(snapshot.as_ref())?; test_files_view(snapshot.as_ref())?; test_commit_infos(snapshot.as_ref())?; + test_logical_files(snapshot.as_ref())?; } let mut snapshot = get_snapshot(ctx, TestTables::Checkpoints, Some(0))?.await?; @@ -414,22 +433,29 @@ mod tests { test_files(snapshot.as_ref())?; test_files_view(snapshot.as_ref())?; test_commit_infos(snapshot.as_ref())?; + test_logical_files(snapshot.as_ref())?; } Ok(()) } - fn test_files(snapshot: &dyn Snapshot) -> TestResult<()> { - let batches = snapshot.files(None)?.collect::, _>>()?; - let num_files = batches.iter().map(|b| b.num_rows() as i64).sum::(); + fn test_logical_files(snapshot: &dyn Snapshot) -> TestResult<()> { + let logical_files = snapshot + .logical_files(None)? 
+ .collect::, _>>()?; + let num_files = logical_files + .iter() + .map(|b| b.num_rows() as i64) + .sum::(); assert_eq!((num_files as u64), snapshot.version()); Ok(()) } - fn test_commit_infos(snapshot: &dyn Snapshot) -> TestResult<()> { - let commit_infos = snapshot.commit_infos(None, Some(100))?.collect::>(); - assert_eq!((commit_infos.len() as u64), snapshot.version() + 1); - assert_eq!(commit_infos.first().unwrap().0, snapshot.version()); + fn test_files(snapshot: &dyn Snapshot) -> TestResult<()> { + #[allow(deprecated)] + let batches = snapshot.files(None)?.collect::, _>>()?; + let num_files = batches.iter().map(|b| b.num_rows() as i64).sum::(); + assert_eq!((num_files as u64), snapshot.version()); Ok(()) } @@ -441,4 +467,11 @@ mod tests { assert_eq!(num_files_view, snapshot.version()); Ok(()) } + + fn test_commit_infos(snapshot: &dyn Snapshot) -> TestResult<()> { + let commit_infos = snapshot.commit_infos(None, Some(100))?.collect::>(); + assert_eq!((commit_infos.len() as u64), snapshot.version() + 1); + assert_eq!(commit_infos.first().unwrap().0, snapshot.version()); + Ok(()) + } } diff --git a/crates/core/src/operations/transaction/mod.rs b/crates/core/src/operations/transaction/mod.rs index bb173c40b7..d7d99f0dbe 100644 --- a/crates/core/src/operations/transaction/mod.rs +++ b/crates/core/src/operations/transaction/mod.rs @@ -80,6 +80,7 @@ use bytes::Bytes; use chrono::Utc; use conflict_checker::ConflictChecker; use futures::future::BoxFuture; +use itertools::Itertools; use object_store::path::Path; use object_store::Error as ObjectStoreError; use serde_json::Value; @@ -268,22 +269,37 @@ pub struct CommitData { impl CommitData { /// Create new data to be committed pub fn new( - mut actions: Vec, + actions: Vec, operation: DeltaOperation, mut app_metadata: HashMap, app_transactions: Vec, ) -> Self { - if !actions.iter().any(|a| matches!(a, Action::CommitInfo(..))) { - let mut commit_info = operation.get_commit_info(); - commit_info.timestamp = Some(Utc::now().timestamp_millis()); + // When in-commit-timestamps are enabled, we need to ensure that the commit info is the first action + // in the commit log. If it is not present, we need to add it. + // https://github.com/delta-io/delta/blob/master/PROTOCOL.md#writer-requirements-for-in-commit-timestamps + let mut commit_info = None::; + let mut actions = actions + .into_iter() + .inspect(|a| { + if matches!(a, Action::CommitInfo(..)) { + commit_info = Some(a.clone()) + } + }) + .filter(|a| matches!(a, Action::CommitInfo(..))) + .collect_vec(); + if !commit_info.is_some() { + let mut cm = operation.get_commit_info(); + cm.timestamp = Some(Utc::now().timestamp_millis()); app_metadata.insert( "clientVersion".to_string(), Value::String(format!("delta-rs.{}", crate_version())), ); - app_metadata.extend(commit_info.info); - commit_info.info = app_metadata.clone(); - actions.push(Action::CommitInfo(commit_info)) + app_metadata.extend(cm.info); + cm.info = app_metadata.clone(); + commit_info = Some(Action::CommitInfo(cm)); } + // safety: we assured commit_info is Some just above. 
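+        // Keeping the (possibly synthesized) commit info at index 0 ensures it is
+        // serialized as the first action of the commit, as the in-commit-timestamp
+        // writer requirements linked above expect.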
+ actions.insert(0, commit_info.unwrap()); for txn in &app_transactions { actions.push(Action::Txn(txn.clone())) From 5364f4a7e2141c947e76f668725e1e5124dd3106 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Tue, 21 Jan 2025 18:58:39 +0100 Subject: [PATCH 12/14] feat: allow iterating over logical files Signed-off-by: Robert Pack --- .../src/kernel/snapshot_next/iterators.rs | 103 ++++++++---------- crates/core/src/kernel/snapshot_next/mod.rs | 28 ++++- 2 files changed, 74 insertions(+), 57 deletions(-) diff --git a/crates/core/src/kernel/snapshot_next/iterators.rs b/crates/core/src/kernel/snapshot_next/iterators.rs index c353881e2c..1bfec67eec 100644 --- a/crates/core/src/kernel/snapshot_next/iterators.rs +++ b/crates/core/src/kernel/snapshot_next/iterators.rs @@ -1,5 +1,5 @@ use std::collections::HashSet; -use std::sync::{Arc, LazyLock}; +use std::sync::Arc; use arrow_array::cast::AsArray; use arrow_array::types::Int64Type; @@ -14,7 +14,6 @@ use delta_kernel::engine::arrow_data::ArrowEngineData; use delta_kernel::engine::arrow_expression::ProvidesColumnByName; use delta_kernel::engine_data::{GetData, RowVisitor}; use delta_kernel::expressions::{Scalar, StructData}; -use delta_kernel::scan::scan_row_schema; use crate::kernel::scalars::ScalarExt; use crate::{DeltaResult, DeltaTableError}; @@ -202,23 +201,6 @@ impl LogicalFileView { } } -impl Iterator for LogicalFileView { - type Item = LogicalFileView; - - fn next(&mut self) -> Option { - if self.index < self.files.num_rows() { - let file = LogicalFileView { - files: self.files.clone(), - index: self.index, - }; - self.index += 1; - Some(file) - } else { - None - } - } -} - pub struct LogicalFileViewIterator where I: IntoIterator>, @@ -244,43 +226,43 @@ where } } -// impl Iterator for LogicalFileViewIterator -// where -// I: IntoIterator>, -// { -// type Item = DeltaResult; -// -// fn next(&mut self) -> Option { -// if let Some(batch) = &self.batch { -// if self.current < batch.num_rows() { -// let item = LogicalFileView { -// files: batch.clone(), -// index: self.current, -// }; -// self.current += 1; -// return Some(Ok(item)); -// } -// } -// match self.inner.next() { -// Some(Ok(batch)) => { -// if validate_logical_file(&batch).is_err() { -// return Some(Err(DeltaTableError::generic( -// "Invalid logical file data encountered.", -// ))); -// } -// self.batch = Some(batch); -// self.current = 0; -// self.next() -// } -// Some(Err(e)) => Some(Err(e)), -// None => None, -// } -// } -// -// fn size_hint(&self) -> (usize, Option) { -// self.inner.size_hint() -// } -// } +impl Iterator for LogicalFileViewIterator +where + I: IntoIterator>, +{ + type Item = DeltaResult; + + fn next(&mut self) -> Option { + if let Some(batch) = &self.batch { + if self.current < batch.num_rows() { + let item = LogicalFileView { + files: batch.clone(), + index: self.current, + }; + self.current += 1; + return Some(Ok(item)); + } + } + match self.inner.next() { + Some(Ok(batch)) => { + if validate_logical_file(&batch).is_err() { + return Some(Err(DeltaTableError::generic( + "Invalid logical file data encountered.", + ))); + } + self.batch = Some(batch); + self.current = 0; + self.next() + } + Some(Err(e)) => Some(Err(e)), + None => None, + } + } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} pub struct AddViewIterator where @@ -353,6 +335,15 @@ pub(crate) fn validate_add(batch: &RecordBatch) -> DeltaResult<()> { Ok(()) } +fn validate_logical_file(batch: &RecordBatch) -> DeltaResult<()> { + validate_column::(batch, &["path"])?; + 
validate_column::(batch, &["size"])?; + validate_column::(batch, &["modificationTime"])?; + // validate_column::(batch, &["deletionVector"])?; + // validate_column::(batch, &["fileConstantValues"])?; + Ok(()) +} + fn validate_column<'a, T: Array + 'static>( actions: &'a RecordBatch, col: &'a [impl AsRef], diff --git a/crates/core/src/kernel/snapshot_next/mod.rs b/crates/core/src/kernel/snapshot_next/mod.rs index 0023f4236d..a415f71a99 100644 --- a/crates/core/src/kernel/snapshot_next/mod.rs +++ b/crates/core/src/kernel/snapshot_next/mod.rs @@ -17,7 +17,7 @@ use delta_kernel::scan::scan_row_schema; use delta_kernel::schema::{DataType, Schema}; use delta_kernel::table_properties::TableProperties; use delta_kernel::{EngineData, ExpressionRef, Version}; -use iterators::{AddView, AddViewIterator}; +use iterators::{AddView, AddViewIterator, LogicalFileView, LogicalFileViewIterator}; use itertools::Itertools; use url::Url; @@ -109,6 +109,16 @@ pub trait Snapshot { predicate: Option, ) -> DeltaResult>>>; + fn logical_files_view( + &self, + predicate: Option, + ) -> DeltaResult>>> { + #[allow(deprecated)] + Ok(Box::new(LogicalFileViewIterator::new( + self.logical_files(predicate)?, + ))) + } + /// Get all currently active files in the table. /// /// # Parameters @@ -125,6 +135,10 @@ pub trait Snapshot { predicate: Option, ) -> DeltaResult>>>; + #[deprecated( + since = "0.25.0", + note = "Use `logical_files_view` instead, which returns a more focussed dataset and avoids computational overhead." + )] fn files_view( &self, predicate: Option, @@ -423,6 +437,7 @@ mod tests { test_files_view(snapshot.as_ref())?; test_commit_infos(snapshot.as_ref())?; test_logical_files(snapshot.as_ref())?; + test_logical_files_view(snapshot.as_ref())?; } let mut snapshot = get_snapshot(ctx, TestTables::Checkpoints, Some(0))?.await?; @@ -434,6 +449,7 @@ mod tests { test_files_view(snapshot.as_ref())?; test_commit_infos(snapshot.as_ref())?; test_logical_files(snapshot.as_ref())?; + test_logical_files_view(snapshot.as_ref())?; } Ok(()) @@ -451,6 +467,15 @@ mod tests { Ok(()) } + fn test_logical_files_view(snapshot: &dyn Snapshot) -> TestResult<()> { + let num_files_view = snapshot + .logical_files_view(None)? + .map(|f| f.unwrap().path().to_string()) + .count() as u64; + assert_eq!(num_files_view, snapshot.version()); + Ok(()) + } + fn test_files(snapshot: &dyn Snapshot) -> TestResult<()> { #[allow(deprecated)] let batches = snapshot.files(None)?.collect::, _>>()?; @@ -460,6 +485,7 @@ mod tests { } fn test_files_view(snapshot: &dyn Snapshot) -> TestResult<()> { + #[allow(deprecated)] let num_files_view = snapshot .files_view(None)? 
.map(|f| f.unwrap().path().to_string()) From 51349f46a93c287299cc0517d04adb3dac3c820c Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Tue, 21 Jan 2025 20:08:31 +0100 Subject: [PATCH 13/14] fix: revert accidentally commited file Signed-off-by: Robert Pack --- crates/core/src/operations/transaction/mod.rs | 30 +++++-------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/crates/core/src/operations/transaction/mod.rs b/crates/core/src/operations/transaction/mod.rs index d7d99f0dbe..bb173c40b7 100644 --- a/crates/core/src/operations/transaction/mod.rs +++ b/crates/core/src/operations/transaction/mod.rs @@ -80,7 +80,6 @@ use bytes::Bytes; use chrono::Utc; use conflict_checker::ConflictChecker; use futures::future::BoxFuture; -use itertools::Itertools; use object_store::path::Path; use object_store::Error as ObjectStoreError; use serde_json::Value; @@ -269,37 +268,22 @@ pub struct CommitData { impl CommitData { /// Create new data to be committed pub fn new( - actions: Vec, + mut actions: Vec, operation: DeltaOperation, mut app_metadata: HashMap, app_transactions: Vec, ) -> Self { - // When in-commit-timestamps are enabled, we need to ensure that the commit info is the first action - // in the commit log. If it is not present, we need to add it. - // https://github.com/delta-io/delta/blob/master/PROTOCOL.md#writer-requirements-for-in-commit-timestamps - let mut commit_info = None::; - let mut actions = actions - .into_iter() - .inspect(|a| { - if matches!(a, Action::CommitInfo(..)) { - commit_info = Some(a.clone()) - } - }) - .filter(|a| matches!(a, Action::CommitInfo(..))) - .collect_vec(); - if !commit_info.is_some() { - let mut cm = operation.get_commit_info(); - cm.timestamp = Some(Utc::now().timestamp_millis()); + if !actions.iter().any(|a| matches!(a, Action::CommitInfo(..))) { + let mut commit_info = operation.get_commit_info(); + commit_info.timestamp = Some(Utc::now().timestamp_millis()); app_metadata.insert( "clientVersion".to_string(), Value::String(format!("delta-rs.{}", crate_version())), ); - app_metadata.extend(cm.info); - cm.info = app_metadata.clone(); - commit_info = Some(Action::CommitInfo(cm)); + app_metadata.extend(commit_info.info); + commit_info.info = app_metadata.clone(); + actions.push(Action::CommitInfo(commit_info)) } - // safety: we assured commit_info is Some just above. 
- actions.insert(0, commit_info.unwrap()); for txn in &app_transactions { actions.push(Action::Txn(txn.clone())) From 5d2cf488ff7f69393b1e3c73bf72f1dcdd832768 Mon Sep 17 00:00:00 2001 From: Robert Pack Date: Tue, 21 Jan 2025 21:55:18 +0100 Subject: [PATCH 14/14] fix: tombstone replay Signed-off-by: Robert Pack --- .gitignore | 1 - crates/core/src/kernel/snapshot_next/lazy.rs | 93 ++++++++++++-------- crates/core/src/kernel/snapshot_next/mod.rs | 13 +-- 3 files changed, 58 insertions(+), 49 deletions(-) diff --git a/.gitignore b/.gitignore index ee7ca99235..c5aca6465b 100644 --- a/.gitignore +++ b/.gitignore @@ -22,7 +22,6 @@ __blobstorage__ .githubchangeloggenerator.cache.log .githubchangeloggenerator.cache/ .githubchangeloggenerator* -data .zed/ # Add all Cargo.lock files except for those in binary crates diff --git a/crates/core/src/kernel/snapshot_next/lazy.rs b/crates/core/src/kernel/snapshot_next/lazy.rs index 20ccfb7031..3a203e1ad1 100644 --- a/crates/core/src/kernel/snapshot_next/lazy.rs +++ b/crates/core/src/kernel/snapshot_next/lazy.rs @@ -9,15 +9,16 @@ use delta_kernel::actions::set_transaction::{SetTransactionMap, SetTransactionSc use delta_kernel::actions::{get_log_schema, REMOVE_NAME}; use delta_kernel::actions::{Metadata, Protocol, SetTransaction}; use delta_kernel::engine::arrow_data::ArrowEngineData; +use delta_kernel::engine::arrow_expression::evaluate_expression; use delta_kernel::engine::default::executor::tokio::{ TokioBackgroundExecutor, TokioMultiThreadExecutor, }; use delta_kernel::engine::default::DefaultEngine; use delta_kernel::log_segment::LogSegment; -use delta_kernel::schema::Schema; +use delta_kernel::schema::{DataType, Schema}; use delta_kernel::snapshot::Snapshot as SnapshotInner; use delta_kernel::table_properties::TableProperties; -use delta_kernel::{Engine, Expression, ExpressionRef, Table, Version}; +use delta_kernel::{Engine, Expression, ExpressionHandler, ExpressionRef, Table, Version}; use itertools::Itertools; use object_store::path::Path; use object_store::ObjectStore; @@ -25,7 +26,7 @@ use url::Url; use super::cache::CommitCacheObjectStore; use super::{replay_file_actions, Snapshot}; -use crate::kernel::{Action, CommitInfo}; +use crate::kernel::{Action, CommitInfo, ARROW_HANDLER}; use crate::{DeltaResult, DeltaTableError}; // TODO: avoid repetitive parsing of json stats @@ -94,11 +95,8 @@ impl Snapshot for LazySnapshot { } fn tombstones(&self) -> DeltaResult>>> { - static META_PREDICATE: LazyLock> = LazyLock::new(|| { - Some(Arc::new( - Expression::column([REMOVE_NAME, "path"]).is_not_null(), - )) - }); + static META_PREDICATE: LazyLock = + LazyLock::new(|| Arc::new(Expression::column([REMOVE_NAME, "path"]).is_not_null())); let read_schema = get_log_schema().project(&[REMOVE_NAME])?; Ok(Box::new( self.inner @@ -107,9 +105,23 @@ impl Snapshot for LazySnapshot { self.engine.as_ref(), read_schema.clone(), read_schema, - META_PREDICATE.clone(), + Some(META_PREDICATE.clone()), )? - .map_ok(|(d, _)| Ok(RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?))) + .map_ok(|(d, _)| { + let batch = RecordBatch::from(ArrowEngineData::try_from_engine_data(d)?); + let selection = evaluate_expression( + META_PREDICATE.as_ref(), + &batch, + Some(&DataType::BOOLEAN), + )?; + let filter = selection + .as_any() + .downcast_ref::() + .ok_or_else(|| { + DeltaTableError::generic("failed to downcast to BooleanArray") + })?; + Ok(filter_record_batch(&batch, filter)?) 
+ }) .flatten(), )) } @@ -247,37 +259,46 @@ impl LazySnapshot { #[cfg(test)] mod tests { - use deltalake_test::acceptance::{read_dat_case, TestCaseInfo}; + use delta_kernel::schema::StructType; + use deltalake_test::utils::*; use deltalake_test::TestResult; - use super::super::tests::get_dat_dir; use super::*; async fn load_snapshot() -> TestResult<()> { - // some comment - let mut dat_dir = get_dat_dir(); - dat_dir.push("multi_partitioned"); - - let dat_info: TestCaseInfo = read_dat_case(dat_dir)?; - let table_info = dat_info.table_summary()?; - - let table = Table::try_from_uri(dat_info.table_root()?)?; - - let snapshot = LazySnapshot::try_new( - table, - Arc::new(object_store::local::LocalFileSystem::default()), - None, - ) - .await?; - - assert_eq!(snapshot.version(), table_info.version); - assert_eq!( - ( - snapshot.protocol().min_reader_version(), - snapshot.protocol().min_writer_version() - ), - (table_info.min_reader_version, table_info.min_writer_version) - ); + let ctx = IntegrationContext::new(Box::::default())?; + ctx.load_table(TestTables::Simple).await?; + + let store = ctx + .table_builder(TestTables::Simple) + .build_storage()? + .object_store(None); + let table = Table::try_from_uri("memory:///")?; + let snapshot = LazySnapshot::try_new(table, store, None).await?; + + let schema_string = r#"{"type":"struct","fields":[{"name":"id","type":"long","nullable":true,"metadata":{}}]}"#; + let expected: StructType = serde_json::from_str(schema_string)?; + assert_eq!(snapshot.schema(), &expected); + + let infos = snapshot.commit_infos(None, None)?.collect_vec(); + assert_eq!(infos.len(), 5); + + let tombstones: Vec<_> = snapshot.tombstones()?.try_collect()?; + let num_tombstones = tombstones.iter().map(|b| b.num_rows() as i64).sum::(); + assert_eq!(num_tombstones, 31); + + let expected = vec![ + "part-00000-2befed33-c358-4768-a43c-3eda0d2a499d-c000.snappy.parquet", + "part-00000-c1777d7d-89d9-4790-b38a-6ee7e24456b1-c000.snappy.parquet", + "part-00001-7891c33d-cedc-47c3-88a6-abcfb049d3b4-c000.snappy.parquet", + "part-00004-315835fe-fb44-4562-98f6-5e6cfa3ae45d-c000.snappy.parquet", + "part-00007-3a0e4727-de0d-41b6-81ef-5223cf40f025-c000.snappy.parquet", + ]; + let file_names: Vec<_> = snapshot + .logical_files_view(None)? + .map_ok(|f| f.path().to_owned()) + .try_collect()?; + assert_eq!(file_names, expected); Ok(()) } diff --git a/crates/core/src/kernel/snapshot_next/mod.rs b/crates/core/src/kernel/snapshot_next/mod.rs index a415f71a99..079ceb0298 100644 --- a/crates/core/src/kernel/snapshot_next/mod.rs +++ b/crates/core/src/kernel/snapshot_next/mod.rs @@ -361,24 +361,13 @@ fn scan_as_log_data( #[cfg(test)] mod tests { - use std::{future::Future, path::PathBuf, pin::Pin}; + use std::{future::Future, pin::Pin}; use delta_kernel::Table; use deltalake_test::utils::*; use super::*; - pub(super) fn get_dat_dir() -> PathBuf { - let d = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - let mut rep_root = d - .parent() - .and_then(|p| p.parent()) - .expect("valid directory") - .to_path_buf(); - rep_root.push("dat/out/reader_tests/generated"); - rep_root - } - fn get_lazy( ctx: &IntegrationContext, table: TestTables,