Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RFC] feat!: kernel-based log replay #3137

Draft
wants to merge 14 commits into
base: main
Choose a base branch
from
Draft
26 changes: 26 additions & 0 deletions .github/actions/load-dat/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Composite action that downloads a released Delta Acceptance Tests (DAT)
# archive and unpacks it into a target directory for use by later steps.
name: Delta Acceptance Tests
description: Load Delta Lake acceptance test data

inputs:
  version:
    # Used to build the release tag (v<version>) and the archive file name.
    description: "The DAT release version to download (without the leading 'v')"
    required: false
    default: "0.0.3"

  target-directory:
    description: target directory for acceptance test data
    required: false
    default: ${{ github.workspace }}/dat

runs:
  using: composite

  steps:
    - name: load DAT
      shell: bash
      # Inputs are handed to the script through environment variables instead
      # of being interpolated into the script text, so unusual values (spaces,
      # quotes, shell metacharacters) cannot alter the commands that run.
      env:
        DAT_VERSION: ${{ inputs.version }}
        DAT_TARGET_DIR: ${{ inputs.target-directory }}
      run: |
        archive="deltalake-dat-v${DAT_VERSION}.tar.gz"
        # Start from a clean directory so stale data from a previous run
        # cannot leak into the tests.
        rm -rf -- "${DAT_TARGET_DIR}"
        # --fail makes curl exit non-zero on HTTP errors instead of saving
        # the error page as the archive.
        curl --fail -OL "https://github.com/delta-incubator/dat/releases/download/v${DAT_VERSION}/${archive}"
        mkdir -p -- "${DAT_TARGET_DIR}"
        tar --no-same-permissions -xzf "${archive}" --directory "${DAT_TARGET_DIR}"
        # The archive is only needed for extraction; remove it afterwards.
        rm -- "${archive}"
4 changes: 2 additions & 2 deletions .github/actions/setup-env/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ description: "Set up Python, virtual environment, and Rust toolchain"
inputs:
python-version:
description: "The Python version to set up"
required: true
required: false
default: "3.10"

rust-toolchain:
description: "The Rust toolchain to set up"
required: true
required: false
default: "stable"

runs:
Expand Down
17 changes: 11 additions & 6 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
uses: actions-rs/toolchain@v1
with:
profile: default
toolchain: '1.81'
toolchain: "1.81"
override: true

- name: Format
Expand All @@ -42,7 +42,7 @@ jobs:
uses: actions-rs/toolchain@v1
with:
profile: default
toolchain: '1.81'
toolchain: "1.81"
override: true

- name: build and lint with clippy
Expand Down Expand Up @@ -79,9 +79,12 @@ jobs:
uses: actions-rs/toolchain@v1
with:
profile: default
toolchain: '1.81'
toolchain: "1.81"
override: true

- name: Load DAT data
uses: ./.github/actions/load-dat

- name: Run tests
run: cargo test --verbose --features ${{ env.DEFAULT_FEATURES }}

Expand Down Expand Up @@ -114,7 +117,7 @@ jobs:
uses: actions-rs/toolchain@v1
with:
profile: default
toolchain: '1.81'
toolchain: "1.81"
override: true

# Install Java and Hadoop for HDFS integration tests
Expand All @@ -129,6 +132,9 @@ jobs:
tar -xf hadoop-3.4.0.tar.gz -C $GITHUB_WORKSPACE
echo "$GITHUB_WORKSPACE/hadoop-3.4.0/bin" >> $GITHUB_PATH

- name: Load DAT data
uses: ./.github/actions/load-dat

- name: Start emulated services
run: docker compose up -d

Expand Down Expand Up @@ -160,7 +166,7 @@ jobs:
uses: actions-rs/toolchain@v1
with:
profile: default
toolchain: '1.81'
toolchain: "1.81"
override: true

- name: Download Lakectl
Expand All @@ -175,4 +181,3 @@ jobs:
- name: Run tests with rustls (default)
run: |
cargo test --features integration_test_lakefs,lakefs,datafusion

10 changes: 9 additions & 1 deletion .github/workflows/codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,25 @@ jobs:
CARGO_TERM_COLOR: always
steps:
- uses: actions/checkout@v4

- name: Install rust
uses: actions-rs/toolchain@v1
with:
profile: default
toolchain: '1.81'
toolchain: "1.81"
override: true

- name: Install cargo-llvm-cov
uses: taiki-e/install-action@cargo-llvm-cov

- uses: Swatinem/rust-cache@v2

- name: Load DAT data
uses: ./.github/actions/load-dat

- name: Generate code coverage
run: cargo llvm-cov --features ${DEFAULT_FEATURES} --workspace --codecov --output-path codecov.json -- --skip read_table_version_hdfs --skip test_read_tables_lakefs

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
with:
Expand Down
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ __blobstorage__
.githubchangeloggenerator.cache.log
.githubchangeloggenerator.cache/
.githubchangeloggenerator*
data
.zed/

# Add all Cargo.lock files except for those in binary crates
Cargo.lock
Expand All @@ -32,4 +32,5 @@ Cargo.lock

justfile
site
__pycache__
__pycache__
dat/
16 changes: 11 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,15 @@ debug = true
debug = "line-tables-only"

[workspace.dependencies]
delta_kernel = { version = "=0.6.0", features = ["default-engine"] }
#delta_kernel = { path = "../delta-kernel-rs/kernel", features = ["sync-engine"] }
# delta_kernel = { version = "=0.6.0", features = ["default-engine"] }
# delta_kernel = { path = "../delta-kernel-rs/kernel", features = [
# "default-engine",
# "developer-visibility",
# ] }
delta_kernel = { git = "https://github.com/roeap/delta-kernel-rs", rev = "50c1c023b7e9d60df69f6e592b91e4cc06a5a0b1", features = [
"default-engine",
"developer-visibility",
] }

# arrow
arrow = { version = "53" }
Expand All @@ -41,7 +48,7 @@ arrow-ord = { version = "53" }
arrow-row = { version = "53" }
arrow-schema = { version = "53" }
arrow-select = { version = "53" }
object_store = { version = "0.11.2" , features = ["cloud"]}
object_store = { version = "0.11.2", features = ["cloud"] }
parquet = { version = "53" }

# datafusion
Expand All @@ -59,7 +66,7 @@ datafusion-sql = { version = "44" }
# serde
serde = { version = "1.0.194", features = ["derive"] }
serde_json = "1"
strum = { version = "*"}
strum = { version = "*" }


# "stdlib"
Expand All @@ -77,4 +84,3 @@ async-trait = { version = "0.1" }
futures = { version = "0.3" }
tokio = { version = "1" }
num_cpus = { version = "1" }

17 changes: 11 additions & 6 deletions crates/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ rust-version.workspace = true
features = ["datafusion", "json", "unity-experimental"]

[dependencies]
delta_kernel.workspace = true
delta_kernel = { workspace = true }

# arrow
arrow = { workspace = true }
Expand All @@ -29,10 +29,7 @@ arrow-ord = { workspace = true }
arrow-row = { workspace = true }
arrow-schema = { workspace = true, features = ["serde"] }
arrow-select = { workspace = true }
parquet = { workspace = true, features = [
"async",
"object_store",
] }
parquet = { workspace = true, features = ["async", "object_store"] }
pin-project-lite = "^0.2.7"

# datafusion
Expand All @@ -49,7 +46,7 @@ datafusion-functions-aggregate = { workspace = true, optional = true }
# serde
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
strum = { workspace = true}
strum = { workspace = true }

# "stdlib"
bytes = { workspace = true }
Expand All @@ -76,6 +73,9 @@ tokio = { workspace = true, features = [
"parking_lot",
] }

# cache
quick_cache = { version = "0.6.9" }

# other deps (these should be organized and pulled into workspace.dependencies as necessary)
cfg-if = "1"
dashmap = "6"
Expand All @@ -101,6 +101,7 @@ humantime = { version = "2.1.0" }
[dev-dependencies]
criterion = "0.5"
ctor = "0"
datatest-stable = "0.2"
deltalake-test = { path = "../test", features = ["datafusion"] }
dotenvy = "0"
fs_extra = "1.2.0"
Expand Down Expand Up @@ -129,3 +130,7 @@ datafusion = [
datafusion-ext = ["datafusion"]
json = ["parquet/json"]
python = ["arrow/pyarrow"]

[[test]]
name = "dat"
harness = false
1 change: 1 addition & 0 deletions crates/core/src/kernel/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ pub mod error;
pub mod models;
pub mod scalars;
mod snapshot;
pub mod snapshot_next;

pub use error::*;
pub use models::*;
Expand Down
Loading
Loading