Skip to content

Commit 79c6b0f

Browse files
authored
chore: Python CUDA bridge: CI and buffer handoff ABI (#8618)
Add CI coverage for the Python CUDA bridge through the vortex-data[cuda] optional-extra path. Extend the private metadata bridge to carry host buffer-export capsules instead of only a buffer count. The base Python package exports repr(C) VortexBufferExport descriptors, and vortex-python-cuda imports them into local BufferHandles before deserializing arrays through its own VortexSession. Tests now cover primitive, nullable, bool, and struct arrays across the bridge, plus the existing CUDA Arrow Device smoke path. Signed-off-by: Alexander Droste <alexander.droste@protonmail.com>
1 parent f2758d3 commit 79c6b0f

13 files changed

Lines changed: 370 additions & 28 deletions

File tree

.github/workflows/ci.yml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,39 @@ jobs:
132132
uv run --all-packages make html
133133
working-directory: docs/
134134

135+
python-cuda-test:
136+
name: "Python CUDA (test)"
137+
if: github.repository == 'vortex-data/vortex'
138+
runs-on: >-
139+
${{ format('runs-on={0}/runner=gpu/tag=python-cuda-test', github.run_id) }}
140+
timeout-minutes: 30
141+
env:
142+
RUST_LOG: "info,maturin=off,uv=debug"
143+
MATURIN_PEP517_ARGS: "--profile ci"
144+
steps:
145+
- uses: runs-on/action@v2
146+
with:
147+
sccache: s3
148+
- uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7
149+
- uses: ./.github/actions/setup-rust
150+
with:
151+
repo-token: ${{ secrets.GITHUB_TOKEN }}
152+
components: cargo
153+
- name: Pin rustup proxy to repository toolchain
154+
run: |
155+
TOOLCHAIN="$(grep '^channel' rust-toolchain.toml | cut -d '"' -f 2)"
156+
echo "RUSTUP_TOOLCHAIN=$TOOLCHAIN" >> "$GITHUB_ENV"
157+
- name: Install uv
158+
uses: spiraldb/actions/.github/actions/setup-uv@a746510eafaa926484c354541cfc49b2ec06cc63 # 0.18.6
159+
with:
160+
sync: false
161+
162+
- name: Pytest - PyVortex CUDA bridge
163+
run: |
164+
uv run --extra cuda \
165+
pytest --benchmark-disable ../vortex-python-cuda/test/test_native_bridge.py
166+
working-directory: vortex-python/
167+
135168
rust-docs:
136169
name: "Rust (docs)"
137170
needs: duckdb-ready

Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ members = [
3636
"vortex-ffi",
3737
"fuzz",
3838
"vortex-jni",
39+
"vortex-python-abi",
3940
"vortex-python",
4041
"vortex-python-cuda",
4142
"vortex-tui",

vortex-python-abi/Cargo.toml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
[package]
2+
name = "vortex-python-abi"
3+
authors = { workspace = true }
4+
categories = { workspace = true }
5+
description = "Shared internal ABI types for Vortex Python extension modules."
6+
edition = { workspace = true }
7+
homepage = { workspace = true }
8+
include = { workspace = true }
9+
keywords = { workspace = true }
10+
license = { workspace = true }
11+
publish = false
12+
readme = { workspace = true }
13+
repository = { workspace = true }
14+
rust-version = { workspace = true }
15+
version = { workspace = true }
16+
17+
[lints]
18+
workspace = true

vortex-python-abi/src/lib.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Shared private ABI for Python buffer handoff between `vortex-data` extension modules.
5+
6+
use std::ffi::CStr;
7+
use std::ffi::c_void;
8+
9+
/// Name used for PyCapsules carrying [`VortexBufferExport`] pointers.
10+
pub const BUFFER_EXPORT_CAPSULE_NAME: &CStr = c"vortex_buffer_export";
11+
12+
/// Current version of the [`VortexBufferExport`] ABI.
13+
pub const VORTEX_BUFFER_EXPORT_VERSION: u32 = 1;
14+
15+
/// Buffer kind for host-accessible buffers.
16+
pub const VORTEX_BUFFER_HOST: u32 = 0;
17+
18+
/// Buffer kind for device-accessible buffers.
19+
pub const VORTEX_BUFFER_DEVICE: u32 = 1;
20+
21+
/// C-ABI descriptor for passing buffers between `vortex-data` and optional extension modules.
///
/// This type is shared by Rust crates, but the values are exchanged through Python capsules between
/// independently compiled extension modules, so the field order and types are part of the ABI and
/// must only change together with the `version` value.
///
/// # Ownership
///
/// The producer owns allocation details and must provide a `release` callback that releases both
/// `private_data` and the descriptor itself. The type is deliberately neither `Clone` nor `Copy`:
/// a descriptor stands for a single owned export, and duplicating it would invite a double
/// release.
///
/// `Debug` is derived so descriptors can be logged and used in assertion messages; it prints the
/// raw pointer values and never dereferences them.
#[derive(Debug)]
#[repr(C)]
pub struct VortexBufferExport {
    /// ABI version. Consumers must reject unsupported versions.
    pub version: u32,
    /// Buffer kind. Consumers may support [`VORTEX_BUFFER_HOST`] or [`VORTEX_BUFFER_DEVICE`].
    pub kind: u32,
    /// Pointer to the first byte of the exported buffer, or null for empty buffers.
    pub ptr: *const u8,
    /// Length of the buffer in bytes.
    pub len: usize,
    /// Required byte alignment of `ptr`.
    pub alignment: usize,
    /// Device identifier for device buffers, or -1 for host buffers.
    pub device_id: i32,
    /// Optional synchronization event for device buffers.
    pub sync_event: *mut c_void,
    /// Producer-owned private data used by `release`.
    pub private_data: *mut c_void,
    /// Producer-owned release callback. It must release `private_data` and this descriptor, so
    /// the descriptor must not be touched after the callback has been invoked.
    pub release: Option<unsafe extern "C" fn(*mut VortexBufferExport)>,
}

vortex-python-cuda/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ extension-module = []
2828

2929
[dependencies]
3030
arrow-schema = { workspace = true }
31+
bytes = { workspace = true }
3132
pyo3 = { workspace = true, features = ["abi3", "abi3-py311"] }
3233
vortex = { workspace = true }
3334
vortex-cuda = { workspace = true }
35+
vortex-python-abi = { path = "../vortex-python-abi" }
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
# pyright: reportMissingModuleSource=false, reportPrivateUsage=false
34

4-
from ._lib import ( # pyright: ignore[reportMissingModuleSource]
5-
_debug_array_metadata_dtype as _debug_array_metadata_dtype, # pyright: ignore[reportPrivateUsage]
6-
)
7-
from ._lib import ( # pyright: ignore[reportMissingModuleSource]
8-
cuda_available,
9-
export_device_array,
10-
)
5+
from . import _lib
6+
7+
_debug_array_metadata_dtype = _lib._debug_array_metadata_dtype
8+
_debug_array_metadata_display_values = _lib._debug_array_metadata_display_values
9+
cuda_available = _lib.cuda_available
10+
export_device_array = _lib.export_device_array
1111

1212
__all__ = ["cuda_available", "export_device_array"]

vortex-python-cuda/python/vortex_cuda/_lib.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
def _debug_array_metadata_dtype(array: object) -> str: ...
5+
def _debug_array_metadata_display_values(array: object) -> str: ...
56
def cuda_available() -> bool: ...
67
def export_device_array(
78
array: object, requested_schema: object | None = None, **kwargs: object

vortex-python-cuda/src/lib.rs

Lines changed: 125 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ use vortex::buffer::ByteBuffer;
3535
use vortex::dtype::DType;
3636
use vortex::error::VortexError;
3737
use vortex::error::VortexResult;
38-
use vortex::error::vortex_bail;
3938
use vortex::error::vortex_ensure;
4039
use vortex::error::vortex_err;
4140
use vortex::flatbuffers::FlatBuffer;
@@ -49,12 +48,115 @@ use vortex_cuda::arrow::ArrowDeviceArrayWithSchema;
4948
use vortex_cuda::arrow::DeviceArrayExt;
5049
use vortex_cuda::arrow::release_device_array;
5150
use vortex_cuda::arrow::release_schema;
51+
use vortex_python_abi::BUFFER_EXPORT_CAPSULE_NAME;
52+
use vortex_python_abi::VORTEX_BUFFER_EXPORT_VERSION;
53+
use vortex_python_abi::VORTEX_BUFFER_HOST;
54+
use vortex_python_abi::VortexBufferExport;
5255

5356
const ARROW_SCHEMA_CAPSULE_NAME: &CStr = c_str!("arrow_schema");
5457
const USED_ARROW_SCHEMA_CAPSULE_NAME: &CStr = c_str!("used_arrow_schema");
5558
const ARROW_DEVICE_ARRAY_CAPSULE_NAME: &CStr = c_str!("arrow_device_array");
5659
const USED_ARROW_DEVICE_ARRAY_CAPSULE_NAME: &CStr = c_str!("used_arrow_device_array");
5760

61+
struct BufferExportGuard {
62+
export: NonNull<VortexBufferExport>,
63+
}
64+
65+
impl BufferExportGuard {
66+
fn export(&self) -> &VortexBufferExport {
67+
unsafe { self.export.as_ref() }
68+
}
69+
}
70+
71+
impl AsRef<[u8]> for BufferExportGuard {
72+
fn as_ref(&self) -> &[u8] {
73+
let export = self.export();
74+
if export.len == 0 {
75+
&[]
76+
} else {
77+
unsafe { std::slice::from_raw_parts(export.ptr, export.len) }
78+
}
79+
}
80+
}
81+
82+
impl Drop for BufferExportGuard {
83+
fn drop(&mut self) {
84+
// The producer's release callback owns cleanup of both private data and the descriptor.
85+
let export = unsafe { self.export.as_ref() };
86+
if let Some(release) = export.release {
87+
unsafe { release(self.export.as_ptr()) };
88+
}
89+
}
90+
}
91+
92+
// The guard is moved into `Bytes::from_owner`, which requires `Send + Sync`. After import we disable
93+
// the source capsule destructor and own the C export until this guard is dropped.
94+
unsafe impl Send for BufferExportGuard {}
95+
unsafe impl Sync for BufferExportGuard {}
96+
97+
fn import_buffer_from_capsule(capsule: &Bound<'_, PyCapsule>) -> PyResult<BufferHandle> {
98+
let export_ptr = capsule
99+
.pointer_checked(Some(BUFFER_EXPORT_CAPSULE_NAME))?
100+
.cast::<VortexBufferExport>();
101+
let export = unsafe { export_ptr.as_ref() };
102+
103+
if export.version != VORTEX_BUFFER_EXPORT_VERSION {
104+
return Err(PyValueError::new_err(format!(
105+
"unsupported VortexBufferExport version {}",
106+
export.version
107+
)));
108+
}
109+
if export.kind != VORTEX_BUFFER_HOST {
110+
return Err(PyValueError::new_err(format!(
111+
"unsupported buffer kind {} (only host buffers are supported in metadata bridge)",
112+
export.kind
113+
)));
114+
}
115+
116+
if export.len != 0 && export.ptr.is_null() {
117+
return Err(PyValueError::new_err(
118+
"non-empty VortexBufferExport has null data pointer",
119+
));
120+
}
121+
if export.release.is_none() {
122+
return Err(PyValueError::new_err(
123+
"VortexBufferExport is missing a release callback",
124+
));
125+
}
126+
127+
let len = export.len;
128+
let alignment = vortex::buffer::Alignment::try_from(
129+
u32::try_from(export.alignment)
130+
.map_err(|_| PyValueError::new_err("buffer alignment exceeds u32"))?,
131+
)
132+
.map_err(|e| PyValueError::new_err(e.to_string()))?;
133+
134+
if len != 0 && !alignment.is_ptr_aligned(export.ptr) {
135+
return Err(PyValueError::new_err(format!(
136+
"buffer pointer is not aligned to requested alignment {alignment}"
137+
)));
138+
}
139+
140+
// Transfer ownership of the boxed VortexBufferExport from the producer capsule into the Bytes
141+
// owner below. Otherwise the producer capsule could be dropped before the reconstructed
142+
// BufferHandle, leaving the Bytes owner with a dangling export pointer.
143+
unsafe { ffi::PyCapsule_SetDestructor(capsule.as_ptr(), None) };
144+
if PyErr::occurred(capsule.py()) {
145+
return Err(PyErr::fetch(capsule.py()));
146+
}
147+
148+
let guard = BufferExportGuard { export: export_ptr };
149+
150+
let byte_buffer = if len == 0 {
151+
drop(guard);
152+
ByteBuffer::empty_aligned(alignment)
153+
} else {
154+
ByteBuffer::from(bytes::Bytes::from_owner(guard)).aligned(alignment)
155+
};
156+
157+
Ok(BufferHandle::new_host(byte_buffer))
158+
}
159+
58160
struct ExportedDeviceArray(ArrowDeviceArrayWithSchema);
59161

60162
// The exported Arrow C Device structs own CPU-side metadata plus CUDA device pointers through their
@@ -101,7 +203,7 @@ struct ArrayMetadata {
101203
dtype: Vec<u8>,
102204
len: usize,
103205
metadata: Vec<u8>,
104-
buffer_count: usize,
206+
buffers: Vec<BufferHandle>,
105207
children: Vec<ArrayMetadata>,
106208
}
107209

@@ -147,6 +249,16 @@ fn parse_array_metadata(value: &Bound<'_, PyAny>) -> PyResult<ArrayMetadata> {
147249
)));
148250
}
149251

252+
let buffers = tuple
253+
.get_item(4)?
254+
.cast::<PyList>()?
255+
.iter()
256+
.map(|item| {
257+
let capsule: Bound<'_, PyCapsule> = item.extract()?;
258+
import_buffer_from_capsule(&capsule)
259+
})
260+
.collect::<PyResult<Vec<_>>>()?;
261+
150262
let children = tuple
151263
.get_item(5)?
152264
.cast::<PyList>()?
@@ -159,7 +271,7 @@ fn parse_array_metadata(value: &Bound<'_, PyAny>) -> PyResult<ArrayMetadata> {
159271
dtype: tuple.get_item(1)?.extract()?,
160272
len: tuple.get_item(2)?.extract()?,
161273
metadata: tuple.get_item(3)?.extract()?,
162-
buffer_count: tuple.get_item(4)?.extract()?,
274+
buffers,
163275
children,
164276
})
165277
}
@@ -173,14 +285,6 @@ fn deserialize_metadata_tree(
173285
metadata: &ArrayMetadata,
174286
session: &VortexSession,
175287
) -> VortexResult<ArrayRef> {
176-
if metadata.buffer_count != 0 {
177-
vortex_bail!(
178-
"metadata-only bridge cannot deserialize array {} with {} buffers yet",
179-
metadata.encoding_id,
180-
metadata.buffer_count
181-
);
182-
}
183-
184288
let dtype = dtype_from_metadata(metadata, session)?;
185289
let children = metadata
186290
.children
@@ -194,12 +298,11 @@ fn deserialize_metadata_tree(
194298
.registry()
195299
.find(&encoding_id)
196300
.ok_or_else(|| vortex_err!("Unknown array encoding: {}", metadata.encoding_id))?;
197-
let buffers: &[BufferHandle] = &[];
198301
let decoded = plugin.deserialize(
199302
&dtype,
200303
metadata.len,
201304
&metadata.metadata,
202-
buffers,
305+
&metadata.buffers,
203306
&children,
204307
session,
205308
)?;
@@ -246,6 +349,14 @@ fn _debug_array_metadata_dtype(array: Bound<'_, PyAny>) -> PyResult<String> {
246349
Ok(array.dtype().to_string())
247350
}
248351

352+
/// Return array values after crossing the private vtable-metadata bridge.
353+
#[pyfunction]
354+
fn _debug_array_metadata_display_values(array: Bound<'_, PyAny>) -> PyResult<String> {
355+
let metadata = extract_array_metadata(&array)?;
356+
let array = deserialize_metadata_tree(&metadata, &METADATA_SESSION).map_err(to_py_err)?;
357+
Ok(array.display_values().to_string())
358+
}
359+
249360
/// Export a PyVortex array as Arrow C Device schema and array PyCapsules.
250361
#[pyfunction]
251362
#[pyo3(signature = (array, requested_schema = None, **kwargs))]
@@ -461,6 +572,7 @@ unsafe extern "C" fn release_device_array_capsule(capsule: *mut ffi::PyObject) {
461572
fn _lib(m: &Bound<PyModule>) -> PyResult<()> {
462573
m.add_function(wrap_pyfunction!(cuda_available, m)?)?;
463574
m.add_function(wrap_pyfunction!(_debug_array_metadata_dtype, m)?)?;
575+
m.add_function(wrap_pyfunction!(_debug_array_metadata_display_values, m)?)?;
464576
m.add_function(wrap_pyfunction!(export_device_array, m)?)?;
465577
Ok(())
466578
}

0 commit comments

Comments
 (0)