Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
venv
**/.python-version
__pycache__
uv.lock

# macOS
**/.DS_Store
Expand Down
43 changes: 43 additions & 0 deletions crates/macros/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

[package]
name = "hudi-macros"
version.workspace = true
edition.workspace = true
license.workspace = true
rust-version.workspace = true
keywords.workspace = true
readme = "README.md"
description = "Procedural macros for generating language bindings from Hudi Rust enums"
homepage.workspace = true
repository = "https://github.com/apache/hudi-rs/tree/main/crates/macros/"

[lib]
proc-macro = true

[dependencies]
proc-macro2 = "1.0"
quote = "1.0"
syn = { version = "2.0", features = ["full", "extra-traits"] }
strum = { workspace = true }

[features]
default = []
pyo3 = []
jni = []
cpp = []
26 changes: 26 additions & 0 deletions crates/macros/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<!--
~ Licensed to the Apache Software Foundation (ASF) under one
~ or more contributor license agreements. See the NOTICE file
~ distributed with this work for additional information
~ regarding copyright ownership. The ASF licenses this file
~ to you under the Apache License, Version 2.0 (the
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
-->

# The `macros` crate

This crate provides procedural macros for automatically generating language bindings from Hudi Rust enums.

It eliminates duplication by generating binding code from canonical Rust enum definitions, instead of manually maintaining a separate enum definition for each language binding.

Currently supports PyO3 (Python) bindings with `#[derive(AutoBind)]` and `#[auto_bind(pyo3)]`.
152 changes: 152 additions & 0 deletions crates/macros/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

//! Procedural macros for generating language bindings from Hudi Rust enums.
//!
//! This crate provides a unified approach to automatically generate language binding code
//! from canonical Rust enum definitions, eliminating the need for manual duplication
//! across different language bindings.

use proc_macro::TokenStream;
use quote::quote;
use syn::{parse_macro_input, Data, DeriveInput, Fields};

mod pyo3_generator;

/// Automatically generate language bindings for Rust enums.
///
/// This macro supports multiple language binding targets via the `#[auto_bind(...)]` attribute.
/// Currently supported targets:
/// - `pyo3`: Generate PyO3 Python bindings
///
/// # Example
///
/// ```rust,ignore
/// use hudi_bindings_macros::AutoBind;
/// use pyo3::prelude::*;
///
/// #[derive(Clone, Debug, AutoBind)]
/// #[auto_bind(pyo3)]
/// #[pyclass(name = "HudiTableConfig")]
/// pub struct PyHudiTableConfig {
/// inner: HudiTableConfig,
/// }
/// ```
///
/// # Requirements
///
/// The inner enum must:
/// - Implement `strum::IntoEnumIterator` (via `#[derive(EnumIter)]`)
/// - Implement `AsRef<str>` for string conversion
/// - Implement `Debug` for variant name extraction
///
/// The wrapper struct must:
/// - Have an `inner` field containing the enum
/// - Include appropriate binding-specific attributes (e.g., `#[pyclass]` for PyO3)
#[proc_macro_derive(AutoBind, attributes(auto_bind))]
pub fn derive_auto_bind(input: TokenStream) -> TokenStream {
let input = parse_macro_input!(input as DeriveInput);

let binding_targets = extract_binding_targets(&input);

if binding_targets.is_empty() {
panic!("AutoBind macro requires at least one #[auto_bind(...)] attribute. Supported targets: pyo3");
}

let wrapper_name = &input.ident;

let inner_enum_type = extract_inner_enum_type(&input);

let mut generated_code = quote! {};

for target in binding_targets {
match target.as_str() {
"pyo3" => {
let pyo3_code =
pyo3_generator::generate_pyo3_binding(wrapper_name, inner_enum_type);
generated_code.extend(pyo3_code);
}
_ => panic!(
"Unsupported binding target: {}. Supported targets: pyo3",
target
),
}
}

TokenStream::from(generated_code)
}

/// Extract binding targets from `#[auto_bind(...)]` attributes.
///
/// Every `auto_bind` attribute on `input` is inspected; attributes with any other
/// path are skipped. The returned vector holds the target names in the order they
/// were written and may contain duplicates or names the caller does not support.
///
/// Accepted forms:
/// - `#[auto_bind]` (bare path): contributes the default target `"pyo3"`.
/// - `#[auto_bind(a, b, ...)]`: contributes each comma-separated identifier.
///
/// # Panics
///
/// - If the attribute uses name-value syntax (`#[auto_bind = ...]`).
/// - If the list arguments cannot be parsed as comma-separated paths
///   (e.g. `#[auto_bind("pyo3")]`).
/// - If a list entry is a multi-segment path rather than a plain identifier
///   (e.g. `#[auto_bind(foo::bar)]`).
fn extract_binding_targets(input: &DeriveInput) -> Vec<String> {
    let mut targets = Vec::new();

    for attr in &input.attrs {
        if !attr.path().is_ident("auto_bind") {
            continue;
        }

        match &attr.meta {
            // A bare `#[auto_bind]` falls back to the only supported target.
            syn::Meta::Path(_) => {
                targets.push("pyo3".to_string());
            }
            syn::Meta::List(meta_list) => {
                // Handle #[auto_bind(pyo3, jni, etc.)]
                //
                // A malformed argument list must not be ignored: dropping it silently
                // would surface later as a misleading "requires at least one
                // #[auto_bind(...)] attribute" error, or as a target that was
                // requested but never generated.
                let paths = meta_list
                    .parse_args_with(
                        syn::punctuated::Punctuated::<syn::Path, syn::Token![,]>::parse_terminated,
                    )
                    .unwrap_or_else(|err| {
                        panic!(
                            "auto_bind attribute expects a comma-separated list of binding targets, e.g. #[auto_bind(pyo3)]: {}",
                            err
                        )
                    });

                for path in paths {
                    match path.get_ident() {
                        Some(ident) => targets.push(ident.to_string()),
                        None => panic!(
                            "auto_bind binding targets must be plain identifiers (e.g. pyo3), not paths"
                        ),
                    }
                }
            }
            syn::Meta::NameValue(_) => {
                panic!("auto_bind attribute does not support name-value syntax");
            }
        }
    }

    targets
}

/// Look up the type of the wrapper struct's `inner` field.
///
/// The derive input must be a struct with named fields, one of which is called
/// `inner`; the type of that field is returned by reference.
///
/// # Panics
///
/// - If `input` is not a struct.
/// - If the struct does not use named fields.
/// - If no field named `inner` exists.
fn extract_inner_enum_type(input: &DeriveInput) -> &syn::Type {
    // First narrow the input down to the list of named fields.
    let named_fields = match &input.data {
        Data::Struct(data) => match &data.fields {
            Fields::Named(fields) => &fields.named,
            _ => panic!("AutoBind requires a struct with named fields"),
        },
        _ => panic!("AutoBind can only be used on structs"),
    };

    // Then return the type of the first field whose identifier is `inner`.
    for field in named_fields {
        if let Some(ident) = &field.ident {
            if ident == "inner" {
                return &field.ty;
            }
        }
    }

    panic!("AutoBind requires a struct with an 'inner' field")
}
119 changes: 119 additions & 0 deletions crates/macros/src/pyo3_generator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

//! PyO3 binding code generation for Rust enums.

use proc_macro2::TokenStream;
use quote::quote;

/// Generate PyO3 binding code for an enum wrapper struct.
///
/// `wrapper_name` is the identifier of the wrapper struct and `inner_enum_type` is the
/// type of its `inner` field. The returned tokens contain two `impl` blocks for the
/// wrapper:
///
/// 1. A plain `impl` with
///    - `get_class_attributes`, a utility for creating class attributes at runtime, and
///    - the private helper `convert_to_screaming_snake_case`.
/// 2. A `#[pymethods]` `impl` with
///    - a `value` getter property that returns the configuration key string via
///      `AsRef<str>`,
///    - an `all_variants` class method that returns all enum variants via
///      `strum::IntoEnumIterator`,
///    - string representation methods (`__str__`, `__repr__`), and
///    - an equality comparison method (`__eq__`).
///
/// The generated code refers to `::strum` and `::pyo3` by absolute path, so both crates
/// must be dependencies of the crate in which the derive is used.
pub fn generate_pyo3_binding(
    wrapper_name: &syn::Ident,
    inner_enum_type: &syn::Type,
) -> TokenStream {
    quote! {
        impl #wrapper_name {
            /// Get all enum variants as a map of SCREAMING_SNAKE_CASE names to instances.
            ///
            /// This is used for runtime class attribute injection in Python modules.
            ///
            /// The key of each entry is derived from the variant's `Debug` output, so the
            /// inner enum's `Debug` representation is expected to be the bare variant name.
            ///
            /// Every call leaks one heap-allocated string per variant in order to hand out
            /// `&'static str` keys; call this once (e.g. during module initialization)
            /// rather than repeatedly.
            pub fn get_class_attributes() -> std::collections::HashMap<&'static str, #wrapper_name> {
                use ::strum::IntoEnumIterator;
                let mut attrs = std::collections::HashMap::new();

                for variant in <#inner_enum_type>::iter() {
                    let variant_name = format!("{:?}", variant);
                    let const_name = Self::convert_to_screaming_snake_case(&variant_name);
                    // We need to leak the string to get a 'static reference
                    let static_name: &'static str = Box::leak(const_name.into_boxed_str());
                    attrs.insert(static_name, #wrapper_name { inner: variant });
                }

                attrs
            }

            /// Convert PascalCase variant names to SCREAMING_SNAKE_CASE.
            ///
            /// An underscore is inserted before an uppercase letter (other than the very
            /// first character) when either
            /// - the preceding character is lowercase or numeric (a new word starts), or
            /// - the following character is lowercase (the last capital of an acronym
            ///   starts a new word).
            ///
            /// Examples:
            /// - `BaseFileFormat` -> `BASE_FILE_FORMAT`
            /// - `ParseHTTPResponse` -> `PARSE_HTTP_RESPONSE`
            /// - `FooB` -> `FOO_B`
            fn convert_to_screaming_snake_case(input: &str) -> String {
                let mut result = String::with_capacity(input.len() + 4);
                let mut chars = input.chars().peekable();
                let mut prev: Option<char> = None;

                while let Some(ch) = chars.next() {
                    if ch.is_uppercase() {
                        // `prev` is `None` only for the first character, which never
                        // gets a leading underscore.
                        if let Some(prev_ch) = prev {
                            let follows_word = prev_ch.is_lowercase() || prev_ch.is_numeric();
                            let starts_word =
                                chars.peek().map_or(false, |next_ch| next_ch.is_lowercase());
                            if follows_word || starts_word {
                                result.push('_');
                            }
                        }
                    }
                    // `to_uppercase` may yield more than one character; keep all of them.
                    result.extend(ch.to_uppercase());
                    prev = Some(ch);
                }

                result
            }
        }

        #[::pyo3::pymethods]
        impl #wrapper_name {
            /// The configuration key string of the wrapped variant, obtained via `AsRef<str>`.
            #[getter]
            fn value(&self) -> String {
                self.inner.as_ref().to_string()
            }

            /// Get all enum variants as a list.
            ///
            /// This is exposed as a Python class method that can be called as:
            /// `HudiTableConfig.all_variants()`
            #[classmethod]
            fn all_variants(_cls: &::pyo3::Bound<'_, ::pyo3::types::PyType>) -> Vec<#wrapper_name> {
                use ::strum::IntoEnumIterator;
                <#inner_enum_type>::iter()
                    .map(|variant| #wrapper_name { inner: variant })
                    .collect()
            }

            /// Python `repr()` representation.
            ///
            /// Returns a string like `PyHudiTableConfig(hoodie.table.name)`
            fn __repr__(&self) -> String {
                format!("{}({})", stringify!(#wrapper_name), self.value())
            }

            /// Python `str()` representation.
            ///
            /// Returns the configuration key string directly.
            fn __str__(&self) -> String {
                self.value()
            }

            /// Python equality comparison.
            ///
            /// Two enum instances are equal if they represent the same variant.
            fn __eq__(&self, other: &Self) -> bool {
                std::mem::discriminant(&self.inner) == std::mem::discriminant(&other.inner)
            }
        }
    }
}
2 changes: 2 additions & 0 deletions python/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ doc = false

[dependencies]
hudi = { path = "../crates/hudi"}
hudi-macros = { path = "../crates/macros", features = ["pyo3"] }
strum = { workspace = true }
# arrow
arrow = { workspace = true, features = ["pyarrow"] }

Expand Down
4 changes: 4 additions & 0 deletions python/hudi/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
HudiFileGroupReader,
HudiFileSlice,
HudiInstant,
HudiReadConfig,
HudiTable,
HudiTableConfig,
HudiTimeline,
)
from hudi._internal import __version__ as __version__
Expand All @@ -32,7 +34,9 @@
"HudiFileGroupReader",
"HudiFileSlice",
"HudiInstant",
"HudiReadConfig",
"HudiTable",
"HudiTableBuilder",
"HudiTableConfig",
"HudiTimeline",
]
Loading