diff --git a/.gitignore b/.gitignore
index 35f0b009..cbdc688a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,6 +27,7 @@ venv
 **/.python-version
 __pycache__
+uv.lock
 
 # macOS
 **/.DS_Store
 
diff --git a/crates/macros/Cargo.toml b/crates/macros/Cargo.toml
new file mode 100644
index 00000000..080366c2
--- /dev/null
+++ b/crates/macros/Cargo.toml
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "hudi-macros"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+rust-version.workspace = true
+keywords.workspace = true
+readme = "README.md"
+description = "Procedural macros for generating language bindings from Hudi Rust enums"
+homepage.workspace = true
+repository = "https://github.com/apache/hudi-rs/tree/main/crates/macros/"
+
+[lib]
+proc-macro = true
+
+[dependencies]
+proc-macro2 = "1.0"
+quote = "1.0"
+syn = { version = "2.0", features = ["full", "extra-traits"] }
+strum = { workspace = true }
+
+[features]
+default = []
+pyo3 = []
+jni = []
+cpp = []
\ No newline at end of file
diff --git a/crates/macros/README.md b/crates/macros/README.md
new file mode 100644
index 00000000..4d9031c1
--- /dev/null
+++ b/crates/macros/README.md
@@ -0,0 +1,26 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements. See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership. The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied. See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# The `macros` crate
+
+This crate provides procedural macros for automatically generating language bindings from Hudi Rust enums.
+
+It eliminates duplication by generating binding code from canonical Rust enum definitions instead of manually maintaining separate enum definitions for each language binding.
+
+Currently supports PyO3 (Python) bindings with `#[derive(AutoBind)]` and `#[auto_bind(pyo3)]`.
\ No newline at end of file
diff --git a/crates/macros/src/lib.rs b/crates/macros/src/lib.rs
new file mode 100644
index 00000000..3a6398a3
--- /dev/null
+++ b/crates/macros/src/lib.rs
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+//! Procedural macros for generating language bindings from Hudi Rust enums.
+//!
+//! This crate provides a unified approach to automatically generate language binding code
+//! from canonical Rust enum definitions, eliminating the need for manual duplication
+//! across different language bindings.
+
+use proc_macro::TokenStream;
+use quote::quote;
+use syn::{parse_macro_input, Data, DeriveInput, Fields};
+
+mod pyo3_generator;
+
+/// Automatically generate language bindings for Rust enums.
+///
+/// This macro supports multiple language binding targets via the `#[auto_bind(...)]` attribute.
+/// Currently supported targets:
+/// - `pyo3`: Generate PyO3 Python bindings
+///
+/// # Example
+///
+/// ```rust,ignore
+/// use hudi_macros::AutoBind;
+/// use pyo3::prelude::*;
+///
+/// #[derive(Clone, Debug, AutoBind)]
+/// #[auto_bind(pyo3)]
+/// #[pyclass(name = "HudiTableConfig")]
+/// pub struct PyHudiTableConfig {
+///     inner: HudiTableConfig,
+/// }
+/// ```
+///
+/// # Requirements
+///
+/// The inner enum must:
+/// - Implement `strum::IntoEnumIterator` (via `#[derive(EnumIter)]`)
+/// - Implement `AsRef<str>` for string conversion
+/// - Implement `Debug` for variant name extraction
+///
+/// The wrapper struct must:
+/// - Have an `inner` field containing the enum
+/// - Include appropriate binding-specific attributes (e.g., `#[pyclass]` for PyO3)
+#[proc_macro_derive(AutoBind, attributes(auto_bind))]
+pub fn derive_auto_bind(input: TokenStream) -> TokenStream {
+    let input = parse_macro_input!(input as DeriveInput);
+
+    let binding_targets = extract_binding_targets(&input);
+
+    if binding_targets.is_empty() {
+        panic!("AutoBind macro requires at least one #[auto_bind(...)] attribute. Supported targets: pyo3");
+    }
+
+    let wrapper_name = &input.ident;
+
+    let inner_enum_type = extract_inner_enum_type(&input);
+
+    let mut generated_code = quote! {};
+
+    for target in binding_targets {
+        match target.as_str() {
+            "pyo3" => {
+                let pyo3_code =
+                    pyo3_generator::generate_pyo3_binding(wrapper_name, inner_enum_type);
+                generated_code.extend(pyo3_code);
+            }
+            _ => panic!(
+                "Unsupported binding target: {}. Supported targets: pyo3",
+                target
+            ),
+        }
+    }
+
+    TokenStream::from(generated_code)
+}
+
+/// Extract binding targets from #[auto_bind(...)] attributes
+fn extract_binding_targets(input: &DeriveInput) -> Vec<String> {
+    let mut targets = Vec::new();
+
+    for attr in &input.attrs {
+        if !attr.path().is_ident("auto_bind") {
+            continue;
+        }
+
+        match &attr.meta {
+            syn::Meta::Path(_) => {
+                targets.push("pyo3".to_string());
+            }
+            syn::Meta::List(meta_list) => {
+                // Handle #[auto_bind(pyo3, jni, etc.)]
+                let result = meta_list.parse_args_with(
+                    syn::punctuated::Punctuated::<syn::Path, syn::Token![,]>::parse_terminated,
+                );
+                if let Ok(paths) = result {
+                    for path in paths {
+                        if let Some(ident) = path.get_ident() {
+                            targets.push(ident.to_string());
+                        }
+                    }
+                }
+            }
+            syn::Meta::NameValue(_) => {
+                panic!("auto_bind attribute does not support name-value syntax");
+            }
+        }
+    }
+
+    targets
+}
+
+/// Extract the inner enum type from the wrapper struct
+fn extract_inner_enum_type(input: &DeriveInput) -> &syn::Type {
+    match &input.data {
+        Data::Struct(data) => match &data.fields {
+            Fields::Named(fields) => {
+                fields
+                    .named
+                    .iter()
+                    .find(|field| {
+                        field
+                            .ident
+                            .as_ref()
+                            .map(|ident| ident == "inner")
+                            .unwrap_or(false)
+                    })
+                    .map(|field| &field.ty)
+                    .expect("AutoBind requires a struct with an 'inner' field")
+            }
+            _ => panic!("AutoBind requires a struct with named fields"),
+        },
+        _ => panic!("AutoBind can only be used on structs"),
+    }
+}
diff --git a/crates/macros/src/pyo3_generator.rs b/crates/macros/src/pyo3_generator.rs
new file mode 100644
index 00000000..0152e6c6
--- /dev/null
+++ b/crates/macros/src/pyo3_generator.rs
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+//! PyO3 binding code generation for Rust enums.
+
+use proc_macro2::TokenStream;
+use quote::quote;
+
+/// Generate PyO3 binding code for an enum wrapper struct.
+///
+/// This generates:
+/// - A `value` getter property that returns the configuration key string via `AsRef<str>`
+/// - An `all_variants` class method that returns all enum variants via `strum::IntoEnumIterator`
+/// - String representation methods (`__str__`, `__repr__`)
+/// - Equality comparison method (`__eq__`)
+/// - A utility method `get_class_attributes` for creating class attributes at runtime
+pub fn generate_pyo3_binding(
+    wrapper_name: &syn::Ident,
+    inner_enum_type: &syn::Type,
+) -> TokenStream {
+    quote! {
+        impl #wrapper_name {
+            /// Get all enum variants as a map of SCREAMING_SNAKE_CASE names to instances.
+            ///
+            /// This is used for runtime class attribute injection in Python modules.
+            pub fn get_class_attributes() -> std::collections::HashMap<&'static str, #wrapper_name> {
+                use ::strum::IntoEnumIterator;
+                let mut attrs = std::collections::HashMap::new();
+
+                for variant in <#inner_enum_type>::iter() {
+                    let variant_name = format!("{:?}", variant);
+                    let const_name = Self::convert_to_screaming_snake_case(&variant_name);
+                    // We need to leak the string to get a 'static reference
+                    let static_name: &'static str = Box::leak(const_name.into_boxed_str());
+                    attrs.insert(static_name, #wrapper_name { inner: variant });
+                }
+
+                attrs
+            }
+
+            /// Convert PascalCase variant names to SCREAMING_SNAKE_CASE.
+            ///
+            /// Example: `BaseFileFormat` -> `BASE_FILE_FORMAT`
+            fn convert_to_screaming_snake_case(input: &str) -> String {
+                let mut result = String::new();
+                let mut chars = input.chars().peekable();
+
+                while let Some(ch) = chars.next() {
+                    if ch.is_uppercase() && !result.is_empty() {
+                        // Add underscore before uppercase letters (except the first one)
+                        if chars.peek().map_or(false, |next_ch| next_ch.is_lowercase()) {
+                            result.push('_');
+                        }
+                    }
+                    result.push(ch.to_uppercase().next().unwrap());
+                }
+
+                result
+            }
+        }
+
+        #[::pyo3::pymethods]
+        impl #wrapper_name {
+            #[getter]
+            fn value(&self) -> String {
+                self.inner.as_ref().to_string()
+            }
+
+            /// Get all enum variants as a list.
+            ///
+            /// This is exposed as a Python class method that can be called as:
+            /// `HudiTableConfig.all_variants()`
+            #[classmethod]
+            fn all_variants(_cls: &::pyo3::Bound<'_, ::pyo3::types::PyType>) -> Vec<#wrapper_name> {
+                use ::strum::IntoEnumIterator;
+                <#inner_enum_type>::iter()
+                    .map(|variant| #wrapper_name { inner: variant })
+                    .collect()
+            }
+
+            /// Python `repr()` representation.
+            ///
+            /// Returns a string like `PyHudiTableConfig(hoodie.table.name)`
+            fn __repr__(&self) -> String {
+                format!("{}({})", stringify!(#wrapper_name), self.value())
+            }
+
+            /// Python `str()` representation.
+            ///
+            /// Returns the configuration key string directly.
+            fn __str__(&self) -> String {
+                self.value()
+            }
+
+            /// Python equality comparison.
+            ///
+            /// Two enum instances are equal if they represent the same variant.
+            fn __eq__(&self, other: &Self) -> bool {
+                std::mem::discriminant(&self.inner) == std::mem::discriminant(&other.inner)
+            }
+        }
+    }
+}
diff --git a/python/Cargo.toml b/python/Cargo.toml
index a87a987d..f437b5a4 100644
--- a/python/Cargo.toml
+++ b/python/Cargo.toml
@@ -34,6 +34,8 @@ doc = false
 
 [dependencies]
 hudi = { path = "../crates/hudi"}
+hudi-macros = { path = "../crates/macros", features = ["pyo3"] }
+strum = { workspace = true }
 
 # arrow
 arrow = { workspace = true, features = ["pyarrow"] }
diff --git a/python/hudi/__init__.py b/python/hudi/__init__.py
index 22e57ee0..4c3c5237 100644
--- a/python/hudi/__init__.py
+++ b/python/hudi/__init__.py
@@ -21,7 +21,9 @@
     HudiFileGroupReader,
     HudiFileSlice,
     HudiInstant,
+    HudiReadConfig,
     HudiTable,
+    HudiTableConfig,
     HudiTimeline,
 )
 from hudi._internal import __version__ as __version__
@@ -32,7+34,9 @@
     "HudiFileGroupReader",
     "HudiFileSlice",
     "HudiInstant",
+    "HudiReadConfig",
     "HudiTable",
     "HudiTableBuilder",
+    "HudiTableConfig",
     "HudiTimeline",
 ]
diff --git a/python/hudi/_internal.pyi b/python/hudi/_internal.pyi
index 9642e4e1..9e7e41b0 100644
--- a/python/hudi/_internal.pyi
+++ b/python/hudi/_internal.pyi
@@ -15,12 +15,45 @@
 # specific language governing permissions and limitations
 # under the License.
 from dataclasses import dataclass
+from enum import Enum
 from typing import Dict, List, Optional, Tuple
 
 import pyarrow  # type: ignore
 
 __version__: str
 
+class HudiTableConfig(Enum):
+    """Configurations for Hudi tables, most of them are persisted in `hoodie.properties`."""
+
+    BASE_FILE_FORMAT = "hoodie.table.base.file.format"
+    BASE_PATH = "hoodie.base.path"
+    CHECKSUM = "hoodie.table.checksum"
+    CREATE_SCHEMA = "hoodie.table.create.schema"
+    DATABASE_NAME = "hoodie.database.name"
+    DROPS_PARTITION_FIELDS = "hoodie.datasource.write.drop.partition.columns"
+    IS_HIVE_STYLE_PARTITIONING = "hoodie.datasource.write.hive_style_partitioning"
+    IS_PARTITION_PATH_URLENCODED = "hoodie.datasource.write.partitionpath.urlencode"
+    KEY_GENERATOR_CLASS = "hoodie.table.keygenerator.class"
+    PARTITION_FIELDS = "hoodie.table.partition.fields"
+    PRECOMBINE_FIELD = "hoodie.table.precombine.field"
+    POPULATES_META_FIELDS = "hoodie.populate.meta.fields"
+    RECORD_KEY_FIELDS = "hoodie.table.recordkey.fields"
+    RECORD_MERGE_STRATEGY = "hoodie.table.record.merge.strategy"
+    TABLE_NAME = "hoodie.table.name"
+    TABLE_TYPE = "hoodie.table.type"
+    TABLE_VERSION = "hoodie.table.version"
+    TIMELINE_LAYOUT_VERSION = "hoodie.timeline.layout.version"
+    TIMELINE_TIMEZONE = "hoodie.table.timeline.timezone"
+
+class HudiReadConfig(Enum):
+    """Configurations for reading Hudi tables."""
+
+    FILE_GROUP_START_TIMESTAMP = "hoodie.read.file_group.start_timestamp"
+    FILE_GROUP_END_TIMESTAMP = "hoodie.read.file_group.end_timestamp"
+    INPUT_PARTITIONS = "hoodie.read.input.partitions"
+    LISTING_PARALLELISM = "hoodie.read.listing.parallelism"
+    USE_READ_OPTIMIZED_MODE = "hoodie.read.use.read_optimized.mode"
+
 @dataclass(init=False)
 class HudiFileGroupReader:
     """
diff --git a/python/hudi/table/builder.py b/python/hudi/table/builder.py
index 4ed9b943..4f161f61 100644
--- a/python/hudi/table/builder.py
+++ b/python/hudi/table/builder.py
@@ -15,9 +15,9 @@
 # specific language governing permissions and limitations
 # under the License.
 from dataclasses import dataclass, field
-from typing import Dict, Optional
+from typing import Dict, Optional, Union
 
-from hudi._internal import HudiTable, build_hudi_table
+from hudi._internal import HudiReadConfig, HudiTable, HudiTableConfig, build_hudi_table
 
 
 @dataclass
@@ -57,18 +57,21 @@ def _add_options(
         target_attr = getattr(self, f"{category}_options") if category else self.options
         target_attr.update(options)
 
-    def with_hudi_option(self, k: str, v: str) -> "HudiTableBuilder":
+    def with_hudi_option(
+        self, k: Union[str, HudiTableConfig, HudiReadConfig], v: str
+    ) -> "HudiTableBuilder":
         """
         Adds a Hudi option to the builder.
 
         Parameters:
-            k (str): The key of the option.
+            k (Union[str, HudiTableConfig, HudiReadConfig]): The key of the option. Can be a string or enum.
             v (str): The value of the option.
 
         Returns:
             HudiTableBuilder: The builder instance.
         """
-        self._add_options({k: v}, "hudi")
+        key = k.value if isinstance(k, (HudiTableConfig, HudiReadConfig)) else k
+        self._add_options({key: v}, "hudi")
         return self
 
     def with_hudi_options(self, hudi_options: Dict[str, str]) -> "HudiTableBuilder":
@@ -113,18 +116,21 @@ def with_storage_options(
         self._add_options(storage_options, "storage")
         return self
 
-    def with_option(self, k: str, v: str) -> "HudiTableBuilder":
+    def with_option(
+        self, k: Union[str, HudiTableConfig, HudiReadConfig], v: str
+    ) -> "HudiTableBuilder":
         """
         Adds a generic option to the builder.
 
         Parameters:
-            k (str): The key of the option.
+            k (Union[str, HudiTableConfig, HudiReadConfig]): The key of the option. Can be a string or enum.
             v (str): The value of the option.
 
         Returns:
             HudiTableBuilder: The builder instance.
         """
-        self._add_options({k: v})
+        key = k.value if isinstance(k, (HudiTableConfig, HudiReadConfig)) else k
+        self._add_options({key: v})
         return self
 
     def with_options(self, options: Dict[str, str]) -> "HudiTableBuilder":
diff --git a/python/src/internal.rs b/python/src/internal.rs
index 45f545b1..805725dc 100644
--- a/python/src/internal.rs
+++ b/python/src/internal.rs
@@ -26,6 +26,8 @@ use tokio::runtime::Runtime;
 
 #[cfg(feature = "datafusion")]
 use datafusion::error::DataFusionError;
+use hudi::config::read::HudiReadConfig;
+use hudi::config::table::HudiTableConfig;
 use hudi::error::CoreError;
 use hudi::file_group::file_slice::FileSlice;
 use hudi::file_group::reader::FileGroupReader;
@@ -35,6 +37,7 @@ use hudi::table::builder::TableBuilder;
 use hudi::table::Table;
 use hudi::timeline::instant::Instant;
 use hudi::timeline::Timeline;
+use hudi_macros::AutoBind;
 use pyo3::exceptions::PyException;
 use pyo3::{create_exception, pyclass, pyfunction, pymethods, PyErr, PyObject, PyResult, Python};
 use std::error::Error;
@@ -603,6 +606,22 @@ pub fn build_hudi_table(
     Ok(HudiTable { inner })
 }
 
+#[cfg(not(tarpaulin_include))]
+#[derive(Clone, Debug, AutoBind)]
+#[auto_bind(pyo3)]
+#[pyclass(name = "HudiTableConfig")]
+pub struct PyHudiTableConfig {
+    inner: HudiTableConfig,
+}
+
+#[cfg(not(tarpaulin_include))]
+#[derive(Clone, Debug, AutoBind)]
+#[auto_bind(pyo3)]
+#[pyclass(name = "HudiReadConfig")]
+pub struct PyHudiReadConfig {
+    inner: HudiReadConfig,
+}
+
 #[cfg(not(tarpaulin_include))]
 pub fn rt() -> &'static Runtime {
     static TOKIO_RT: OnceLock<Runtime> = OnceLock::new();
diff --git a/python/src/lib.rs b/python/src/lib.rs
index a962a428..ca9ff8fb 100644
--- a/python/src/lib.rs
+++ b/python/src/lib.rs
@@ -28,13 +28,28 @@ fn _internal(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add("__version__", env!("CARGO_PKG_VERSION"))?;
 
-    use internal::{HudiFileGroupReader, HudiFileSlice, HudiInstant, HudiTable, HudiTimeline};
+    use internal::{
+        HudiFileGroupReader, HudiFileSlice, HudiInstant, HudiTable, HudiTimeline, PyHudiReadConfig,
+        PyHudiTableConfig,
+    };
 
     m.add_class::<HudiFileGroupReader>()?;
     m.add_class::<HudiFileSlice>()?;
     m.add_class::<HudiInstant>()?;
+    m.add_class::<PyHudiReadConfig>()?;
     m.add_class::<HudiTable>()?;
+    m.add_class::<PyHudiTableConfig>()?;
     m.add_class::<HudiTimeline>()?;
 
+    let table_config_type = m.getattr("HudiTableConfig")?;
+    for (name, instance) in PyHudiTableConfig::get_class_attributes() {
+        table_config_type.setattr(name, instance)?;
+    }
+
+    let read_config_type = m.getattr("HudiReadConfig")?;
+    for (name, instance) in PyHudiReadConfig::get_class_attributes() {
+        read_config_type.setattr(name, instance)?;
+    }
+
     #[cfg(feature = "datafusion")]
     {
         use datafusion_internal::HudiDataFusionDataSource;
diff --git a/python/tests/test_table_builder.py b/python/tests/test_table_builder.py
index 6a0cb220..3db516af 100644
--- a/python/tests/test_table_builder.py
+++ b/python/tests/test_table_builder.py
@@ -18,7 +18,7 @@
 import pyarrow as pa
 import pytest
 
-from hudi import HudiTableBuilder
+from hudi import HudiReadConfig, HudiTableBuilder, HudiTableConfig
 
 
 @pytest.fixture
@@ -141,3 +141,221 @@ def test_setting_table_options(
         table.hudi_options().get("hoodie.read.file_group.start_timestamp")
         == "resolved value"
     )
+
+
+def test_with_hudi_option_enum(builder):
+    """Test that HudiTableConfig and HudiReadConfig enums work with with_hudi_option."""
+    builder.with_hudi_option(HudiTableConfig.TABLE_NAME, "test_table")
+    assert builder.hudi_options["hoodie.table.name"] == "test_table"
+
+    builder.with_hudi_option(HudiReadConfig.INPUT_PARTITIONS, "5")
+    assert builder.hudi_options["hoodie.read.input.partitions"] == "5"
+
+
+def test_with_option_enum(builder):
+    """Test that HudiTableConfig and HudiReadConfig enums work with with_option."""
+    builder.with_option(HudiTableConfig.BASE_FILE_FORMAT, "parquet")
+    assert builder.options["hoodie.table.base.file.format"] == "parquet"
+
+    builder.with_option(HudiReadConfig.LISTING_PARALLELISM, "10")
+    assert builder.options["hoodie.read.listing.parallelism"] == "10"
+
+
+def test_enum_values_match_expected_strings():
+    """Test that enum values match the expected configuration key strings."""
+    assert HudiTableConfig.TABLE_NAME.value == "hoodie.table.name"
+    assert HudiTableConfig.TABLE_TYPE.value == "hoodie.table.type"
+    assert HudiTableConfig.BASE_FILE_FORMAT.value == "hoodie.table.base.file.format"
+
+    assert HudiReadConfig.INPUT_PARTITIONS.value == "hoodie.read.input.partitions"
+    assert HudiReadConfig.LISTING_PARALLELISM.value == "hoodie.read.listing.parallelism"
+    assert (
+        HudiReadConfig.USE_READ_OPTIMIZED_MODE.value
+        == "hoodie.read.use.read_optimized.mode"
+    )
+    assert (
+        HudiReadConfig.FILE_GROUP_START_TIMESTAMP.value
+        == "hoodie.read.file_group.start_timestamp"
+    )
+    assert (
+        HudiReadConfig.FILE_GROUP_END_TIMESTAMP.value
+        == "hoodie.read.file_group.end_timestamp"
+    )
+
+
+def test_auto_generated_enum_completeness():
+    """Test that all expected enum variants are auto-generated."""
+    expected_table_config_variants = [
+        "BASE_FILE_FORMAT",
+        "BASE_PATH",
+        "CHECKSUM",
+        "CREATE_SCHEMA",
+        "DATABASE_NAME",
+        "DROPS_PARTITION_FIELDS",
+        "IS_HIVE_STYLE_PARTITIONING",
+        "IS_PARTITION_PATH_URLENCODED",
+        "KEY_GENERATOR_CLASS",
+        "PARTITION_FIELDS",
+        "PRECOMBINE_FIELD",
+        "POPULATES_META_FIELDS",
+        "RECORD_KEY_FIELDS",
+        "RECORD_MERGE_STRATEGY",
+        "TABLE_NAME",
+        "TABLE_TYPE",
+        "TABLE_VERSION",
+        "TIMELINE_LAYOUT_VERSION",
+        "TIMELINE_TIMEZONE",
+    ]
+
+    for variant in expected_table_config_variants:
+        assert hasattr(HudiTableConfig, variant), f"HudiTableConfig missing {variant}"
+        enum_instance = getattr(HudiTableConfig, variant)
+        assert hasattr(enum_instance, "value"), (
+            f"{variant} instance missing 'value' property"
+        )
+        assert isinstance(enum_instance.value, str), f"{variant}.value is not a string"
+
+    expected_read_config_variants = [
+        "FILE_GROUP_START_TIMESTAMP",
+        "FILE_GROUP_END_TIMESTAMP",
+        "INPUT_PARTITIONS",
+        "LISTING_PARALLELISM",
+        "USE_READ_OPTIMIZED_MODE",
+    ]
+
+    for variant in expected_read_config_variants:
+        assert hasattr(HudiReadConfig, variant), f"HudiReadConfig missing {variant}"
+        enum_instance = getattr(HudiReadConfig, variant)
+        assert hasattr(enum_instance, "value"), (
+            f"{variant} instance missing 'value' property"
+        )
+        assert isinstance(enum_instance.value, str), f"{variant}.value is not a string"
+
+
+def test_auto_generated_enum_methods():
+    """Test that auto-generated enum methods work correctly."""
+    table_variants = HudiTableConfig.all_variants()
+    assert isinstance(table_variants, list), "all_variants() should return a list"
+    assert len(table_variants) == 19, "HudiTableConfig should have 19 variants"
+
+    read_variants = HudiReadConfig.all_variants()
+    assert isinstance(read_variants, list), "all_variants() should return a list"
+    assert len(read_variants) == 5, "HudiReadConfig should have 5 variants"
+
+    table_instance = HudiTableConfig.TABLE_NAME
+    assert str(table_instance) == "hoodie.table.name"
+    assert "HudiTableConfig" in repr(table_instance)
+    assert "hoodie.table.name" in repr(table_instance)
+
+    read_instance = HudiReadConfig.INPUT_PARTITIONS
+    assert str(read_instance) == "hoodie.read.input.partitions"
+    assert "HudiReadConfig" in repr(read_instance)
+    assert "hoodie.read.input.partitions" in repr(read_instance)
+
+
+def test_auto_generated_enum_equality():
+    """Test that auto-generated enum instances support equality."""
+    table1 = HudiTableConfig.TABLE_NAME
+    table2 = HudiTableConfig.TABLE_NAME
+    assert table1 == table2
+
+    table_name = HudiTableConfig.TABLE_NAME
+    table_type = HudiTableConfig.TABLE_TYPE
+    assert table_name != table_type
+
+    table_config = HudiTableConfig.TABLE_NAME
+    read_config = HudiReadConfig.INPUT_PARTITIONS
+    assert table_config != read_config
+
+
+def test_no_manual_duplication_regression():
+    """Test that we haven't accidentally kept manual enum definitions."""
+    import inspect
+
+    # Get all class attributes of HudiTableConfig - filter out methods and properties
+    table_attrs = []
+    for attr in dir(HudiTableConfig):
+        if attr.startswith("_") or attr in [
+            "all_variants"
+        ]:  # Skip special methods and class methods
+            continue
+        attr_value = getattr(HudiTableConfig, attr)
+        # Skip methods, classmethods, staticmethods, and properties
+        if (
+            inspect.ismethod(attr_value)
+            or inspect.isfunction(attr_value)
+            or isinstance(attr_value, (classmethod, staticmethod, property))
+            or inspect.isdatadescriptor(attr_value)
+            or hasattr(attr_value, "__self__")
+        ):  # Skip bound methods
+            continue
+        table_attrs.append(attr)
+
+    # Get all class attributes of HudiReadConfig - filter out methods and properties
+    read_attrs = []
+    for attr in dir(HudiReadConfig):
+        if attr.startswith("_") or attr in [
+            "all_variants"
+        ]:  # Skip special methods and class methods
+            continue
+        attr_value = getattr(HudiReadConfig, attr)
+        # Skip methods, classmethods, staticmethods, and properties
+        if (
+            inspect.ismethod(attr_value)
+            or inspect.isfunction(attr_value)
+            or isinstance(attr_value, (classmethod, staticmethod, property))
+            or inspect.isdatadescriptor(attr_value)
+            or hasattr(attr_value, "__self__")
+        ):  # Skip bound methods
+            continue
+        read_attrs.append(attr)
+
+    # All attributes should be auto-generated enum instances
+    for attr in table_attrs:
+        instance = getattr(HudiTableConfig, attr)
+        assert hasattr(instance, "value"), (
+            f"Table config {attr} should have 'value' property"
+        )
+        assert isinstance(instance.value, str), (
+            f"Table config {attr}.value should be a string"
+        )
+        assert instance.value.startswith("hoodie."), (
+            f"Table config {attr}.value should start with 'hoodie.'"
+        )
+
+    for attr in read_attrs:
+        instance = getattr(HudiReadConfig, attr)
+        assert hasattr(instance, "value"), (
+            f"Read config {attr} should have 'value' property"
+        )
+        assert isinstance(instance.value, str), (
+            f"Read config {attr}.value should be a string"
+        )
+        assert instance.value.startswith("hoodie."), (
+            f"Read config {attr}.value should start with 'hoodie.'"
+        )
+
+    assert len(table_attrs) == 19, (
+        f"Expected 19 table config variants, got {len(table_attrs)}: {table_attrs}"
+    )
+    assert len(read_attrs) == 5, (
+        f"Expected 5 read config variants, got {len(read_attrs)}: {read_attrs}"
+    )
+
+
+def test_mixed_string_and_enum_usage(builder):
+    """Test that strings and enums can be used together."""
+    builder.with_hudi_option("custom.string.key", "string_value")
+    builder.with_hudi_option(HudiTableConfig.TABLE_NAME, "enum_table")
+
+    assert builder.hudi_options["custom.string.key"] == "string_value"
+    assert builder.hudi_options["hoodie.table.name"] == "enum_table"
+
+
+def test_backward_compatibility(builder):
+    """Test that existing string-based API still works."""
+    builder.with_hudi_option("hoodie.table.name", "string_table")
+    builder.with_option("hoodie.read.input.partitions", "8")
+
+    assert builder.hudi_options["hoodie.table.name"] == "string_table"
+    assert builder.options["hoodie.read.input.partitions"] == "8"
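
---

For context, a minimal usage sketch of the enum-based option keys introduced by this change. This is illustrative only: the table path is a placeholder, and `HudiTableBuilder.from_base_uri`/`build` are assumed to be the existing builder entry points, which this diff does not modify.

```python
from hudi import HudiReadConfig, HudiTableBuilder, HudiTableConfig

# Enum members carry the underlying "hoodie.*" key in `.value`, so they can be
# passed wherever a plain string key was accepted before; raw strings still work.
builder = (
    HudiTableBuilder.from_base_uri("/tmp/trips_table")  # placeholder path
    .with_hudi_option(HudiReadConfig.INPUT_PARTITIONS, "5")
    .with_option(HudiTableConfig.BASE_FILE_FORMAT, "parquet")
)
table = builder.build()
```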