20 changes: 11 additions & 9 deletions crates/iceberg/src/arrow/record_batch_partition_splitter.rs
@@ -59,7 +59,7 @@ impl RecordBatchPartitionSplitter {
/// # Returns
///
/// Returns a new `RecordBatchPartitionSplitter` instance or an error if initialization fails.
pub fn new(
pub fn try_new(
iceberg_schema: SchemaRef,
partition_spec: PartitionSpecRef,
calculator: Option<PartitionValueCalculator>,
@@ -87,12 +87,12 @@ impl RecordBatchPartitionSplitter {
/// # Returns
///
/// Returns a new `RecordBatchPartitionSplitter` instance or an error if initialization fails.
pub fn new_with_computed_values(
pub fn try_new_with_computed_values(
iceberg_schema: SchemaRef,
partition_spec: PartitionSpecRef,
) -> Result<Self> {
let calculator = PartitionValueCalculator::try_new(&partition_spec, &iceberg_schema)?;
Self::new(iceberg_schema, partition_spec, Some(calculator))
Self::try_new(iceberg_schema, partition_spec, Some(calculator))
}

/// Create a new RecordBatchPartitionSplitter expecting pre-computed partition values.
@@ -108,11 +108,11 @@ impl RecordBatchPartitionSplitter {
/// # Returns
///
/// Returns a new `RecordBatchPartitionSplitter` instance or an error if initialization fails.
pub fn new_with_precomputed_values(
pub fn try_new_with_precomputed_values(
iceberg_schema: SchemaRef,
partition_spec: PartitionSpecRef,
) -> Result<Self> {
Self::new(iceberg_schema, partition_spec, None)
Self::try_new(iceberg_schema, partition_spec, None)
}

/// Split the record batch into multiple record batches based on the partition spec.
@@ -261,9 +261,11 @@ mod tests {
.build()
.unwrap(),
);
let partition_splitter =
RecordBatchPartitionSplitter::new_with_computed_values(schema.clone(), partition_spec)
.expect("Failed to create splitter");
let partition_splitter = RecordBatchPartitionSplitter::try_new_with_computed_values(
schema.clone(),
partition_spec,
)
.expect("Failed to create splitter");

let arrow_schema = Arc::new(schema_to_arrow_schema(&schema).unwrap());
let id_array = Int32Array::from(vec![1, 2, 1, 3, 2, 3, 1]);
@@ -392,7 +394,7 @@ mod tests {
]));

// Create splitter expecting pre-computed partition column
let partition_splitter = RecordBatchPartitionSplitter::new_with_precomputed_values(
let partition_splitter = RecordBatchPartitionSplitter::try_new_with_precomputed_values(
schema.clone(),
partition_spec,
)
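A minimal usage sketch of the renamed constructors (not part of the diff). The re-export path of `RecordBatchPartitionSplitter` and the helper function below are assumptions for illustration only; both constructors return `Result<Self>` per the doc comments above.

```rust
use iceberg::Result;
// Assumed re-export path; the type lives in crates/iceberg/src/arrow/record_batch_partition_splitter.rs.
use iceberg::arrow::RecordBatchPartitionSplitter;
use iceberg::spec::{PartitionSpecRef, SchemaRef};

/// Hypothetical helper: pick a constructor depending on whether the input
/// batches already carry a pre-computed partition column.
fn make_splitter(
    schema: SchemaRef,
    partition_spec: PartitionSpecRef,
    has_precomputed_partition_column: bool,
) -> Result<RecordBatchPartitionSplitter> {
    if has_precomputed_partition_column {
        // An upstream projection has already appended the partition values,
        // so the splitter only needs to group rows by them.
        RecordBatchPartitionSplitter::try_new_with_precomputed_values(schema, partition_spec)
    } else {
        // No partition column yet: build a PartitionValueCalculator internally
        // and derive the values from the partition spec's transforms.
        RecordBatchPartitionSplitter::try_new_with_computed_values(schema, partition_spec)
    }
}
```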
@@ -86,7 +86,6 @@ use iceberg::spec::{TableMetadata, TableMetadataRef, Transform};
/// NonZeroUsize::new(4).unwrap(),
/// )?;
/// ```
#[allow(dead_code)]
pub(crate) fn repartition(
input: Arc<dyn ExecutionPlan>,
table_metadata: TableMetadataRef,
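The diff truncates the `repartition` signature here. Judging from the doc example above and the call site in `table/mod.rs` further down, the remaining parameter appears to be a `NonZeroUsize` target partition count and the return type a DataFusion plan; the crate-internal sketch below assumes exactly that signature.

```rust
use std::num::NonZeroUsize;
use std::sync::Arc;

use datafusion::error::Result as DFResult;
use datafusion::physical_plan::ExecutionPlan;
use iceberg::spec::TableMetadataRef;

// Crate-internal import, as used from `table/mod.rs` in this PR.
use crate::physical_plan::repartition::repartition;

/// Hypothetical wrapper mirroring the doc example above: request a fixed
/// parallelism of 4 output partitions for the write path.
pub(crate) fn repartition_for_write(
    input: Arc<dyn ExecutionPlan>,
    table_metadata: TableMetadataRef,
) -> DFResult<Arc<dyn ExecutionPlan>> {
    repartition(input, table_metadata, NonZeroUsize::new(4).unwrap())
}
```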
48 changes: 23 additions & 25 deletions crates/integrations/datafusion/src/physical_plan/write.rs
@@ -35,7 +35,7 @@ use datafusion::physical_plan::{
execute_input_stream,
};
use futures::StreamExt;
use iceberg::arrow::{FieldMatchMode, schema_to_arrow_schema};
use iceberg::arrow::FieldMatchMode;
use iceberg::spec::{DataFileFormat, TableProperties, serialize_data_file_to_json};
use iceberg::table::Table;
use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder;
@@ -44,12 +44,12 @@ use iceberg::writer::file_writer::location_generator::{
DefaultFileNameGenerator, DefaultLocationGenerator,
};
use iceberg::writer::file_writer::rolling_writer::RollingFileWriterBuilder;
use iceberg::writer::{IcebergWriter, IcebergWriterBuilder};
use iceberg::{Error, ErrorKind};
use parquet::file::properties::WriterProperties;
use uuid::Uuid;

use crate::physical_plan::DATA_FILES_COL_NAME;
use crate::task_writer::TaskWriter;
use crate::to_datafusion_error;

/// An execution plan node that writes data to an Iceberg table.
@@ -205,18 +205,6 @@ impl ExecutionPlan for IcebergWriteExec {
partition: usize,
context: Arc<TaskContext>,
) -> DFResult<SendableRecordBatchStream> {
if !self
.table
.metadata()
.default_partition_spec()
.is_unpartitioned()
{
// TODO add support for partitioned tables
return Err(DataFusionError::NotImplemented(
"IcebergWriteExec does not support partitioned tables yet".to_string(),
));
}

let partition_type = self.table.metadata().default_partition_type().clone();
let format_version = self.table.metadata().format_version();

@@ -277,31 +265,41 @@ impl ExecutionPlan for IcebergWriteExec {
);
let data_file_writer_builder = DataFileWriterBuilder::new(rolling_writer_builder);

// Create TaskWriter
// TODO: Make fanout_enabled configurable via table properties
Review comment (Contributor): Please create an issue to track this.

Reply (Contributor, Author): Created here: #1834

let fanout_enabled = true;
let schema = self.table.metadata().current_schema().clone();
let partition_spec = self.table.metadata().default_partition_spec().clone();
let task_writer = TaskWriter::try_new(
data_file_writer_builder,
fanout_enabled,
schema.clone(),
partition_spec,
)
.map_err(to_datafusion_error)?;

// Get input data
let data = execute_input_stream(
Arc::clone(&self.input),
Arc::new(
schema_to_arrow_schema(self.table.metadata().current_schema())
.map_err(to_datafusion_error)?,
),
self.input.schema(), // input schema may have projected column `_partition`
partition,
Arc::clone(&context),
)?;

// Create write stream
let stream = futures::stream::once(async move {
let mut writer = data_file_writer_builder
// todo specify partition key when partitioning writer is supported
.build(None)
.await
.map_err(to_datafusion_error)?;
let mut task_writer = task_writer;
let mut input_stream = data;

while let Some(batch) = input_stream.next().await {
writer.write(batch?).await.map_err(to_datafusion_error)?;
let batch = batch?;
task_writer
.write(batch)
.await
.map_err(to_datafusion_error)?;
}

let data_files = writer.close().await.map_err(to_datafusion_error)?;
let data_files = task_writer.close().await.map_err(to_datafusion_error)?;

// Convert builders to data files and then to JSON strings
let data_files_strs: Vec<String> = data_files
43 changes: 28 additions & 15 deletions crates/integrations/datafusion/src/table/mod.rs
@@ -19,6 +19,7 @@ pub mod metadata_table;
pub mod table_provider_factory;

use std::any::Any;
use std::num::NonZeroUsize;
use std::sync::Arc;

use async_trait::async_trait;
@@ -38,6 +39,8 @@ use iceberg::{Catalog, Error, ErrorKind, NamespaceIdent, Result, TableIdent};
use metadata_table::IcebergMetadataTableProvider;

use crate::physical_plan::commit::IcebergCommitExec;
use crate::physical_plan::project::project_with_partition;
use crate::physical_plan::repartition::repartition;
use crate::physical_plan::scan::IcebergTableScan;
use crate::physical_plan::write::IcebergWriteExec;

@@ -170,32 +173,42 @@ impl TableProvider for IcebergTableProvider {

async fn insert_into(
&self,
_state: &dyn Session,
state: &dyn Session,
input: Arc<dyn ExecutionPlan>,
_insert_op: InsertOp,
) -> DFResult<Arc<dyn ExecutionPlan>> {
if !self
.table
.metadata()
.default_partition_spec()
.is_unpartitioned()
{
// TODO add insert into support for partitioned tables
return Err(DataFusionError::NotImplemented(
"IcebergTableProvider::insert_into does not support partitioned tables yet"
.to_string(),
));
}

let Some(catalog) = self.catalog.clone() else {
return Err(DataFusionError::Execution(
"Catalog cannot be none for insert_into".to_string(),
));
};

let partition_spec = self.table.metadata().default_partition_spec();

// Step 1: Project partition values for partitioned tables
let plan_with_partition = if !partition_spec.is_unpartitioned() {
project_with_partition(input, &self.table)?
} else {
input
};

// Step 2: Repartition for parallel processing
let target_partitions =
NonZeroUsize::new(state.config().target_partitions()).ok_or_else(|| {
DataFusionError::Configuration(
"target_partitions must be greater than 0".to_string(),
)
})?;

let repartitioned_plan = repartition(
plan_with_partition,
self.table.metadata_ref(),
target_partitions,
)?;

let write_plan = Arc::new(IcebergWriteExec::new(
self.table.clone(),
input,
repartitioned_plan,
self.schema.clone(),
));

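A compact restatement of the new `insert_into` plan assembly, as a sketch only: it assumes `project_with_partition`, `repartition`, and `IcebergWriteExec::new` keep the signatures used above and that the `Session` trait exposes `config().target_partitions()`. The catalog check and the downstream `IcebergCommitExec` wiring are unchanged in this PR and omitted here.

```rust
use std::num::NonZeroUsize;
use std::sync::Arc;

use datafusion::arrow::datatypes::SchemaRef as ArrowSchemaRef;
use datafusion::catalog::Session;
use datafusion::error::{DataFusionError, Result as DFResult};
use datafusion::physical_plan::ExecutionPlan;
use iceberg::table::Table;

use crate::physical_plan::project::project_with_partition;
use crate::physical_plan::repartition::repartition;
use crate::physical_plan::write::IcebergWriteExec;

/// Hypothetical free-function version of the plan assembly in `insert_into`.
fn build_write_plan(
    state: &dyn Session,
    table: &Table,
    input: Arc<dyn ExecutionPlan>,
    arrow_schema: ArrowSchemaRef,
) -> DFResult<Arc<dyn ExecutionPlan>> {
    // Step 1: append partition values only when the table is partitioned.
    let plan = if !table.metadata().default_partition_spec().is_unpartitioned() {
        project_with_partition(input, table)?
    } else {
        input
    };

    // Step 2: fan the plan out to the session's target parallelism.
    let target_partitions = NonZeroUsize::new(state.config().target_partitions())
        .ok_or_else(|| {
            DataFusionError::Configuration(
                "target_partitions must be greater than 0".to_string(),
            )
        })?;
    let plan = repartition(plan, table.metadata_ref(), target_partitions)?;

    // Step 3: the write node consumes the repartitioned stream.
    Ok(Arc::new(IcebergWriteExec::new(table.clone(), plan, arrow_schema)))
}
```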