Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions icechunk-python/python/icechunk/_icechunk_python.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ class CachingConfig:
num_bytes_chunks: int | None
The number of bytes of chunks to cache.
"""
# String representation of the config; the implementation lives in the native (Rust) `__repr__` of this class.
def __repr__(self) -> str: ...
@property
def num_snapshot_nodes(self) -> int | None:
"""
Expand Down Expand Up @@ -1501,6 +1502,7 @@ class PyRepository:
) -> str: ...

class PySession:
# String representation of the session; the implementation lives in the native (Rust) `__repr__` of this class.
def __repr__(self) -> str: ...
# Alternate constructor: takes a `bytes` payload and returns a `PySession`.
# NOTE(review): presumably the inverse of a serialisation method on this class - confirm.
@classmethod
def from_bytes(cls, data: bytes) -> PySession: ...
# Equality check against an arbitrary object; always answers with a plain `bool`.
def __eq__(self, value: object) -> bool: ...
Expand Down
3 changes: 3 additions & 0 deletions icechunk-python/python/icechunk/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ def __init__(self, session: PySession):
self._session = session
self._allow_changes = False

def __repr__(self) -> str:
    """Return the representation of the wrapped native session object."""
    # Go through the builtin rather than invoking the dunder directly: `repr()`
    # is the supported entry point and validates that a `str` comes back.
    return repr(self._session)

def __eq__(self, value: object) -> bool:
if not isinstance(value, Session):
return False
Expand Down
62 changes: 51 additions & 11 deletions icechunk-python/src/config.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use async_trait::async_trait;
use chrono::{DateTime, Datelike, TimeDelta, Timelike, Utc};
use icechunk::repr::{PyRepr, dataclass_repr, dataclass_str};
use icechunk::storage::RetriesSettings;
use itertools::Itertools;
use pyo3::exceptions::PyValueError;
Expand Down Expand Up @@ -667,8 +668,58 @@ pub struct PyCachingConfig {
pub num_bytes_chunks: Option<u64>,
}

impl PyRepr for PyCachingConfig {
fn __str__(&self) -> String {
dataclass_str(
"icechunk.CachingConfig",
&[
("num_snapshot_nodes", &format_option_to_string(self.num_snapshot_nodes)),
("num_chunk_refs", &format_option_to_string(self.num_chunk_refs)),
(
"num_transaction_changes",
&format_option_to_string(self.num_transaction_changes),
),
(
"num_bytes_attributes",
&format_option_to_string(self.num_bytes_attributes),
),
("num_bytes_chunks", &format_option_to_string(self.num_bytes_chunks)),
],
)
}

fn __repr__(&self) -> String {
dataclass_repr(
"icechunk.CachingConfig",
&[
("num_snapshot_nodes", &format_option_to_string(self.num_snapshot_nodes)),
("num_chunk_refs", &format_option_to_string(self.num_chunk_refs)),
(
"num_transaction_changes",
&format_option_to_string(self.num_transaction_changes),
),
(
"num_bytes_attributes",
&format_option_to_string(self.num_bytes_attributes),
),
("num_bytes_chunks", &format_option_to_string(self.num_bytes_chunks)),
],
)
}
}

#[pymethods]
impl PyCachingConfig {
// Python-visible `__str__`: forwards unchanged to this type's `PyRepr::__str__`.
fn __str__(&self) -> String {
// Only needed because #[pymethods] cannot be used on trait impl blocks
<Self as PyRepr>::__str__(self)
}

// Python-visible `__repr__`: forwards unchanged to this type's `PyRepr::__repr__`.
fn __repr__(&self) -> String {
// Only needed because #[pymethods] cannot be used on trait impl blocks
<Self as PyRepr>::__repr__(self)
}

#[staticmethod]
/// Create a default `CachingConfig` instance
fn default() -> Self {
Expand All @@ -692,17 +743,6 @@ impl PyCachingConfig {
num_bytes_chunks,
}
}

// Single-line repr: `CachingConfig(name=value, ...)` listing the five cache
// limits, each rendered with `format_option_to_string`.
// NOTE(review): the hunk header shrinks this region from 17 to 6 lines, so this
// looks like the implementation being removed by the change - confirm.
pub fn __repr__(&self) -> String {
format!(
r#"CachingConfig(num_snapshot_nodes={snap}, num_chunk_refs={man}, num_transaction_changes={tx}, num_bytes_attributes={att}, num_bytes_chunks={chunks})"#,
snap = format_option_to_string(self.num_snapshot_nodes),
man = format_option_to_string(self.num_chunk_refs),
tx = format_option_to_string(self.num_transaction_changes),
att = format_option_to_string(self.num_bytes_attributes),
chunks = format_option_to_string(self.num_bytes_chunks),
)
}
}

impl From<&PyCachingConfig> for CachingConfig {
Expand Down
76 changes: 76 additions & 0 deletions icechunk-python/src/session.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::{borrow::Cow, ops::Deref, sync::Arc};

use async_stream::try_stream;
use futures::{StreamExt, TryStreamExt};
use icechunk::repr::{PyRepr, dataclass_str};
use icechunk::{Store, session::Session};
use pyo3::{prelude::*, types::PyType};
use tokio::sync::{Mutex, RwLock};
Expand All @@ -19,10 +20,85 @@ use crate::{
#[derive(Clone)]
pub struct PySession(pub Arc<RwLock<Session>>);

impl PyRepr for PySession {
    /// Summarises the session as a non-executable `dataclass_str` block.
    ///
    /// Read-only sessions list `read_only` and `snapshot_id`; writable sessions
    /// additionally list `branch` (`None` when there is none) and
    /// `has_uncommitted_changes`. The session lock is taken with
    /// `blocking_read` for the duration of the call.
    fn __str__(&self) -> String {
        let guard = self.0.blocking_read();

        let is_read_only = guard.read_only();
        let read_only_text = is_read_only.to_string();
        let snapshot_text = guard.snapshot_id().to_string();

        if is_read_only {
            return dataclass_str(
                "icechunk.Session",
                &[("read_only", &read_only_text), ("snapshot_id", &snapshot_text)],
            );
        }

        let branch_text = match guard.branch() {
            Some(name) => name.to_string(),
            None => "None".to_string(),
        };
        let uncommitted_text = guard.has_uncommitted_changes().to_string();

        dataclass_str(
            "icechunk.Session",
            &[
                ("read_only", &read_only_text),
                ("snapshot_id", &snapshot_text),
                ("branch", &branch_text),
                ("has_uncommitted_changes", &uncommitted_text),
            ],
        )
    }

    /// Same text as `__str__`: a session cannot be rebuilt from its repr, so the
    /// non-executable `dataclass_str` layout is used here as well.
    fn __repr__(&self) -> String {
        // Explicit trait path so this never resolves to an inherent method of the same name.
        <Self as PyRepr>::__str__(self)
    }
}

#[pymethods]
/// Most functions in this class block, so they need to `allow_threads` so other
/// python threads can make progress
impl PySession {
fn __str__(&self) -> String {
// Only needed because #[pymethods] cannot be used on trait impl blocks
<Self as PyRepr>::__str__(self)
}

fn __repr__(&self) -> String {
// Only needed because #[pymethods] cannot be used on trait impl blocks
<Self as PyRepr>::__repr__(self)
}

#[classmethod]
fn from_bytes(
_cls: Bound<'_, PyType>,
Expand Down
1 change: 1 addition & 0 deletions icechunk/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ pub mod inspect;
pub mod ops;
pub mod refs;
pub mod repository;
pub mod repr;
pub mod session;
pub mod storage;
pub mod store;
Expand Down
64 changes: 64 additions & 0 deletions icechunk/src/repr.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/// Python-style string renderings for a Rust type.
///
/// The method names mirror Python's `__str__` / `__repr__` protocol; both
/// return an owned `String`.
pub trait PyRepr {
/// Text for Python's `str()`.
fn __str__(&self) -> String;

/// Text for Python's `repr()`.
fn __repr__(&self) -> String;

// TODO fn _repr_html_(&self) -> String;
}

/// Renders a Python-like, non-executable, multi-line summary of an object.
///
/// The first line is `class_name` wrapped in angle brackets. Every
/// `(name, value)` pair in `attributes` then follows on its own line as
/// `name: value`, in the order given. An empty slice yields only the header.
///
/// For class name `icechunk.Session` with the pairs `("read_only", "true")`
/// and `("snapshot_id", "1CECHNKREP0F1RSTCMT0")` the result is:
///
/// ```text
/// <icechunk.Session>
/// read_only: true
/// snapshot_id: 1CECHNKREP0F1RSTCMT0
/// ```
pub fn dataclass_str(class_name: &str, attributes: &[(&str, &str)]) -> String {
    let mut rendered = String::new();

    // Header line: <class_name>
    rendered.push('<');
    rendered.push_str(class_name);
    rendered.push('>');

    // One "name: value" line per attribute, each introduced by a newline.
    for (label, text) in attributes {
        rendered.push('\n');
        rendered.push_str(label);
        rendered.push_str(": ");
        rendered.push_str(text);
    }

    rendered
}

/// Renders a Python-like, executable, multi-line repr of an object.
///
/// The output is a constructor call: `class_name(` on the first line, one
/// `name=value,` keyword argument per line in the order given, and a closing
/// `)` on the last line. Every argument ends with a comma so that the result
/// is valid Python for any number of attributes. An empty slice yields
/// `class_name(` followed by `)` on the next line.
///
/// For class name `icechunk.Config` with the pairs `("field1", "value")` and
/// `("field2", "value")` the result is:
///
/// ```text
/// icechunk.Config(
///  field1=value,
///  field2=value,
/// )
/// ```
pub fn dataclass_repr(class_name: &str, attributes: &[(&str, &str)]) -> String {
    let attrs = attributes
        .iter()
        // The trailing comma separates the keyword arguments; without it a
        // repr with more than one field is not parseable Python.
        .map(|(name, value)| format!("\n {}={},", name, value))
        .collect::<String>();

    format!("{}({}\n)", class_name, attrs)
}

// TODO: consolidate these two functions?
Loading