Skip to content

Add PyString::from_fmt using new PyUnicodeWriter #5199

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions pyo3-ffi/src/compat/py_3_14.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,88 @@ compat_function!(
}
}
);

// Backport of `PyUnicodeWriter_Create` for interpreters older than Python 3.14, built on the
// private `_PyUnicodeWriter` API.
//
// `length` is the number of characters to preallocate; a negative value raises `ValueError`.
// On success the returned writer is heap-allocated with `PyMem_Malloc` and must be released
// with `PyUnicodeWriter_Finish` or `PyUnicodeWriter_Discard`. On failure a Python exception is
// set and a null pointer is returned.
#[cfg(not(any(Py_LIMITED_API, PyPy)))]
compat_function!(
    originally_defined_for(all(Py_3_14, not(Py_LIMITED_API)));

    pub unsafe fn PyUnicodeWriter_Create(length: crate::Py_ssize_t) -> *mut crate::PyUnicodeWriter {
        if length < 0 {
            crate::PyErr_SetString(
                crate::PyExc_ValueError,
                c_str!("length must be positive").as_ptr(),
            );
            return std::ptr::null_mut();
        }

        let size = std::mem::size_of::<crate::_PyUnicodeWriter>();
        let writer: *mut crate::_PyUnicodeWriter = crate::PyMem_Malloc(size).cast();
        if writer.is_null() {
            // Allocation failed: report `MemoryError` instead of handing a null pointer to
            // `_PyUnicodeWriter_Init`, which would dereference it.
            crate::PyErr_NoMemory();
            return std::ptr::null_mut();
        }

        crate::_PyUnicodeWriter_Init(writer);
        // 127 requests room for ASCII-only content as the starting point.
        if crate::_PyUnicodeWriter_Prepare(writer, length, 127) < 0 {
            // Discard releases both the internal buffer and the allocation made above.
            PyUnicodeWriter_Discard(writer.cast());
            return std::ptr::null_mut();
        }
        (*writer).overallocate = 1;
        writer.cast()
    }
);

// Backport of `PyUnicodeWriter_Finish` for interpreters older than Python 3.14.
//
// Hands the writer to `_PyUnicodeWriter_Finish`, then frees the writer allocation itself with
// `PyMem_Free`. The writer must not be used again afterwards. Whatever
// `_PyUnicodeWriter_Finish` returned is passed through to the caller unchanged.
#[cfg(not(any(Py_LIMITED_API, PyPy)))]
compat_function!(
    originally_defined_for(all(Py_3_14, not(Py_LIMITED_API)));

    pub unsafe fn PyUnicodeWriter_Finish(writer: *mut crate::PyUnicodeWriter) -> *mut crate::PyObject {
        let inner: *mut crate::_PyUnicodeWriter = writer.cast();
        let finished = crate::_PyUnicodeWriter_Finish(inner);
        // The struct itself was allocated by `PyUnicodeWriter_Create`, so it is released here.
        crate::PyMem_Free(inner.cast());
        finished
    }
);

// Backport of `PyUnicodeWriter_Discard` for interpreters older than Python 3.14.
//
// Releases the writer's internal state via `_PyUnicodeWriter_Dealloc` and then frees the
// writer allocation with `PyMem_Free`. As with the Python 3.14 C API, passing a null `writer`
// is a no-op.
#[cfg(not(any(Py_LIMITED_API, PyPy)))]
compat_function!(
    originally_defined_for(all(Py_3_14, not(Py_LIMITED_API)));

    pub unsafe fn PyUnicodeWriter_Discard(writer: *mut crate::PyUnicodeWriter) -> () {
        if writer.is_null() {
            // Nothing to release; `_PyUnicodeWriter_Dealloc` must not see a null pointer.
            return;
        }
        crate::_PyUnicodeWriter_Dealloc(writer.cast());
        crate::PyMem_Free(writer.cast())
    }
);

// Backport of `PyUnicodeWriter_WriteChar` for interpreters older than Python 3.14.
//
// Appends the code point `ch` to the writer. Values above 0x10ffff are rejected with
// `ValueError` and -1 is returned; otherwise the result of `_PyUnicodeWriter_WriteChar` is
// returned as-is.
#[cfg(not(any(Py_LIMITED_API, PyPy)))]
compat_function!(
    originally_defined_for(all(Py_3_14, not(Py_LIMITED_API)));

    pub unsafe fn PyUnicodeWriter_WriteChar(
        writer: *mut crate::PyUnicodeWriter,
        ch: crate::Py_UCS4,
    ) -> std::os::raw::c_int {
        if ch <= 0x10ffff {
            return crate::_PyUnicodeWriter_WriteChar(writer.cast(), ch);
        }

        crate::PyErr_SetString(
            crate::PyExc_ValueError,
            c_str!("character must be in range(0x110000)").as_ptr(),
        );
        -1
    }
);

// Backport of `PyUnicodeWriter_WriteUTF8` for interpreters older than Python 3.14.
//
// Builds a temporary Python string from the `size` bytes at `str` and appends it to the
// writer. A negative `size` means `str` is NUL-terminated and its length is measured with
// `strlen`. Returns -1 if the temporary string could not be created, otherwise the result of
// `_PyUnicodeWriter_WriteStr`.
#[cfg(not(any(Py_LIMITED_API, PyPy)))]
compat_function!(
    originally_defined_for(all(Py_3_14, not(Py_LIMITED_API)));

    pub unsafe fn PyUnicodeWriter_WriteUTF8(
        writer: *mut crate::PyUnicodeWriter,
        str: *const std::os::raw::c_char,
        size: crate::Py_ssize_t,
    ) -> std::os::raw::c_int {
        let byte_len = if size >= 0 {
            size
        } else {
            libc::strlen(str) as isize
        };

        let decoded = crate::PyUnicode_FromStringAndSize(str, byte_len);
        if decoded.is_null() {
            return -1;
        }

        let status = crate::_PyUnicodeWriter_WriteStr(writer.cast(), decoded);
        // The temporary string is no longer needed once the write has been attempted.
        crate::Py_DECREF(decoded);
        status
    }
);
76 changes: 76 additions & 0 deletions pyo3-ffi/src/cpython/unicodeobject.rs
Original file line number Diff line number Diff line change
Expand Up @@ -686,6 +686,82 @@ extern "C" {
// skipped PyUnicode_GetMax
}

// On Python 3.14+ `PyUnicodeWriter` is declared as an opaque type: it is only ever handled
// through pointers passed to the `PyUnicodeWriter_*` functions.
#[cfg(Py_3_14)]
opaque_struct!(pub PyUnicodeWriter);

/// Before Python 3.14 `PyUnicodeWriter` is an alias for the private `_PyUnicodeWriter` struct,
/// so both names refer to the same type.
#[cfg(not(Py_3_14))]
pub type PyUnicodeWriter = _PyUnicodeWriter;

// Rust-side declaration of the private writer struct, used by the compat shims for
// interpreters older than Python 3.14.
// NOTE(review): the field order and types are presumably meant to mirror CPython's
// `_PyUnicodeWriter` definition exactly; verify against every supported Python version.
#[cfg(not(Py_3_14))]
#[doc(hidden)]
#[repr(C)]
pub struct _PyUnicodeWriter {
buffer: *mut PyObject,
data: *mut c_void,
kind: c_int,
// `maxchar`, `size` and `pos` are read by `_PyUnicodeWriter_Prepare` to decide whether the
// slow path `_PyUnicodeWriter_PrepareInternal` has to be called.
pub(crate) maxchar: Py_UCS4,
pub(crate) size: Py_ssize_t,
pub(crate) pos: Py_ssize_t,
min_length: Py_ssize_t,
min_char: Py_UCS4,
// Set to 1 by the `PyUnicodeWriter_Create` compat shim.
pub(crate) overallocate: c_char,
readonly: c_char,
}

// Foreign declarations for the unicode writer API.
//
// The `PyUnicodeWriter_*` functions are declared only for Python 3.14+. The underscore-prefixed
// private functions are declared only for older versions, where the compat shims use them to
// implement the same `PyUnicodeWriter_*` names.
extern "C" {
// Creates a writer; `length` is the requested initial length.
#[cfg(Py_3_14)]
pub fn PyUnicodeWriter_Create(length: Py_ssize_t) -> *mut PyUnicodeWriter;
// Finishes the writer and returns the resulting object.
#[cfg(Py_3_14)]
pub fn PyUnicodeWriter_Finish(writer: *mut PyUnicodeWriter) -> *mut PyObject;
// Private counterpart called by the `PyUnicodeWriter_Finish` compat shim.
#[cfg(not(Py_3_14))]
pub(crate) fn _PyUnicodeWriter_Finish(writer: *mut _PyUnicodeWriter) -> *mut PyObject;
// Releases a writer without producing a string.
#[cfg(Py_3_14)]
pub fn PyUnicodeWriter_Discard(writer: *mut PyUnicodeWriter);
// Private counterpart called by the `PyUnicodeWriter_Discard` compat shim.
#[cfg(not(Py_3_14))]
pub(crate) fn _PyUnicodeWriter_Dealloc(writer: *mut _PyUnicodeWriter);
// Initializes a caller-allocated writer struct; called by the `PyUnicodeWriter_Create` shim.
#[cfg(not(Py_3_14))]
pub(crate) fn _PyUnicodeWriter_Init(writer: *mut _PyUnicodeWriter);
// Slow path invoked by `_PyUnicodeWriter_Prepare` when its inline check does not succeed.
#[cfg(not(Py_3_14))]
pub(crate) fn _PyUnicodeWriter_PrepareInternal(
writer: *mut _PyUnicodeWriter,
length: Py_ssize_t,
maxchars: Py_UCS4,
) -> c_int;
// Appends a single code point to the writer.
#[cfg(Py_3_14)]
pub fn PyUnicodeWriter_WriteChar(writer: *mut PyUnicodeWriter, ch: Py_UCS4) -> c_int;
// Private counterpart called by the `PyUnicodeWriter_WriteChar` compat shim.
#[cfg(not(Py_3_14))]
pub(crate) fn _PyUnicodeWriter_WriteChar(writer: *mut _PyUnicodeWriter, ch: Py_UCS4) -> c_int;
// Appends an existing Python string object; called by the `PyUnicodeWriter_WriteUTF8` shim.
#[cfg(not(Py_3_14))]
pub(crate) fn _PyUnicodeWriter_WriteStr(
writer: *mut _PyUnicodeWriter,
str: *mut PyObject,
) -> c_int;
// Appends `size` bytes of UTF-8 text starting at `str`.
#[cfg(Py_3_14)]
pub fn PyUnicodeWriter_WriteUTF8(
writer: *mut PyUnicodeWriter,
str: *const c_char,
size: Py_ssize_t,
) -> c_int;
}

/// Inline fast path in front of `_PyUnicodeWriter_PrepareInternal`.
///
/// Returns 0 without calling into the interpreter when the writer's current `maxchar` already
/// covers `maxchars` and `length` fits into the space left between `pos` and `size`, or when
/// `length` is zero. Otherwise the result of `_PyUnicodeWriter_PrepareInternal` is returned.
#[cfg(not(Py_3_14))]
#[inline(always)]
pub(crate) unsafe fn _PyUnicodeWriter_Prepare(
    writer: *mut _PyUnicodeWriter,
    length: Py_ssize_t,
    maxchars: Py_UCS4,
) -> c_int {
    let already_fits =
        maxchars <= (*writer).maxchar && length <= (*writer).size - (*writer).pos;

    if already_fits || length == 0 {
        0
    } else {
        _PyUnicodeWriter_PrepareInternal(writer, length, maxchars)
    }
}

// skipped _PyUnicodeWriter
// skipped _PyUnicodeWriter_Init
// skipped _PyUnicodeWriter_Prepare
Expand Down
155 changes: 155 additions & 0 deletions src/fmt.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
//! This module provides the `PyUnicodeWriter` struct, a utility for efficiently constructing
//! Python strings through Rust's `fmt::Write` trait, together with the `py_format!` macro.
//! The writer allows for incremental string construction, without the need for repeated
//! allocations, and is particularly useful for building strings in a performance-sensitive
//! context. `PyUnicodeWriter` is only compiled when neither `Py_LIMITED_API` nor `PyPy` is set.
#[cfg(not(any(Py_LIMITED_API, PyPy)))]
use {
crate::ffi::compat::{
PyUnicodeWriter_Create, PyUnicodeWriter_Discard, PyUnicodeWriter_Finish,
PyUnicodeWriter_WriteChar, PyUnicodeWriter_WriteUTF8,
},
crate::ffi_ptr_ext::FfiPtrExt,
crate::impl_::callback::WrappingCastTo,
crate::types::{PyAnyMethods, PyString},
crate::{ffi, Bound, PyErr, PyResult, Python},
std::ptr::NonNull,
std::{fmt, mem},
};

/// This is like the [`format!`] macro, but it returns a `PyString` instead of a `String`.
///
/// The first argument is the `Python<'py>` token; everything after the first comma is passed
/// to `format_args!` unchanged. The macro expands to a call to `PyString::from_fmt`, so it
/// evaluates to a `PyResult` holding the new Python string.
#[macro_export]
macro_rules! py_format {
    ($py: expr, $($arg:tt)*) => {
        // `format_args!` is fully qualified so the expansion does not depend on what the
        // calling scope has named `format_args`.
        $crate::types::PyString::from_fmt($py, ::std::format_args!($($arg)*))
    };
}

#[cfg(not(any(Py_LIMITED_API, PyPy)))]
/// The `PyUnicodeWriter` is a utility for efficiently constructing Python strings.
///
/// It owns a writer created by `PyUnicodeWriter_Create`. The writer is released either by
/// `into_py_string`, which finishes it into a Python string, or by the `Drop` implementation,
/// which discards it.
pub struct PyUnicodeWriter {
/// Pointer to the underlying writer; never null.
writer: NonNull<ffi::PyUnicodeWriter>,
/// Python error captured by the most recent failed write, until `take_error` removes it.
last_error: Option<PyErr>,
}

#[cfg(not(any(Py_LIMITED_API, PyPy)))]
impl PyUnicodeWriter {
    /// Creates a new, empty `PyUnicodeWriter` without preallocating any capacity.
    pub fn new(py: Python<'_>) -> PyResult<Self> {
        Self::with_capacity(py, 0)
    }

    /// Creates a new `PyUnicodeWriter`, asking the interpreter to preallocate `capacity`
    /// characters.
    ///
    /// Returns the pending Python exception if the writer could not be created.
    pub fn with_capacity(py: Python<'_>, capacity: usize) -> PyResult<Self> {
        let raw = unsafe { PyUnicodeWriter_Create(capacity.wrapping_cast()) };
        NonNull::new(raw)
            .map(|writer| PyUnicodeWriter {
                writer,
                last_error: None,
            })
            .ok_or_else(|| PyErr::fetch(py))
    }

    /// Consumes the writer and returns the constructed string as a `Bound<PyString>`.
    ///
    /// If an earlier write failed and its error has not been taken yet, that error is returned
    /// instead and the writer is discarded.
    pub fn into_py_string(mut self, py: Python<'_>) -> PyResult<Bound<'_, PyString>> {
        if let Some(error) = self.take_error() {
            return Err(error);
        }

        let raw = self.as_ptr();
        // `PyUnicodeWriter_Finish` releases the writer, so `Drop` must not discard it again.
        mem::forget(self);
        let finished = unsafe { PyUnicodeWriter_Finish(raw).assume_owned_or_err(py)? };
        Ok(unsafe { finished.downcast_into_unchecked() })
    }

    /// Removes and returns the Python error recorded by the last failed `fmt::Write` call, if
    /// there is one.
    pub fn take_error(&mut self) -> Option<PyErr> {
        self.last_error.take()
    }

    /// Raw pointer to the underlying writer.
    fn as_ptr(&self) -> *mut ffi::PyUnicodeWriter {
        self.writer.as_ptr()
    }

    /// Fetches the currently raised Python exception and stores it as the last error.
    fn set_error(&mut self) {
        let fetched = Python::with_gil(PyErr::fetch);
        self.last_error = Some(fetched);
    }
}

/// Lets `write!` and `fmt::Arguments` write straight into the Python writer.
///
/// A failing write records the raised Python exception (see `take_error`) and reports
/// `fmt::Error`, because `fmt::Error` itself cannot carry any details.
#[cfg(not(any(Py_LIMITED_API, PyPy)))]
impl fmt::Write for PyUnicodeWriter {
    fn write_str(&mut self, s: &str) -> fmt::Result {
        let status = unsafe {
            PyUnicodeWriter_WriteUTF8(self.as_ptr(), s.as_ptr().cast(), s.len() as isize)
        };
        if status >= 0 {
            return Ok(());
        }
        self.set_error();
        Err(fmt::Error)
    }

    fn write_char(&mut self, c: char) -> fmt::Result {
        let status = unsafe { PyUnicodeWriter_WriteChar(self.as_ptr(), u32::from(c)) };
        if status >= 0 {
            return Ok(());
        }
        self.set_error();
        Err(fmt::Error)
    }
}

/// Discards the underlying writer when a `PyUnicodeWriter` is dropped without being finished.
#[cfg(not(any(Py_LIMITED_API, PyPy)))]
impl Drop for PyUnicodeWriter {
    fn drop(&mut self) {
        let raw = self.as_ptr();
        // SAFETY: `raw` was returned by `PyUnicodeWriter_Create`, and `into_py_string` forgets
        // `self` before finishing the writer, so the writer is released only once.
        unsafe { PyUnicodeWriter_Discard(raw) }
    }
}

#[cfg(test)]
mod tests {
    #[cfg(not(any(Py_LIMITED_API, PyPy)))]
    use super::*;
    use crate::types::PyStringMethods;
    use crate::{IntoPyObject, Python};

    /// Writes through both `write!` and `write_char` and checks the finished string.
    #[test]
    // The literal argument is deliberate: it exercises argument formatting.
    #[allow(clippy::write_literal)]
    #[cfg(not(any(Py_LIMITED_API, PyPy)))]
    fn unicode_writer_test() {
        use std::fmt::Write;
        Python::with_gil(|py| {
            let mut writer = PyUnicodeWriter::new(py).unwrap();
            write!(writer, "Hello {}!", "world").unwrap();
            writer.write_char('😎').unwrap();
            let result = writer.into_py_string(py).unwrap();
            assert_eq!(result.to_string(), "Hello world!😎");
        });
    }

    /// `py_format!` with an argument must produce the formatted text, not merely succeed.
    #[test]
    fn test_pystring_from_fmt() {
        Python::with_gil(|py| {
            let formatted = py_format!(py, "Hello {}!", "world").unwrap();
            assert_eq!(formatted.to_cow().unwrap(), "Hello world!");
        });
    }

    /// A format string without arguments must come back unchanged.
    #[test]
    fn test_pystring_from_fmt_literal() {
        Python::with_gil(|py| {
            let literal = py_format!(py, "no arguments here").unwrap();
            assert_eq!(literal.to_cow().unwrap(), "no arguments here");
        });
    }

    /// Formatting a Python object goes through its `Display` implementation.
    #[test]
    fn test_complex_format() {
        Python::with_gil(|py| {
            let complex_value = (42, "foo", [0; 0]).into_pyobject(py).unwrap();
            let py_string = py_format!(py, "This is some complex value: {complex_value}").unwrap();
            let actual = py_string.to_cow().unwrap();
            let expected = "This is some complex value: (42, 'foo', [])";
            assert_eq!(actual, expected);
        });
    }
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,7 @@ pub mod coroutine;
mod err;
pub mod exceptions;
pub mod ffi;
pub mod fmt;
mod gil;
#[doc(hidden)]
pub mod impl_;
Expand Down
32 changes: 31 additions & 1 deletion src/types/string.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#[cfg(not(Py_LIMITED_API))]
use crate::exceptions::PyUnicodeDecodeError;
use crate::ffi_ptr_ext::FfiPtrExt;
#[cfg(not(any(Py_LIMITED_API, PyPy)))]
use crate::fmt::PyUnicodeWriter;
use crate::instance::Borrowed;
use crate::py_result_ext::PyResultExt;
use crate::types::any::PyAnyMethods;
Expand All @@ -9,7 +11,9 @@ use crate::types::PyBytes;
use crate::{ffi, Bound, Py, PyAny, PyResult, Python};
use std::borrow::Cow;
use std::ffi::CString;
use std::str;
#[cfg(not(any(Py_LIMITED_API, PyPy)))]
use std::fmt::Write as _;
use std::{fmt, str};

/// Represents raw data backing a Python `str`.
///
Expand Down Expand Up @@ -209,6 +213,32 @@ impl PyString {
.downcast_into_unchecked()
}
}

/// Creates a Python string using a format string.
///
/// This function is similar to [`format!`], but it returns a Python string object instead of a Rust string.
pub fn from_fmt<'py>(
py: Python<'py>,
args: fmt::Arguments<'_>,
) -> PyResult<Bound<'py, PyString>> {
if let Some(static_string) = args.as_str() {
return Ok(PyString::new(py, static_string));
};

#[cfg(not(any(Py_LIMITED_API, PyPy)))]
{
let mut writer = PyUnicodeWriter::new(py)?;
writer
.write_fmt(args)
.map_err(|_| writer.take_error().expect("expected error"))?;
writer.into_py_string(py)
}

#[cfg(any(Py_LIMITED_API, PyPy))]
{
Ok(PyString::new(py, &format!("{args}")))
}
}
}

/// Implementation of functionality for [`PyString`].
Expand Down
Loading