Skip to content

Commit

Permalink
Auto merge of rust-lang#136932 - m-ou-se:fmt-width-precision-u16, r=<…
Browse files Browse the repository at this point in the history
…try>

Reduce formatting `width` and `precision` to 16 bits

This is reduces the `width` and `precision` fields in format strings to 16 bits. They are currently full `usize`s, but it's a bit nonsensical that we need to support the case where someone wants to pad their value to eighteen quintillion spaces and/or have eighteen quintillion digits of precision.

This is a breaking change, but probably affects virtually no code. Let's do a crater run to find out. Marking this as experiment for now.

---

There are three ways to set a width or precision today:

1. Directly a formatting string, e.g. `println!("{a:1234}")`
2. Indirectly in a formatting string, e.g. `println!("{a:width$}", width=1234)`
3. Through the unstable `FormattingOptions::width` method.

This PR:

- Adds a compiler error for 1. (`println!("{a:9999999}")` no longer compiles and gives a clear error.)
- Adds a runtime check for 2. (`println!("{a:width$}, width=9999999)` will panic.)
- Changes the signature of `FormattingOptions::width` to take a `u16` instead.

---

Additional context:

All the formatting flags and options are currently:

- The `+` flag (1 bit)
- The `-` flag (1 bit)
- The `#` flag (1 bit)
- The `0` flag (1 bit)
- The `x?` flag (1 bit)
- The `X?` flag (1 bit)
- The alignment (2 bits)
- The fill character (21 bits)
- Whether a width is specified (1 bit)
- Whether a precision is specified (1 bit)
- If used, the width (a full usize)
- If used, the precision (a full usize)

Everything except the last two can simply fit in a `u32` (those add up to 31 bits in total).

If we can accept a max width and precision of u16::MAX, we can make a `FormattingOptions` that is exactly 64 bits in size; the same size as a thin reference on most platforms.

If, additionally, we also limit the number of formatting arguments to u16::MAX, we can also reduce the size of `fmt::Arguments` (that is, of a `format_args!()` expression).
  • Loading branch information
bors committed Feb 12, 2025
2 parents 552a959 + 7355738 commit 7af7790
Show file tree
Hide file tree
Showing 12 changed files with 166 additions and 65 deletions.
2 changes: 1 addition & 1 deletion compiler/rustc_ast/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ pub enum FormatAlignment {
#[derive(Clone, Encodable, Decodable, Debug, PartialEq, Eq)]
pub enum FormatCount {
/// `{:5}` or `{:.5}`
Literal(usize),
Literal(u16),
/// `{:.*}`, `{:.5$}`, or `{:a$}`, etc.
Argument(FormatArgPosition),
}
11 changes: 11 additions & 0 deletions compiler/rustc_ast_lowering/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2151,6 +2151,17 @@ impl<'hir> LoweringContext<'_, 'hir> {
self.expr(sp, hir::ExprKind::Lit(lit))
}

pub(super) fn expr_u16(&mut self, sp: Span, value: u16) -> hir::Expr<'hir> {
let lit = self.arena.alloc(hir::Lit {
span: sp,
node: ast::LitKind::Int(
u128::from(value).into(),
ast::LitIntType::Unsigned(ast::UintTy::U16),
),
});
self.expr(sp, hir::ExprKind::Lit(lit))
}

pub(super) fn expr_char(&mut self, sp: Span, value: char) -> hir::Expr<'hir> {
let lit = self.arena.alloc(hir::Lit { span: sp, node: ast::LitKind::Char(value) });
self.expr(sp, hir::ExprKind::Lit(lit))
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_ast_lowering/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ fn make_count<'hir>(
hir::LangItem::FormatCount,
sym::Is,
));
let value = ctx.arena.alloc_from_iter([ctx.expr_usize(sp, *n)]);
let value = ctx.arena.alloc_from_iter([ctx.expr_u16(sp, *n)]);
ctx.expr_call_mut(sp, count_is, value)
}
Some(FormatCount::Argument(arg)) => {
Expand Down
18 changes: 9 additions & 9 deletions compiler/rustc_parse_format/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ pub enum DebugHex {
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum Count<'a> {
/// The count is specified explicitly.
CountIs(usize),
CountIs(u16),
/// The count is specified by the argument with the given name.
CountIsName(&'a str, InnerSpan),
/// The count is specified by the argument at the given index.
Expand Down Expand Up @@ -565,7 +565,7 @@ impl<'a> Parser<'a> {
/// consuming a macro argument, `None` if it's the case.
fn position(&mut self) -> Option<Position<'a>> {
if let Some(i) = self.integer() {
Some(ArgumentIs(i))
Some(ArgumentIs(i.into()))
} else {
match self.cur.peek() {
Some(&(lo, c)) if rustc_lexer::is_id_start(c) => {
Expand Down Expand Up @@ -771,7 +771,7 @@ impl<'a> Parser<'a> {
/// width.
fn count(&mut self, start: usize) -> Count<'a> {
if let Some(i) = self.integer() {
if self.consume('$') { CountIsParam(i) } else { CountIs(i) }
if self.consume('$') { CountIsParam(i.into()) } else { CountIs(i) }
} else {
let tmp = self.cur.clone();
let word = self.word();
Expand Down Expand Up @@ -822,15 +822,15 @@ impl<'a> Parser<'a> {
word
}

fn integer(&mut self) -> Option<usize> {
let mut cur: usize = 0;
fn integer(&mut self) -> Option<u16> {
let mut cur: u16 = 0;
let mut found = false;
let mut overflow = false;
let start = self.current_pos();
while let Some(&(_, c)) = self.cur.peek() {
if let Some(i) = c.to_digit(10) {
let (tmp, mul_overflow) = cur.overflowing_mul(10);
let (tmp, add_overflow) = tmp.overflowing_add(i as usize);
let (tmp, add_overflow) = tmp.overflowing_add(i as u16);
if mul_overflow || add_overflow {
overflow = true;
}
Expand All @@ -847,11 +847,11 @@ impl<'a> Parser<'a> {
let overflowed_int = &self.input[start..end];
self.err(
format!(
"integer `{}` does not fit into the type `usize` whose range is `0..={}`",
"integer `{}` does not fit into the type `u16` whose range is `0..={}`",
overflowed_int,
usize::MAX
u16::MAX
),
"integer out of range for `usize`",
"integer out of range for `u16`",
self.span(start, end),
);
}
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,7 @@ symbols! {
from_residual,
from_size_align_unchecked,
from_str_method,
from_u16,
from_usize,
from_yeet,
fs_create_dir,
Expand Down
12 changes: 6 additions & 6 deletions library/core/src/fmt/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ fn float_to_decimal_common_exact<T>(
fmt: &mut Formatter<'_>,
num: &T,
sign: flt2dec::Sign,
precision: usize,
precision: u16,
) -> Result
where
T: flt2dec::DecodableFloat,
Expand All @@ -40,7 +40,7 @@ where
flt2dec::strategy::grisu::format_exact,
*num,
sign,
precision,
precision.into(),
&mut buf,
&mut parts,
);
Expand All @@ -55,7 +55,7 @@ fn float_to_decimal_common_shortest<T>(
fmt: &mut Formatter<'_>,
num: &T,
sign: flt2dec::Sign,
precision: usize,
precision: u16,
) -> Result
where
T: flt2dec::DecodableFloat,
Expand All @@ -68,7 +68,7 @@ where
flt2dec::strategy::grisu::format_shortest,
*num,
sign,
precision,
precision.into(),
&mut buf,
&mut parts,
);
Expand Down Expand Up @@ -101,7 +101,7 @@ fn float_to_exponential_common_exact<T>(
fmt: &mut Formatter<'_>,
num: &T,
sign: flt2dec::Sign,
precision: usize,
precision: u16,
upper: bool,
) -> Result
where
Expand All @@ -113,7 +113,7 @@ where
flt2dec::strategy::grisu::format_exact,
*num,
sign,
precision,
precision.into(),
upper,
&mut buf,
&mut parts,
Expand Down
49 changes: 26 additions & 23 deletions library/core/src/fmt/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,8 +294,8 @@ pub struct FormattingOptions {
flags: u32,
fill: char,
align: Option<Alignment>,
width: Option<usize>,
precision: Option<usize>,
width: Option<u16>,
precision: Option<u16>,
}

impl FormattingOptions {
Expand Down Expand Up @@ -389,7 +389,7 @@ impl FormattingOptions {
/// the padding specified by [`FormattingOptions::fill`]/[`FormattingOptions::align`]
/// will be used to take up the required space.
#[unstable(feature = "formatting_options", issue = "118117")]
pub fn width(&mut self, width: Option<usize>) -> &mut Self {
pub fn width(&mut self, width: Option<u16>) -> &mut Self {
self.width = width;
self
}
Expand All @@ -403,7 +403,7 @@ impl FormattingOptions {
/// - For floating-point types, this indicates how many digits after the
/// decimal point should be printed.
#[unstable(feature = "formatting_options", issue = "118117")]
pub fn precision(&mut self, precision: Option<usize>) -> &mut Self {
pub fn precision(&mut self, precision: Option<u16>) -> &mut Self {
self.precision = precision;
self
}
Expand Down Expand Up @@ -455,12 +455,12 @@ impl FormattingOptions {
}
/// Returns the current width.
#[unstable(feature = "formatting_options", issue = "118117")]
pub const fn get_width(&self) -> Option<usize> {
pub const fn get_width(&self) -> Option<u16> {
self.width
}
/// Returns the current precision.
#[unstable(feature = "formatting_options", issue = "118117")]
pub const fn get_precision(&self) -> Option<usize> {
pub const fn get_precision(&self) -> Option<u16> {
self.precision
}
/// Returns the current precision.
Expand Down Expand Up @@ -1499,15 +1499,18 @@ unsafe fn run(fmt: &mut Formatter<'_>, arg: &rt::Placeholder, args: &[rt::Argume
unsafe { value.fmt(fmt) }
}

unsafe fn getcount(args: &[rt::Argument<'_>], cnt: &rt::Count) -> Option<usize> {
unsafe fn getcount(args: &[rt::Argument<'_>], cnt: &rt::Count) -> Option<u16> {
match *cnt {
#[cfg(bootstrap)]
rt::Count::Is(n) => Some(n as u16),
#[cfg(not(bootstrap))]
rt::Count::Is(n) => Some(n),
rt::Count::Implied => None,
rt::Count::Param(i) => {
debug_assert!(i < args.len());
// SAFETY: cnt and args come from the same Arguments,
// which guarantees this index is always within bounds.
unsafe { args.get_unchecked(i).as_usize() }
unsafe { args.get_unchecked(i).as_u16() }
}
}
}
Expand All @@ -1516,11 +1519,11 @@ unsafe fn getcount(args: &[rt::Argument<'_>], cnt: &rt::Count) -> Option<usize>
#[must_use = "don't forget to write the post padding"]
pub(crate) struct PostPadding {
fill: char,
padding: usize,
padding: u16,
}

impl PostPadding {
fn new(fill: char, padding: usize) -> PostPadding {
fn new(fill: char, padding: u16) -> PostPadding {
PostPadding { fill, padding }
}

Expand Down Expand Up @@ -1634,7 +1637,7 @@ impl<'a> Formatter<'a> {
}
// Check if we're over the minimum width, if so then we can also
// just write the bytes.
Some(min) if width >= min => {
Some(min) if width >= usize::from(min) => {
write_prefix(self, sign, prefix)?;
self.buf.write_str(buf)
}
Expand All @@ -1645,7 +1648,7 @@ impl<'a> Formatter<'a> {
let old_align =
crate::mem::replace(&mut self.options.align, Some(Alignment::Right));
write_prefix(self, sign, prefix)?;
let post_padding = self.padding(min - width, Alignment::Right)?;
let post_padding = self.padding(min - width as u16, Alignment::Right)?;
self.buf.write_str(buf)?;
post_padding.write(self)?;
self.options.fill = old_fill;
Expand All @@ -1654,7 +1657,7 @@ impl<'a> Formatter<'a> {
}
// Otherwise, the sign and prefix goes after the padding
Some(min) => {
let post_padding = self.padding(min - width, Alignment::Right)?;
let post_padding = self.padding(min - width as u16, Alignment::Right)?;
write_prefix(self, sign, prefix)?;
self.buf.write_str(buf)?;
post_padding.write(self)
Expand Down Expand Up @@ -1703,7 +1706,7 @@ impl<'a> Formatter<'a> {
// If our string is longer that the precision, then we must have
// truncation. However other flags like `fill`, `width` and `align`
// must act as always.
if let Some((i, _)) = s.char_indices().nth(max) {
if let Some((i, _)) = s.char_indices().nth(usize::from(max)) {
// LLVM here can't prove that `..i` won't panic `&s[..i]`, but
// we know that it can't panic. Use `get` + `unwrap_or` to avoid
// `unsafe` and otherwise don't emit any panic-related code
Expand All @@ -1724,14 +1727,14 @@ impl<'a> Formatter<'a> {
let chars_count = s.chars().count();
// If we're under the maximum width, check if we're over the minimum
// width, if so it's as easy as just emitting the string.
if chars_count >= width {
if chars_count >= usize::from(width) {
self.buf.write_str(s)
}
// If we're under both the maximum and the minimum width, then fill
// up the minimum width with the specified string + some alignment.
else {
let align = Alignment::Left;
let post_padding = self.padding(width - chars_count, align)?;
let post_padding = self.padding(width - chars_count as u16, align)?;
self.buf.write_str(s)?;
post_padding.write(self)
}
Expand All @@ -1745,7 +1748,7 @@ impl<'a> Formatter<'a> {
/// thing that is being padded.
pub(crate) fn padding(
&mut self,
padding: usize,
padding: u16,
default: Alignment,
) -> result::Result<PostPadding, Error> {
let align = self.align().unwrap_or(default);
Expand Down Expand Up @@ -1785,19 +1788,19 @@ impl<'a> Formatter<'a> {

// remove the sign from the formatted parts
formatted.sign = "";
width = width.saturating_sub(sign.len());
width = width.saturating_sub(sign.len() as u16);
self.options.fill = '0';
self.options.align = Some(Alignment::Right);
}

// remaining parts go through the ordinary padding process.
let len = formatted.len();
let ret = if width <= len {
let ret = if usize::from(width) <= len {
// no padding
// SAFETY: Per the precondition.
unsafe { self.write_formatted_parts(&formatted) }
} else {
let post_padding = self.padding(width - len, Alignment::Right)?;
let post_padding = self.padding(width - len as u16, Alignment::Right)?;
// SAFETY: Per the precondition.
unsafe {
self.write_formatted_parts(&formatted)?;
Expand Down Expand Up @@ -2029,7 +2032,7 @@ impl<'a> Formatter<'a> {
#[must_use]
#[stable(feature = "fmt_flags", since = "1.5.0")]
pub fn width(&self) -> Option<usize> {
self.options.width
self.options.width.map(|x| x as usize)
}

/// Returns the optionally specified precision for numeric types.
Expand Down Expand Up @@ -2060,7 +2063,7 @@ impl<'a> Formatter<'a> {
#[must_use]
#[stable(feature = "fmt_flags", since = "1.5.0")]
pub fn precision(&self) -> Option<usize> {
self.options.precision
self.options.precision.map(|x| x as usize)
}

/// Determines if the `+` flag was specified.
Expand Down Expand Up @@ -2800,7 +2803,7 @@ pub(crate) fn pointer_fmt_inner(ptr_addr: usize, f: &mut Formatter<'_>) -> Resul
f.options.flags |= 1 << (rt::Flag::SignAwareZeroPad as u32);

if f.options.width.is_none() {
f.options.width = Some((usize::BITS / 4) as usize + 2);
f.options.width = Some((usize::BITS / 4) as u16 + 2);
}
}
f.options.flags |= 1 << (rt::Flag::Alternate as u32);
Expand Down
13 changes: 10 additions & 3 deletions library/core/src/fmt/rt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,11 @@ pub enum Alignment {
#[derive(Copy, Clone)]
pub enum Count {
/// Specified with a literal number, stores the value
#[cfg(bootstrap)]
Is(usize),
/// Specified with a literal number, stores the value
#[cfg(not(bootstrap))]
Is(u16),
/// Specified using `$` and `*` syntaxes, stores the index into `args`
Param(usize),
/// Not specified
Expand All @@ -74,7 +78,7 @@ enum ArgumentType<'a> {
formatter: unsafe fn(NonNull<()>, &mut Formatter<'_>) -> Result,
_lifetime: PhantomData<&'a ()>,
},
Count(usize),
Count(u16),
}

/// This struct represents a generic "argument" which is taken by format_args!().
Expand Down Expand Up @@ -151,7 +155,10 @@ impl Argument<'_> {
}
#[inline]
pub const fn from_usize(x: &usize) -> Argument<'_> {
Argument { ty: ArgumentType::Count(*x) }
if *x > u16::MAX as usize {
panic!("Formatting argument out of range");
};
Argument { ty: ArgumentType::Count(*x as u16) }
}

/// Format this placeholder argument.
Expand Down Expand Up @@ -181,7 +188,7 @@ impl Argument<'_> {
}

#[inline]
pub(super) const fn as_usize(&self) -> Option<usize> {
pub(super) const fn as_u16(&self) -> Option<u16> {
match self.ty {
ArgumentType::Count(count) => Some(count),
ArgumentType::Placeholder { .. } => None,
Expand Down
Loading

0 comments on commit 7af7790

Please sign in to comment.