Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 68 additions & 44 deletions c2rust-transpile/src/translator/literals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,27 @@ impl<'c> Translation<'c> {
CTypeKind::ConstantArray(ty, n) => {
// Convert all of the provided initializer values

let to_array_element = |id: CExprId| -> TranslationResult<_> {
self.convert_expr(ctx.used(), id)?.result_map(|x| {
// Array literals require all of their elements to be
// the correct type; they will not use implicit casts to
// change mut to const. This becomes a problem when an
// array literal is used in a position where there is no
// type information available to force its type to the
// correct const or mut variation. To avoid this issue
// we manually insert the otherwise elided casts in this
// particular context.
if let CExprKind::ImplicitCast(ty, _, CastKind::ConstCast, _, _) =
self.ast_context[id].kind
{
let t = self.convert_type(ty.ctype)?;
Ok(mk().cast_expr(x, t))
} else {
Ok(x)
}
})
};

// We need to handle the 4 cases in `str_init.c` with identical initializers:
// * `ptr_extra_braces`
// * `array_extra_braces`
Expand All @@ -189,60 +210,63 @@ impl<'c> Translation<'c> {
// which is possible because C allows extra braces around any initializer element.
// For non-string literal elements, the clang AST already fixes this up,
// but doesn't for string literals, so we need to handle them specially.
// The existing logic below this special cases handles all except `array_extra_braces`.
// The existing logic below this special case handles all except `array_extra_braces`.
// `array_extra_braces` is uniquely identified by:
// * there being only one element in the initializer list
// * the element type of the array being `CTypeKind::Char` (w/o this, `array_of_arrays` is included)
// * the expr kind being a string literal (`CExprKind::Literal` of a `CLiteral::String`).
if let &[id] = ids {
let is_string_literal = |id: CExprId| {
let ty_kind = &self.ast_context.resolve_type(ty).kind;
let expr_kind = &self.ast_context.index(id).kind;
let is_char_array = matches!(*ty_kind, CTypeKind::Char);
let is_str_literal =
matches!(*expr_kind, CExprKind::Literal(_, CLiteral::String { .. }));
if is_char_array && is_str_literal {
return self.convert_expr(ctx.used(), id);
}
}
is_char_array && is_str_literal
};

if ids.is_empty() {
// this was likely a C array of the form `int x[16] = {}`,
// we'll emit that as [0; 16].
let len = mk().lit_expr(mk().int_unsuffixed_lit(n as u128));
self.implicit_default_expr(ty, ctx.is_static)?
.and_then(|default_value| {
Ok(WithStmts::new_val(mk().repeat_expr(default_value, len)))
})
} else {
Ok(ids
.iter()
.map(|id| {
self.convert_expr(ctx.used(), *id)?.result_map(|x| {
// Array literals require all of their elements to be
// the correct type; they will not use implicit casts to
// change mut to const. This becomes a problem when an
// array literal is used in a position where there is no
// type information available to force its type to the
// correct const or mut variation. To avoid this issue
// we manually insert the otherwise elided casts in this
// particular context.
if let CExprKind::ImplicitCast(ty, _, CastKind::ConstCast, _, _) =
self.ast_context[*id].kind
{
let t = self.convert_type(ty.ctype)?;
Ok(mk().cast_expr(x, t))
} else {
Ok(x)
}
})
})
.chain(
// Pad out the array literal with default values to the desired size
iter::repeat(self.implicit_default_expr(ty, ctx.is_static))
.take(n - ids.len()),
)
.collect::<TranslationResult<WithStmts<_>>>()?
.map(|vals| mk().array_expr(vals)))
let is_zero_literal = |id: CExprId| {
matches!(
self.ast_context.index(id).kind,
CExprKind::Literal(_, CLiteral::Integer(0, _base))
)
};

match ids {
[] => {
// This was likely a C array of the form `int x[16] = {}`.
// We'll emit that as [0; 16].
let len = mk().lit_expr(mk().int_unsuffixed_lit(n as u128));
let zeroed = self.implicit_default_expr(ty, ctx.is_static)?;
Ok(zeroed.map(|default_value| mk().repeat_expr(default_value, len)))
}
&[single] if is_string_literal(single) => {
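// See the comment above `is_string_literal`.
// Of the 4 identical-initializer cases in `str_init.c`, this arm matches
// only `array_extra_braces`: a `char` array whose initializer list is a
// single string literal. `ptr_extra_braces`, `array_of_ptrs` and
// `array_of_arrays` are not matched here.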
self.convert_expr(ctx.used(), single)
}
&[single] if is_zero_literal(single) && n > 1 => {
// This was likely a C array of the form `int x[16] = { 0 }`.
// We'll emit that as [0; 16].
let len = mk().lit_expr(mk().int_unsuffixed_lit(n as u128));
Ok(to_array_element(single)?
.map(|default_value| mk().repeat_expr(default_value, len)))
}
[..] => {
Ok(ids
.iter()
.copied()
.map(to_array_element)
.chain(
// Pad out the array literal with default values to the desired size
iter::repeat(self.implicit_default_expr(ty, ctx.is_static))
.take(n - ids.len()),
)
.collect::<TranslationResult<WithStmts<_>>>()?
.map(|vals| mk().array_expr(vals)))
}
}
}
CTypeKind::Struct(struct_id) => {
Expand Down
65 changes: 65 additions & 0 deletions c2rust-transpile/tests/snapshots/arrays.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#include <stdlib.h>

// File-scope string data read by the address-of tests at the end of `entry`:
// `simple` is a char array initialized from a string literal, while `foo` is
// a pointer to a string literal.
static char simple[] = "mystring";
static char *foo = "mystring";

// Declares local arrays using a range of initializer forms: empty braces, a
// single zero, brace elision, too many / too few initializers, and several
// ways of initializing char arrays from string literals. This file is the
// input for the `arrays` transpiler snapshot.
// NOTE(review): the `+= 9` writes presumably exist so that the arrays are
// actually read and written after initialization -- confirm.
void entry(void) {
// Brace elision: the inner braces of the nested 1x1 array are omitted.
int arr[1][1] = { 1 };
arr[0][0] += 9;

// Empty initializer list: every element is zero-initialized.
int arr2[16] = {};
arr2[15] += 9;

// Empty initializer list for an array of an anonymous struct type.
struct {char* x; int y;} arr3[1] = {};
arr3[0].y += 9;

// A single explicit zero; the remaining 15 elements are implicitly zero.
int arr4[16] = {0};
arr4[15] += 9;

// `short;` declares no member here: the expected snapshot shows a struct
// whose only field is `y`, initialized to 1.
struct {short; int y;} arr5[1] = { { 1, 2 } };
arr5[0].y += 9;

// excess elements
// (the expected snapshot keeps `[1, 2]` for `arr6` and `[0; 0]` for `arr7`)
int arr6[2] = { 1, 2, 3 };
int arr7[0] = { 1234 };

// Size inferred from the string literal (4, including the terminating NUL).
char abc[] = "abc";

// Size inferred from the element list (3, no terminating NUL).
char def[] = {'d','e','f'};

// Fewer initializers than elements: the second element is implicitly zero.
char part[2] = {1};

// Pointer to a string literal rather than an array copy of it.
char *abcptr = "abc";

// String literal wrapped in an extra pair of braces.
char init[] = {"abcd"};

// Literal longer than the array: the expected snapshot keeps only `abc`.
char too_long[3] = "abcde";

// Literal shorter than the array: the tail is zero-filled.
char too_short[20] = "abc";

// TODO re-enable after #1266 adds portable support for translating `wchar_t`.
#if 0
wchar_t wide1[] = L"x";

wchar_t wide2[3] = L"x";

wchar_t wide3[1] = L"xy";
#endif

// Test that we can get the address of the element past the end of the array
// (`simple` has 9 bytes, so index `sizeof(simple)` is one past its end;
// `foo[8]` is the terminating NUL of "mystring").
char *past_end = &simple[sizeof(simple)];
past_end = &foo[8];
}

// Declares arrays whose initializer lists are shorter (or longer) than the
// array, to pin down how the transpiler pads or truncates them. The expected
// output for each declaration is recorded in the `arrays` snapshot.
void short_initializer() {
// `{}` and `{0}` are both expected to become repeat expressions (`[0; 16]`).
int empty_brackets[16] = {};
int brackets_with_zero[16] = {0};
// A single non-zero value is expected to become an explicit array literal
// padded with zeros (`[1, 0, 0, 0]`), not a repeat expression.
int brackets_with_one[4] = {1};

// excess elements
int excess_elements_1[2] = { 1, 2, 3 };
int excess_elements_2[0] = { 1234 };

// One struct initializer for a 1-element and for a 3-element array; in the
// latter case the remaining two elements are expected to be zeroed structs.
struct {short x; int y;} single_struct[1] = { { 1, 2 } };
struct {short x; int y;} many_struct[3] = { { 1, 2 } };
}
129 changes: 129 additions & 0 deletions c2rust-transpile/tests/snapshots/[email protected]
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
---
source: c2rust-transpile/tests/snapshots.rs
expression: cat tests/snapshots/arrays.rs
input_file: c2rust-transpile/tests/snapshots/arrays.c
---
#![allow(
dead_code,
mutable_transmutes,
non_camel_case_types,
non_snake_case,
non_upper_case_globals,
unused_assignments,
unused_mut
)]
#[derive(Copy, Clone)]
#[repr(C)]
pub struct C2RustUnnamed {
pub y: std::ffi::c_int,
}
#[derive(Copy, Clone)]
#[repr(C)]
pub struct C2RustUnnamed_0 {
pub x: *mut std::ffi::c_char,
pub y: std::ffi::c_int,
}
#[derive(Copy, Clone)]
#[repr(C)]
pub struct C2RustUnnamed_1 {
pub x: std::ffi::c_short,
pub y: std::ffi::c_int,
}
#[derive(Copy, Clone)]
#[repr(C)]
pub struct C2RustUnnamed_2 {
pub x: std::ffi::c_short,
pub y: std::ffi::c_int,
}
static mut simple: [std::ffi::c_char; 9] = unsafe {
*::core::mem::transmute::<&[u8; 9], &mut [std::ffi::c_char; 9]>(b"mystring\0")
};
static mut foo: *mut std::ffi::c_char = b"mystring\0" as *const u8
as *const std::ffi::c_char as *mut std::ffi::c_char;
#[no_mangle]
pub unsafe extern "C" fn entry() {
let mut arr: [[std::ffi::c_int; 1]; 1] = [[1 as std::ffi::c_int]];
arr[0 as std::ffi::c_int as usize][0 as std::ffi::c_int as usize]
+= 9 as std::ffi::c_int;
let mut arr2: [std::ffi::c_int; 16] = [0; 16];
arr2[15 as std::ffi::c_int as usize] += 9 as std::ffi::c_int;
let mut arr3: [C2RustUnnamed_0; 1] = [C2RustUnnamed_0 {
x: 0 as *mut std::ffi::c_char,
y: 0,
}; 1];
arr3[0 as std::ffi::c_int as usize].y += 9 as std::ffi::c_int;
let mut arr4: [std::ffi::c_int; 16] = [0 as std::ffi::c_int; 16];
arr4[15 as std::ffi::c_int as usize] += 9 as std::ffi::c_int;
let mut arr5: [C2RustUnnamed; 1] = [
{
let mut init = C2RustUnnamed {
y: 1 as std::ffi::c_int,
};
init
},
];
arr5[0 as std::ffi::c_int as usize].y += 9 as std::ffi::c_int;
let mut arr6: [std::ffi::c_int; 2] = [1 as std::ffi::c_int, 2 as std::ffi::c_int];
let mut arr7: [std::ffi::c_int; 0] = [0; 0];
let mut abc: [std::ffi::c_char; 4] = *::core::mem::transmute::<
&[u8; 4],
&mut [std::ffi::c_char; 4],
>(b"abc\0");
let mut def: [std::ffi::c_char; 3] = [
'd' as i32 as std::ffi::c_char,
'e' as i32 as std::ffi::c_char,
'f' as i32 as std::ffi::c_char,
];
let mut part: [std::ffi::c_char; 2] = [1 as std::ffi::c_int as std::ffi::c_char, 0];
let mut abcptr: *mut std::ffi::c_char = b"abc\0" as *const u8
as *const std::ffi::c_char as *mut std::ffi::c_char;
let mut init: [std::ffi::c_char; 5] = *::core::mem::transmute::<
&[u8; 5],
&mut [std::ffi::c_char; 5],
>(b"abcd\0");
let mut too_long: [std::ffi::c_char; 3] = *::core::mem::transmute::<
&[u8; 3],
&mut [std::ffi::c_char; 3],
>(b"abc");
let mut too_short: [std::ffi::c_char; 20] = *::core::mem::transmute::<
&[u8; 20],
&mut [std::ffi::c_char; 20],
>(b"abc\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0");
let mut past_end: *mut std::ffi::c_char = &mut *simple
.as_mut_ptr()
.offset(
::core::mem::size_of::<[std::ffi::c_char; 9]>() as std::ffi::c_ulong as isize,
) as *mut std::ffi::c_char;
past_end = &mut *foo.offset(8 as std::ffi::c_int as isize) as *mut std::ffi::c_char;
}
#[no_mangle]
pub unsafe extern "C" fn short_initializer() {
let mut empty_brackets: [std::ffi::c_int; 16] = [0; 16];
let mut brackets_with_zero: [std::ffi::c_int; 16] = [0 as std::ffi::c_int; 16];
let mut brackets_with_one: [std::ffi::c_int; 4] = [1 as std::ffi::c_int, 0, 0, 0];
let mut excess_elements_1: [std::ffi::c_int; 2] = [
1 as std::ffi::c_int,
2 as std::ffi::c_int,
];
let mut excess_elements_2: [std::ffi::c_int; 0] = [0; 0];
let mut single_struct: [C2RustUnnamed_2; 1] = [
{
let mut init = C2RustUnnamed_2 {
x: 1 as std::ffi::c_int as std::ffi::c_short,
y: 2 as std::ffi::c_int,
};
init
},
];
let mut many_struct: [C2RustUnnamed_1; 3] = [
{
let mut init = C2RustUnnamed_1 {
x: 1 as std::ffi::c_int as std::ffi::c_short,
y: 2 as std::ffi::c_int,
};
init
},
C2RustUnnamed_1 { x: 0, y: 0 },
C2RustUnnamed_1 { x: 0, y: 0 },
];
Comment on lines +118 to +128
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In more recent Rust versions this could be emitted as, e.g.:

let mut many_struct: [C2RustUnnamed_1; 3] = const { 
    let mut base = [C2RustUnnamed_1 { x: 0, y: 0 }; 3];
    base[0] = C2RustUnnamed_1 {
        x: 1 as std::ffi::c_int as std::ffi::c_short,
        y: 2 as std::ffi::c_int,
    };

    base
};

but I'm not sure it's worth it: it's extra work and may not actually come up in practice.

Copy link
Contributor

@kkysen kkysen Jul 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this is definitely cleaner (of course, like you, I'm not sure how worthwhile it is). How recently was support for this added, though? I know the `const {}` blocks are new, but you don't need that for this, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True, you just have to name the const item, but I guess if you nest it in a block the name can never conflict?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here in normal scope, nothing should need to be const. In const scope (e.g. a const or static item), then it'll need to be const, but it shouldn't need the explicit const {, just a normal {, since it already knows to evaluate it in const mode.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh yeah, if the value is Copy, which it would be here, then const isn't even needed.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, true, although everything from C should be Copy.

}