-
-
Notifications
You must be signed in to change notification settings - Fork 14.2k
Closed
Description
There is a performance regression with shorthand array initialization (the `[value; N]` form) that causes a temporary stack allocation and a copy. It appears to have been introduced between Rust 1.11.0 and 1.12.0 and is still present all the way up to the current beta.
(All examples below are compiled with `-C opt-level=3`.)
Example:
// Reproducer type: wraps a single array of 128 u32 values (128 * 4 = 512 bytes).
pub struct BigTest {
arr: [u32; 128]
}
impl BigTest {
// Builds a BigTest whose 128 elements are all 123, using the shorthand
// `[value; N]` initializer. This is the form the report is about.
pub fn new() -> BigTest {
BigTest {
arr: [123; 128],
}
}
}
pub fn test() -> BigTest {
BigTest::new()
}
It appears that in this case a temporary is allocated on the stack, initialized, and then copied into the destination.
This could cause performance issues with large array initialization.
# Constant pool entry: four 32-bit copies of 123, i.e. one 16-byte vector.
.LCPI0_0:
.long 123
.long 123
.long 123
.long 123
# example::BigTest::new, built from the shorthand `[123; 128]` initializer.
# On entry rdi holds the destination pointer; the same pointer is handed back in rax.
# The array is first built in a 512-byte stack temporary and then copied
# to the destination with memcpy.
example::BigTest::new:
# rbx keeps the destination pointer available after the memcpy call.
push rbx
# Reserve the 512-byte stack temporary (128 elements * 4 bytes each).
sub rsp, 512
mov rbx, rdi
# xmm0 = {123, 123, 123, 123}
movaps xmm0, xmmword ptr [rip + .LCPI0_0]
# Fill the temporary [rsp, rsp + 512) with 32 aligned 16-byte stores.
movaps xmmword ptr [rsp], xmm0
movaps xmmword ptr [rsp + 16], xmm0
movaps xmmword ptr [rsp + 32], xmm0
movaps xmmword ptr [rsp + 48], xmm0
movaps xmmword ptr [rsp + 64], xmm0
movaps xmmword ptr [rsp + 80], xmm0
movaps xmmword ptr [rsp + 96], xmm0
movaps xmmword ptr [rsp + 112], xmm0
movaps xmmword ptr [rsp + 128], xmm0
movaps xmmword ptr [rsp + 144], xmm0
movaps xmmword ptr [rsp + 160], xmm0
movaps xmmword ptr [rsp + 176], xmm0
movaps xmmword ptr [rsp + 192], xmm0
movaps xmmword ptr [rsp + 208], xmm0
movaps xmmword ptr [rsp + 224], xmm0
movaps xmmword ptr [rsp + 240], xmm0
movaps xmmword ptr [rsp + 256], xmm0
movaps xmmword ptr [rsp + 272], xmm0
movaps xmmword ptr [rsp + 288], xmm0
movaps xmmword ptr [rsp + 304], xmm0
movaps xmmword ptr [rsp + 320], xmm0
movaps xmmword ptr [rsp + 336], xmm0
movaps xmmword ptr [rsp + 352], xmm0
movaps xmmword ptr [rsp + 368], xmm0
movaps xmmword ptr [rsp + 384], xmm0
movaps xmmword ptr [rsp + 400], xmm0
movaps xmmword ptr [rsp + 416], xmm0
movaps xmmword ptr [rsp + 432], xmm0
movaps xmmword ptr [rsp + 448], xmm0
movaps xmmword ptr [rsp + 464], xmm0
movaps xmmword ptr [rsp + 480], xmm0
movaps xmmword ptr [rsp + 496], xmm0
# memcpy arguments: rdi = destination (unchanged since entry),
# rsi = stack temporary, edx = 512 bytes.
mov rsi, rsp
mov edx, 512
call qword ptr [rip + memcpy@GOTPCREL]
# Return the destination pointer, then release the temporary and restore rbx.
mov rax, rbx
add rsp, 512
pop rbx
ret
example::test:
push rbx
mov rbx, rdi
call qword ptr [rip + example::BigTest::new@GOTPCREL]
mov rax, rbx
pop rbx
ret
Without shorthand, there is no temporary allocation:
// Same reproducer type as before: a single array of 128 u32 values (512 bytes).
pub struct BigTest {
arr: [u32; 128]
}
impl BigTest {
// Builds a BigTest whose 128 elements are all 123, but spells out every
// element explicitly (8 rows of 16 values) instead of using `[123; 128]`.
pub fn new() -> BigTest {
BigTest {
arr: [
123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,123,
],
}
}
}
pub fn test() -> BigTest {
BigTest::new()
}
.LCPI0_0:
.long 123
.long 123
.long 123
.long 123
# example::BigTest::new, built from the explicit 128-element initializer.
# On entry rdi holds the destination pointer; it is returned in rax.
# No stack space is reserved and memcpy is not called: every store goes
# straight to the destination.
example::BigTest::new:
mov rax, rdi
# xmm0 = {123, 123, 123, 123}
movaps xmm0, xmmword ptr [rip + .LCPI0_0]
# Fill [rdi, rdi + 512) with 32 unaligned 16-byte stores.
movups xmmword ptr [rdi], xmm0
movups xmmword ptr [rdi + 16], xmm0
movups xmmword ptr [rdi + 32], xmm0
movups xmmword ptr [rdi + 48], xmm0
movups xmmword ptr [rdi + 64], xmm0
movups xmmword ptr [rdi + 80], xmm0
movups xmmword ptr [rdi + 96], xmm0
movups xmmword ptr [rdi + 112], xmm0
movups xmmword ptr [rdi + 128], xmm0
movups xmmword ptr [rdi + 144], xmm0
movups xmmword ptr [rdi + 160], xmm0
movups xmmword ptr [rdi + 176], xmm0
movups xmmword ptr [rdi + 192], xmm0
movups xmmword ptr [rdi + 208], xmm0
movups xmmword ptr [rdi + 224], xmm0
movups xmmword ptr [rdi + 240], xmm0
movups xmmword ptr [rdi + 256], xmm0
movups xmmword ptr [rdi + 272], xmm0
movups xmmword ptr [rdi + 288], xmm0
movups xmmword ptr [rdi + 304], xmm0
movups xmmword ptr [rdi + 320], xmm0
movups xmmword ptr [rdi + 336], xmm0
movups xmmword ptr [rdi + 352], xmm0
movups xmmword ptr [rdi + 368], xmm0
movups xmmword ptr [rdi + 384], xmm0
movups xmmword ptr [rdi + 400], xmm0
movups xmmword ptr [rdi + 416], xmm0
movups xmmword ptr [rdi + 432], xmm0
movups xmmword ptr [rdi + 448], xmm0
movups xmmword ptr [rdi + 464], xmm0
movups xmmword ptr [rdi + 480], xmm0
movups xmmword ptr [rdi + 496], xmm0
ret
example::test:
push rbx
mov rbx, rdi
call qword ptr [rip + example::BigTest::new@GOTPCREL]
mov rax, rbx
pop rbx
ret
With Rust 1.11.0, when using shorthand, there is no extra allocation:
# Constant pool entry: four 32-bit copies of 123, i.e. one 16-byte vector.
.LCPI0_0:
.long 123
.long 123
.long 123
.long 123
# example::BigTest::new as shown for the Rust 1.11.0 shorthand build.
# On entry rdi holds the destination pointer; it is returned in rax.
# Apart from the rbp frame setup, no stack space is reserved and memcpy is
# not called: every store goes straight to the destination.
example::BigTest::new:
# Frame-pointer setup; rbp is restored before returning.
push rbp
mov rbp, rsp
# xmm0 = {123, 123, 123, 123}
movaps xmm0, xmmword ptr [rip + .LCPI0_0]
# Fill [rdi, rdi + 512) with 32 unaligned 16-byte stores.
movups xmmword ptr [rdi], xmm0
movups xmmword ptr [rdi + 16], xmm0
movups xmmword ptr [rdi + 32], xmm0
movups xmmword ptr [rdi + 48], xmm0
movups xmmword ptr [rdi + 64], xmm0
movups xmmword ptr [rdi + 80], xmm0
movups xmmword ptr [rdi + 96], xmm0
movups xmmword ptr [rdi + 112], xmm0
movups xmmword ptr [rdi + 128], xmm0
movups xmmword ptr [rdi + 144], xmm0
movups xmmword ptr [rdi + 160], xmm0
movups xmmword ptr [rdi + 176], xmm0
movups xmmword ptr [rdi + 192], xmm0
movups xmmword ptr [rdi + 208], xmm0
movups xmmword ptr [rdi + 224], xmm0
movups xmmword ptr [rdi + 240], xmm0
movups xmmword ptr [rdi + 256], xmm0
movups xmmword ptr [rdi + 272], xmm0
movups xmmword ptr [rdi + 288], xmm0
movups xmmword ptr [rdi + 304], xmm0
movups xmmword ptr [rdi + 320], xmm0
movups xmmword ptr [rdi + 336], xmm0
movups xmmword ptr [rdi + 352], xmm0
movups xmmword ptr [rdi + 368], xmm0
movups xmmword ptr [rdi + 384], xmm0
movups xmmword ptr [rdi + 400], xmm0
movups xmmword ptr [rdi + 416], xmm0
movups xmmword ptr [rdi + 432], xmm0
movups xmmword ptr [rdi + 448], xmm0
movups xmmword ptr [rdi + 464], xmm0
movups xmmword ptr [rdi + 480], xmm0
movups xmmword ptr [rdi + 496], xmm0
# Return the destination pointer.
mov rax, rdi
pop rbp
ret
.LCPI1_0:
.long 123
.long 123
.long 123
.long 123
example::test:
push rbp
mov rbp, rsp
movaps xmm0, xmmword ptr [rip + .LCPI1_0]
movups xmmword ptr [rdi], xmm0
movups xmmword ptr [rdi + 16], xmm0
movups xmmword ptr [rdi + 32], xmm0
movups xmmword ptr [rdi + 48], xmm0
movups xmmword ptr [rdi + 64], xmm0
movups xmmword ptr [rdi + 80], xmm0
movups xmmword ptr [rdi + 96], xmm0
movups xmmword ptr [rdi + 112], xmm0
movups xmmword ptr [rdi + 128], xmm0
movups xmmword ptr [rdi + 144], xmm0
movups xmmword ptr [rdi + 160], xmm0
movups xmmword ptr [rdi + 176], xmm0
movups xmmword ptr [rdi + 192], xmm0
movups xmmword ptr [rdi + 208], xmm0
movups xmmword ptr [rdi + 224], xmm0
movups xmmword ptr [rdi + 240], xmm0
movups xmmword ptr [rdi + 256], xmm0
movups xmmword ptr [rdi + 272], xmm0
movups xmmword ptr [rdi + 288], xmm0
movups xmmword ptr [rdi + 304], xmm0
movups xmmword ptr [rdi + 320], xmm0
movups xmmword ptr [rdi + 336], xmm0
movups xmmword ptr [rdi + 352], xmm0
movups xmmword ptr [rdi + 368], xmm0
movups xmmword ptr [rdi + 384], xmm0
movups xmmword ptr [rdi + 400], xmm0
movups xmmword ptr [rdi + 416], xmm0
movups xmmword ptr [rdi + 432], xmm0
movups xmmword ptr [rdi + 448], xmm0
movups xmmword ptr [rdi + 464], xmm0
movups xmmword ptr [rdi + 480], xmm0
movups xmmword ptr [rdi + 496], xmm0
mov rax, rdi
pop rbp
ret
Metadata
Metadata
Assignees
Labels
No labels