Commit 932c736
committed
Auto merge of rust-lang#48057 - scottmcm:less-match-more-compare, r=dtolnay
Simplify RangeInclusive::next[_back]
`match`ing on an `Option<Ordering>` seems cause some confusion for LLVM; switching to just using comparison operators removes a few jumps from the simple `for` loops I was trying.
cc rust-lang#45222 rust-lang#28237 (comment)
Example:
```rust
#[no_mangle]
pub fn coresum(x: std::ops::RangeInclusive<u64>) -> u64 {
let mut sum = 0;
for i in x {
sum += i ^ (i-1);
}
sum
}
```
Today:
```asm
coresum:
xor r8d, r8d
mov r9, -1
xor eax, eax
jmp .LBB0_1
.LBB0_4:
lea rcx, [rdi - 1]
xor rcx, rdi
add rax, rcx
mov rsi, rdx
mov rdi, r10
.LBB0_1:
cmp rdi, rsi
mov ecx, 1
cmovb rcx, r9
cmove rcx, r8
test rcx, rcx
mov edx, 0
mov r10d, 1
je .LBB0_4 // 1
cmp rcx, -1
jne .LBB0_5 // 2
lea r10, [rdi + 1]
mov rdx, rsi
jmp .LBB0_4 // 3
.LBB0_5:
ret
```
With this PR:
```asm
coresum:
cmp rcx, rdx
jbe .LBB0_2
xor eax, eax
ret
.LBB0_2:
xor r8d, r8d
mov r9d, 1
xor eax, eax
.p2align 4, 0x90
.LBB0_3:
lea r10, [rcx + 1]
cmp rcx, rdx
cmovae rdx, r8
cmovae r10, r9
lea r11, [rcx - 1]
xor r11, rcx
add rax, r11
mov rcx, r10
cmp r10, rdx
jbe .LBB0_3 // Just this
ret
```
<details><summary>Though using internal iteration (`.map(|i| i ^ (i-1)).sum()`) is still shorter to type, and lets the compiler unroll it</summary>
```asm
coresum_inner:
.Lcfi0:
.seh_proc coresum_inner
sub rsp, 168
.Lcfi1:
.seh_stackalloc 168
vmovdqa xmmword ptr [rsp + 144], xmm15
.Lcfi2:
.seh_savexmm 15, 144
vmovdqa xmmword ptr [rsp + 128], xmm14
.Lcfi3:
.seh_savexmm 14, 128
vmovdqa xmmword ptr [rsp + 112], xmm13
.Lcfi4:
.seh_savexmm 13, 112
vmovdqa xmmword ptr [rsp + 96], xmm12
.Lcfi5:
.seh_savexmm 12, 96
vmovdqa xmmword ptr [rsp + 80], xmm11
.Lcfi6:
.seh_savexmm 11, 80
vmovdqa xmmword ptr [rsp + 64], xmm10
.Lcfi7:
.seh_savexmm 10, 64
vmovdqa xmmword ptr [rsp + 48], xmm9
.Lcfi8:
.seh_savexmm 9, 48
vmovdqa xmmword ptr [rsp + 32], xmm8
.Lcfi9:
.seh_savexmm 8, 32
vmovdqa xmmword ptr [rsp + 16], xmm7
.Lcfi10:
.seh_savexmm 7, 16
vmovdqa xmmword ptr [rsp], xmm6
.Lcfi11:
.seh_savexmm 6, 0
.Lcfi12:
.seh_endprologue
cmp rdx, rcx
jae .LBB1_2
xor eax, eax
jmp .LBB1_13
.LBB1_2:
mov r8, rdx
sub r8, rcx
jbe .LBB1_3
cmp r8, 7
jbe .LBB1_5
mov rax, r8
and rax, -8
mov r9, r8
and r9, -8
je .LBB1_5
add rax, rcx
vmovq xmm0, rcx
vpshufd xmm0, xmm0, 68
mov ecx, 1
vmovq xmm1, rcx
vpslldq xmm1, xmm1, 8
vpaddq xmm1, xmm0, xmm1
vpxor xmm0, xmm0, xmm0
vpcmpeqd xmm11, xmm11, xmm11
vmovdqa xmm12, xmmword ptr [rip + __xmm@00000000000000010000000000000001]
vmovdqa xmm13, xmmword ptr [rip + __xmm@00000000000000030000000000000003]
vmovdqa xmm14, xmmword ptr [rip + __xmm@00000000000000050000000000000005]
vmovdqa xmm15, xmmword ptr [rip + __xmm@00000000000000080000000000000008]
mov rcx, r9
vpxor xmm4, xmm4, xmm4
vpxor xmm5, xmm5, xmm5
vpxor xmm6, xmm6, xmm6
.p2align 4, 0x90
.LBB1_9:
vpaddq xmm7, xmm1, xmmword ptr [rip + __xmm@00000000000000020000000000000002]
vpaddq xmm9, xmm1, xmmword ptr [rip + __xmm@00000000000000040000000000000004]
vpaddq xmm10, xmm1, xmmword ptr [rip + __xmm@00000000000000060000000000000006]
vpaddq xmm8, xmm1, xmm12
vpxor xmm7, xmm8, xmm7
vpaddq xmm2, xmm1, xmm13
vpxor xmm8, xmm2, xmm9
vpaddq xmm3, xmm1, xmm14
vpxor xmm3, xmm3, xmm10
vpaddq xmm2, xmm1, xmm11
vpxor xmm2, xmm2, xmm1
vpaddq xmm0, xmm2, xmm0
vpaddq xmm4, xmm7, xmm4
vpaddq xmm5, xmm8, xmm5
vpaddq xmm6, xmm3, xmm6
vpaddq xmm1, xmm1, xmm15
add rcx, -8
jne .LBB1_9
vpaddq xmm0, xmm4, xmm0
vpaddq xmm0, xmm5, xmm0
vpaddq xmm0, xmm6, xmm0
vpshufd xmm1, xmm0, 78
vpaddq xmm0, xmm0, xmm1
vmovq r10, xmm0
cmp r8, r9
jne .LBB1_6
jmp .LBB1_11
.LBB1_3:
xor r10d, r10d
jmp .LBB1_12
.LBB1_5:
xor r10d, r10d
mov rax, rcx
.p2align 4, 0x90
.LBB1_6:
lea rcx, [rax - 1]
xor rcx, rax
inc rax
add r10, rcx
cmp rdx, rax
jne .LBB1_6
.LBB1_11:
mov rcx, rdx
.LBB1_12:
lea rax, [rcx - 1]
xor rax, rcx
add rax, r10
.LBB1_13:
vmovaps xmm6, xmmword ptr [rsp]
vmovaps xmm7, xmmword ptr [rsp + 16]
vmovaps xmm8, xmmword ptr [rsp + 32]
vmovaps xmm9, xmmword ptr [rsp + 48]
vmovaps xmm10, xmmword ptr [rsp + 64]
vmovaps xmm11, xmmword ptr [rsp + 80]
vmovaps xmm12, xmmword ptr [rsp + 96]
vmovaps xmm13, xmmword ptr [rsp + 112]
vmovaps xmm14, xmmword ptr [rsp + 128]
vmovaps xmm15, xmmword ptr [rsp + 144]
add rsp, 168
ret
.seh_handlerdata
.section .text,"xr",one_only,coresum_inner
.Lcfi13:
.seh_endproc
```
</details>2 files changed
+31
-16
lines changed| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
334 | 334 | | |
335 | 335 | | |
336 | 336 | | |
337 | | - | |
338 | | - | |
339 | | - | |
340 | | - | |
| 337 | + | |
| 338 | + | |
341 | 339 | | |
342 | 340 | | |
343 | | - | |
344 | | - | |
| 341 | + | |
345 | 342 | | |
346 | 343 | | |
347 | 344 | | |
348 | | - | |
349 | | - | |
| 345 | + | |
| 346 | + | |
| 347 | + | |
350 | 348 | | |
351 | 349 | | |
352 | 350 | | |
| |||
428 | 426 | | |
429 | 427 | | |
430 | 428 | | |
431 | | - | |
432 | | - | |
433 | | - | |
434 | | - | |
| 429 | + | |
| 430 | + | |
435 | 431 | | |
436 | 432 | | |
437 | | - | |
438 | | - | |
| 433 | + | |
439 | 434 | | |
440 | 435 | | |
441 | 436 | | |
442 | | - | |
443 | | - | |
| 437 | + | |
| 438 | + | |
| 439 | + | |
444 | 440 | | |
445 | 441 | | |
446 | 442 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
1332 | 1332 | | |
1333 | 1333 | | |
1334 | 1334 | | |
| 1335 | + | |
| 1336 | + | |
| 1337 | + | |
| 1338 | + | |
| 1339 | + | |
| 1340 | + | |
| 1341 | + | |
| 1342 | + | |
| 1343 | + | |
| 1344 | + | |
| 1345 | + | |
| 1346 | + | |
1335 | 1347 | | |
1336 | 1348 | | |
1337 | 1349 | | |
| |||
1340 | 1352 | | |
1341 | 1353 | | |
1342 | 1354 | | |
| 1355 | + | |
| 1356 | + | |
| 1357 | + | |
| 1358 | + | |
| 1359 | + | |
| 1360 | + | |
| 1361 | + | |
1343 | 1362 | | |
1344 | 1363 | | |
1345 | 1364 | | |
| |||
0 commit comments