Skip to content

Commit 33f0078

Browse files
committed
[EVM][ISel] Improve BYTE instruction selection
This replaces 'AND (SRL imm, v), 0xFF' with 'BYTE (31 - imm / 8), v' when imm % 8 == 0 and imm / 8 < 32. Fixes issue #824.
1 parent 6877782 commit 33f0078

File tree

4 files changed

+163
-55
lines changed

4 files changed

+163
-55
lines changed

llvm/lib/Target/EVM/EVMInstrInfo.td

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,18 @@ def negate_imm : SDNodeXForm<imm, [{
125125
return CurDAG->getTargetConstant(-neg, SDLoc(N), MVT::i256);
126126
}]>;
127127

128+
// Transform that maps the shift amount to BYTE's index (31 - Val / 8).
129+
def byte_shift : SDNodeXForm<imm, [{
130+
uint64_t Val = cast<ConstantSDNode>(N)->getZExtValue();
131+
uint64_t ByteIdx = Val / 8;
132+
return CurDAG->getTargetConstant(31 - ByteIdx, SDLoc(N), MVT::i256);
133+
}]>;
134+
135+
// Immediate predicate: imm divisible by 8 and within [0, 31 * 8].
136+
def byte_shift_imm : PatLeaf<(imm), [{
137+
uint64_t Val = N->getZExtValue();
138+
return (Val % 8) == 0 && Val <= 31 * 8;
139+
}]>;
128140

129141
//===----------------------------------------------------------------------===//
130142
// Pattern fragments for memory instructions.
@@ -351,6 +363,24 @@ defm BYTE
351363
[(set GPR:$dst, (int_evm_byte GPR:$idx, GPR:$val))],
352364
"BYTE", "$dst, $idx, $val", 0x1a, 3>;
353365

366+
def : Pat<(and (srl i256:$val, byte_shift_imm:$sh), (i256 255)),
367+
(BYTE (CONST_I256 (byte_shift byte_shift_imm:$sh)), $val)>;
368+
369+
// When the shift amount is 248, the node 'AND $val, 255' gets optimized away
370+
// in the lowered SelectionDAG, preventing the above pattern from matching
371+
// which results in the following instructions:
372+
//
373+
// PUSH1 0xF8
374+
// SHR
375+
//
376+
// However, using the BYTE instruction instead is often both cheaper in gas
377+
// and one byte shorter:
378+
//
379+
// PUSH0
380+
// BYTE
381+
//
382+
// So we explicitly match this case with a separate pattern:
383+
def : Pat<(srl i256:$val, (i256 248)), (BYTE (CONST_I256 0), $val)>;
354384

355385
//===----------------------------------------------------------------------===//
356386
// EVM shift instructions.

llvm/test/CodeGen/EVM/bitmanipulation-intrinsics.ll

Lines changed: 39 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ define i256 @bitreversetest(i256 %v) {
1515
; CHECK: ; %bb.0:
1616
; CHECK-NEXT: JUMPDEST
1717
; CHECK-NEXT: DUP1
18-
; CHECK-NEXT: PUSH1 0xF8
19-
; CHECK-NEXT: SHR
18+
; CHECK-NEXT: PUSH0
19+
; CHECK-NEXT: BYTE
2020
; CHECK-NEXT: PUSH2 0xFF00
2121
; CHECK-NEXT: DUP3
2222
; CHECK-NEXT: PUSH1 0xE8
@@ -280,8 +280,8 @@ define i256 @bswaptest(i256 %v) {
280280
; CHECK: ; %bb.0:
281281
; CHECK-NEXT: JUMPDEST
282282
; CHECK-NEXT: DUP1
283-
; CHECK-NEXT: PUSH1 0xF8
284-
; CHECK-NEXT: SHR
283+
; CHECK-NEXT: PUSH0
284+
; CHECK-NEXT: BYTE
285285
; CHECK-NEXT: PUSH2 0xFF00
286286
; CHECK-NEXT: DUP3
287287
; CHECK-NEXT: PUSH1 0xE8
@@ -511,17 +511,15 @@ define i256 @ctpoptest(i256 %v) {
511511
; CHECK-LABEL: ctpoptest:
512512
; CHECK: ; %bb.0:
513513
; CHECK-NEXT: JUMPDEST
514-
; CHECK-NEXT: PUSH1 0xFF
515514
; CHECK-NEXT: PUSH32 0x101010101010101010101010101010101010101010101010101010101010101
516515
; CHECK-NEXT: PUSH16 0xF0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F
517-
; CHECK-NEXT: DUP3
518-
; CHECK-NEXT: DUP3
519-
; CHECK-NEXT: DUP3
516+
; CHECK-NEXT: DUP2
517+
; CHECK-NEXT: DUP2
520518
; CHECK-NEXT: PUSH32 0x3333333333333333333333333333333333333333333333333333333333333333
521-
; CHECK-NEXT: DUP8
519+
; CHECK-NEXT: DUP6
522520
; CHECK-NEXT: PUSH16 0x55555555555555555555555555555555
523521
; CHECK-NEXT: DUP1
524-
; CHECK-NEXT: SWAP10
522+
; CHECK-NEXT: SWAP8
525523
; CHECK-NEXT: PUSH1 0x81
526524
; CHECK-NEXT: SHR
527525
; CHECK-NEXT: AND
@@ -538,7 +536,7 @@ define i256 @ctpoptest(i256 %v) {
538536
; CHECK-NEXT: SWAP2
539537
; CHECK-NEXT: AND
540538
; CHECK-NEXT: ADD
541-
; CHECK-NEXT: SWAP9
539+
; CHECK-NEXT: SWAP7
542540
; CHECK-NEXT: DUP2
543541
; CHECK-NEXT: PUSH1 0x1
544542
; CHECK-NEXT: SHR
@@ -556,26 +554,24 @@ define i256 @ctpoptest(i256 %v) {
556554
; CHECK-NEXT: SWAP2
557555
; CHECK-NEXT: AND
558556
; CHECK-NEXT: ADD
559-
; CHECK-NEXT: SWAP7
557+
; CHECK-NEXT: SWAP5
560558
; CHECK-NEXT: DUP1
561559
; CHECK-NEXT: PUSH1 0x4
562560
; CHECK-NEXT: SHR
563561
; CHECK-NEXT: ADD
564562
; CHECK-NEXT: AND
565563
; CHECK-NEXT: MUL
566-
; CHECK-NEXT: PUSH1 0x78
567-
; CHECK-NEXT: SHR
568-
; CHECK-NEXT: AND
569-
; CHECK-NEXT: SWAP4
564+
; CHECK-NEXT: PUSH1 0x10
565+
; CHECK-NEXT: BYTE
566+
; CHECK-NEXT: SWAP3
570567
; CHECK-NEXT: DUP1
571568
; CHECK-NEXT: PUSH1 0x4
572569
; CHECK-NEXT: SHR
573570
; CHECK-NEXT: ADD
574571
; CHECK-NEXT: AND
575572
; CHECK-NEXT: MUL
576-
; CHECK-NEXT: PUSH1 0x78
577-
; CHECK-NEXT: SHR
578-
; CHECK-NEXT: AND
573+
; CHECK-NEXT: PUSH1 0x10
574+
; CHECK-NEXT: BYTE
579575
; CHECK-NEXT: ADD
580576
; CHECK-NEXT: SWAP1
581577
; CHECK-NEXT: JUMP
@@ -588,16 +584,14 @@ define i256 @ctlztest(i256 %v) {
588584
; CHECK-LABEL: ctlztest:
589585
; CHECK: ; %bb.0:
590586
; CHECK-NEXT: JUMPDEST
591-
; CHECK-NEXT: PUSH1 0xFF
592587
; CHECK-NEXT: PUSH32 0x101010101010101010101010101010101010101010101010101010101010101
593588
; CHECK-NEXT: PUSH16 0xF0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F
594-
; CHECK-NEXT: DUP3
595-
; CHECK-NEXT: DUP3
596-
; CHECK-NEXT: DUP3
589+
; CHECK-NEXT: DUP2
590+
; CHECK-NEXT: DUP2
597591
; CHECK-NEXT: PUSH32 0x3333333333333333333333333333333333333333333333333333333333333333
598-
; CHECK-NEXT: DUP8
592+
; CHECK-NEXT: DUP6
599593
; CHECK-NEXT: PUSH16 0x55555555555555555555555555555555
600-
; CHECK-NEXT: SWAP9
594+
; CHECK-NEXT: SWAP7
601595
; CHECK-NEXT: PUSH1 0x1
602596
; CHECK-NEXT: SHR
603597
; CHECK-NEXT: OR
@@ -631,7 +625,7 @@ define i256 @ctlztest(i256 %v) {
631625
; CHECK-NEXT: OR
632626
; CHECK-NEXT: NOT
633627
; CHECK-NEXT: PUSH16 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
634-
; CHECK-NEXT: DUP10
628+
; CHECK-NEXT: DUP8
635629
; CHECK-NEXT: DUP3
636630
; CHECK-NEXT: PUSH1 0x81
637631
; CHECK-NEXT: SHR
@@ -649,7 +643,7 @@ define i256 @ctlztest(i256 %v) {
649643
; CHECK-NEXT: SWAP2
650644
; CHECK-NEXT: AND
651645
; CHECK-NEXT: ADD
652-
; CHECK-NEXT: SWAP10
646+
; CHECK-NEXT: SWAP8
653647
; CHECK-NEXT: DUP3
654648
; CHECK-NEXT: PUSH1 0x1
655649
; CHECK-NEXT: SHR
@@ -666,26 +660,24 @@ define i256 @ctlztest(i256 %v) {
666660
; CHECK-NEXT: SWAP2
667661
; CHECK-NEXT: AND
668662
; CHECK-NEXT: ADD
669-
; CHECK-NEXT: SWAP7
663+
; CHECK-NEXT: SWAP5
670664
; CHECK-NEXT: DUP1
671665
; CHECK-NEXT: PUSH1 0x4
672666
; CHECK-NEXT: SHR
673667
; CHECK-NEXT: ADD
674668
; CHECK-NEXT: AND
675669
; CHECK-NEXT: MUL
676-
; CHECK-NEXT: PUSH1 0x78
677-
; CHECK-NEXT: SHR
678-
; CHECK-NEXT: AND
679-
; CHECK-NEXT: SWAP4
670+
; CHECK-NEXT: PUSH1 0x10
671+
; CHECK-NEXT: BYTE
672+
; CHECK-NEXT: SWAP3
680673
; CHECK-NEXT: DUP1
681674
; CHECK-NEXT: PUSH1 0x4
682675
; CHECK-NEXT: SHR
683676
; CHECK-NEXT: ADD
684677
; CHECK-NEXT: AND
685678
; CHECK-NEXT: MUL
686-
; CHECK-NEXT: PUSH1 0x78
687-
; CHECK-NEXT: SHR
688-
; CHECK-NEXT: AND
679+
; CHECK-NEXT: PUSH1 0x10
680+
; CHECK-NEXT: BYTE
689681
; CHECK-NEXT: ADD
690682
; CHECK-NEXT: SWAP1
691683
; CHECK-NEXT: JUMP
@@ -698,23 +690,21 @@ define i256 @cttztest(i256 %v) {
698690
; CHECK-LABEL: cttztest:
699691
; CHECK: ; %bb.0:
700692
; CHECK-NEXT: JUMPDEST
701-
; CHECK-NEXT: PUSH1 0xFF
702693
; CHECK-NEXT: PUSH32 0x101010101010101010101010101010101010101010101010101010101010101
703694
; CHECK-NEXT: PUSH16 0xF0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F
704-
; CHECK-NEXT: DUP3
705-
; CHECK-NEXT: DUP3
706-
; CHECK-NEXT: DUP3
695+
; CHECK-NEXT: DUP2
696+
; CHECK-NEXT: DUP2
707697
; CHECK-NEXT: PUSH32 0x3333333333333333333333333333333333333333333333333333333333333333
708-
; CHECK-NEXT: DUP8
698+
; CHECK-NEXT: DUP6
709699
; CHECK-NEXT: PUSH1 0x1
710700
; CHECK-NEXT: PUSH16 0x55555555555555555555555555555555
711-
; CHECK-NEXT: SWAP10
701+
; CHECK-NEXT: SWAP8
712702
; CHECK-NEXT: SUB
713703
; CHECK-NEXT: SWAP1
714704
; CHECK-NEXT: NOT
715705
; CHECK-NEXT: AND
716706
; CHECK-NEXT: PUSH16 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
717-
; CHECK-NEXT: DUP10
707+
; CHECK-NEXT: DUP8
718708
; CHECK-NEXT: DUP3
719709
; CHECK-NEXT: PUSH1 0x81
720710
; CHECK-NEXT: SHR
@@ -732,7 +722,7 @@ define i256 @cttztest(i256 %v) {
732722
; CHECK-NEXT: SWAP2
733723
; CHECK-NEXT: AND
734724
; CHECK-NEXT: ADD
735-
; CHECK-NEXT: SWAP10
725+
; CHECK-NEXT: SWAP8
736726
; CHECK-NEXT: DUP3
737727
; CHECK-NEXT: PUSH1 0x1
738728
; CHECK-NEXT: SHR
@@ -749,26 +739,24 @@ define i256 @cttztest(i256 %v) {
749739
; CHECK-NEXT: SWAP2
750740
; CHECK-NEXT: AND
751741
; CHECK-NEXT: ADD
752-
; CHECK-NEXT: SWAP7
742+
; CHECK-NEXT: SWAP5
753743
; CHECK-NEXT: DUP1
754744
; CHECK-NEXT: PUSH1 0x4
755745
; CHECK-NEXT: SHR
756746
; CHECK-NEXT: ADD
757747
; CHECK-NEXT: AND
758748
; CHECK-NEXT: MUL
759-
; CHECK-NEXT: PUSH1 0x78
760-
; CHECK-NEXT: SHR
761-
; CHECK-NEXT: AND
762-
; CHECK-NEXT: SWAP4
749+
; CHECK-NEXT: PUSH1 0x10
750+
; CHECK-NEXT: BYTE
751+
; CHECK-NEXT: SWAP3
763752
; CHECK-NEXT: DUP1
764753
; CHECK-NEXT: PUSH1 0x4
765754
; CHECK-NEXT: SHR
766755
; CHECK-NEXT: ADD
767756
; CHECK-NEXT: AND
768757
; CHECK-NEXT: MUL
769-
; CHECK-NEXT: PUSH1 0x78
770-
; CHECK-NEXT: SHR
771-
; CHECK-NEXT: AND
758+
; CHECK-NEXT: PUSH1 0x10
759+
; CHECK-NEXT: BYTE
772760
; CHECK-NEXT: ADD
773761
; CHECK-NEXT: SWAP1
774762
; CHECK-NEXT: JUMP
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s | FileCheck %s
3+
target datalayout = "E-p:256:256-i256:256:256-S256-a:256:256"
4+
target triple = "evm"
5+
6+
; Check that we do the following transformation:
7+
; 'AND (SRL imm, v), 0xFF' -> 'BYTE (31 - imm / 8), v',
8+
; where imm % 8 == 0, and imm / 8 < 32.
9+
10+
define i256 @byte_lshr_0(i256 %0) {
11+
; CHECK-LABEL: byte_lshr_0:
12+
; CHECK: ; %bb.0:
13+
; CHECK-NEXT: JUMPDEST
14+
; CHECK-NEXT: PUSH1 0xFF
15+
; CHECK-NEXT: AND
16+
; CHECK-NEXT: SWAP1
17+
; CHECK-NEXT: JUMP
18+
%r = and i256 %0, 255
19+
ret i256 %r
20+
}
21+
22+
define i256 @byte_lshr_8(i256 %0) {
23+
; CHECK-LABEL: byte_lshr_8:
24+
; CHECK: ; %bb.0:
25+
; CHECK-NEXT: JUMPDEST
26+
; CHECK-NEXT: PUSH1 0x1E
27+
; CHECK-NEXT: BYTE
28+
; CHECK-NEXT: SWAP1
29+
; CHECK-NEXT: JUMP
30+
%s = lshr i256 %0, 8
31+
%r = and i256 %s, 255
32+
ret i256 %r
33+
}
34+
35+
define i256 @byte_lshr_16(i256 %0) {
36+
; CHECK-LABEL: byte_lshr_16:
37+
; CHECK: ; %bb.0:
38+
; CHECK-NEXT: JUMPDEST
39+
; CHECK-NEXT: PUSH1 0x1D
40+
; CHECK-NEXT: BYTE
41+
; CHECK-NEXT: SWAP1
42+
; CHECK-NEXT: JUMP
43+
%s = lshr i256 %0, 16
44+
%r = and i256 %s, 255
45+
ret i256 %r
46+
}
47+
48+
define i256 @byte_lshr_248(i256 %0) {
49+
; CHECK-LABEL: byte_lshr_248:
50+
; CHECK: ; %bb.0:
51+
; CHECK-NEXT: JUMPDEST
52+
; CHECK-NEXT: PUSH0
53+
; CHECK-NEXT: BYTE
54+
; CHECK-NEXT: SWAP1
55+
; CHECK-NEXT: JUMP
56+
%s = lshr i256 %0, 248
57+
%r = and i256 %s, 255
58+
ret i256 %r
59+
}
60+
61+
; Should not apply: imm = 9 (not divisible by 8)
62+
define i256 @shift_not_multiple_of_8(i256 %0) {
63+
; CHECK-LABEL: shift_not_multiple_of_8:
64+
; CHECK: ; %bb.0:
65+
; CHECK-NEXT: JUMPDEST
66+
; CHECK-NEXT: PUSH1 0xFF
67+
; CHECK-NEXT: SWAP1
68+
; CHECK-NEXT: PUSH1 0x9
69+
; CHECK-NEXT: SHR
70+
; CHECK-NEXT: AND
71+
; CHECK-NEXT: SWAP1
72+
; CHECK-NEXT: JUMP
73+
%s = lshr i256 %0, 9
74+
%r = and i256 %s, 255
75+
ret i256 %r
76+
}
77+
78+
; Should not apply: imm = 256 (256 / 8 > 31)
79+
define i256 @shift_too_large(i256 %0) {
80+
; CHECK-LABEL: shift_too_large:
81+
; CHECK: ; %bb.0:
82+
; CHECK-NEXT: JUMPDEST
83+
; CHECK-NEXT: POP
84+
; CHECK-NEXT: PUSH0
85+
; CHECK-NEXT: SWAP1
86+
; CHECK-NEXT: JUMP
87+
%s = lshr i256 %0, 256
88+
%r = and i256 %s, 255
89+
ret i256 %r
90+
}

llvm/test/CodeGen/EVM/zero_any_extload.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ define i8 @load_anyext_i8(ptr addrspace(1) %ptr) nounwind {
99
; CHECK: ; %bb.0:
1010
; CHECK-NEXT: JUMPDEST
1111
; CHECK-NEXT: MLOAD
12-
; CHECK-NEXT: PUSH1 0xF8
13-
; CHECK-NEXT: SHR
12+
; CHECK-NEXT: PUSH0
13+
; CHECK-NEXT: BYTE
1414
; CHECK-NEXT: SWAP1
1515
; CHECK-NEXT: JUMP
1616

@@ -79,8 +79,8 @@ define i256 @load_zeroext_i8(ptr addrspace(1) %ptr) nounwind {
7979
; CHECK: ; %bb.0:
8080
; CHECK-NEXT: JUMPDEST
8181
; CHECK-NEXT: MLOAD
82-
; CHECK-NEXT: PUSH1 0xF8
83-
; CHECK-NEXT: SHR
82+
; CHECK-NEXT: PUSH0
83+
; CHECK-NEXT: BYTE
8484
; CHECK-NEXT: SWAP1
8585
; CHECK-NEXT: JUMP
8686

0 commit comments

Comments
 (0)