1
- ; RUN: llc -fixup-byte-word-insts=1 < %s | \
2
- ; RUN: FileCheck -check-prefix CHECK -check-prefix BWON %s
3
- ; RUN: llc -fixup-byte-word-insts=0 < %s | \
4
- ; RUN: FileCheck -check-prefix CHECK -check-prefix BWOFF %s
1
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2
+ ; RUN: llc -fixup-byte-word-insts=1 < %s | FileCheck %s -check-prefix=BWON
3
+ ; RUN: llc -fixup-byte-word-insts=0 < %s | FileCheck %s -check-prefix=BWOFF
5
4
6
5
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
7
6
target triple = "x86_64-apple-macosx10.8.0"
@@ -11,22 +10,40 @@ target triple = "x86_64-apple-macosx10.8.0"
11
10
; This has byte loads interspersed with byte stores, in a single
12
11
; basic-block loop. The upper portion should be dead, so the movb loads
13
12
; should have been changed into movzbl instead.
14
- ; CHECK-LABEL: foo1
15
- ; load:
16
- ; BWON: movzbl
17
- ; BWOFF: movb
18
- ; store:
19
- ; CHECK: movb
20
- ; load:
21
- ; BWON: movzbl
22
- ; BWOFF: movb
23
- ; store:
24
- ; CHECK: movb
25
- ; CHECK: ret
26
- define void @foo1 (i32 %count ,
27
- ptr noalias nocapture %q ,
28
- ptr noalias nocapture %p )
29
- nounwind uwtable noinline ssp {
13
+ define void @foo1 (i32 %count , ptr noalias nocapture %q , ptr noalias nocapture %p ) nounwind uwtable noinline ssp {
14
+ ; BWON-LABEL: foo1:
15
+ ; BWON: ## %bb.0:
16
+ ; BWON-NEXT: testl %edi, %edi
17
+ ; BWON-NEXT: jle LBB0_2
18
+ ; BWON-NEXT: .p2align 4
19
+ ; BWON-NEXT: LBB0_1: ## %a4
20
+ ; BWON-NEXT: ## =>This Inner Loop Header: Depth=1
21
+ ; BWON-NEXT: movzbl (%rsi), %eax
22
+ ; BWON-NEXT: movb %al, (%rdx)
23
+ ; BWON-NEXT: movzbl 1(%rsi), %eax
24
+ ; BWON-NEXT: movb %al, 1(%rdx)
25
+ ; BWON-NEXT: addq $8, %rdx
26
+ ; BWON-NEXT: decl %edi
27
+ ; BWON-NEXT: jne LBB0_1
28
+ ; BWON-NEXT: LBB0_2: ## %._crit_edge
29
+ ; BWON-NEXT: retq
30
+ ;
31
+ ; BWOFF-LABEL: foo1:
32
+ ; BWOFF: ## %bb.0:
33
+ ; BWOFF-NEXT: testl %edi, %edi
34
+ ; BWOFF-NEXT: jle LBB0_2
35
+ ; BWOFF-NEXT: .p2align 4
36
+ ; BWOFF-NEXT: LBB0_1: ## %a4
37
+ ; BWOFF-NEXT: ## =>This Inner Loop Header: Depth=1
38
+ ; BWOFF-NEXT: movb (%rsi), %al
39
+ ; BWOFF-NEXT: movb %al, (%rdx)
40
+ ; BWOFF-NEXT: movb 1(%rsi), %al
41
+ ; BWOFF-NEXT: movb %al, 1(%rdx)
42
+ ; BWOFF-NEXT: addq $8, %rdx
43
+ ; BWOFF-NEXT: decl %edi
44
+ ; BWOFF-NEXT: jne LBB0_1
45
+ ; BWOFF-NEXT: LBB0_2: ## %._crit_edge
46
+ ; BWOFF-NEXT: retq
30
47
%1 = icmp sgt i32 %count , 0
31
48
br i1 %1 , label %.lr.ph , label %._crit_edge
32
49
@@ -56,22 +73,40 @@ a4: ; preds = %3, %.lr.ph
56
73
; This has word loads interspersed with word stores.
57
74
; The upper portion should be dead, so the movw loads should have
58
75
; been changed into movzwl instead.
59
- ; CHECK-LABEL: foo2
60
- ; load:
61
- ; BWON: movzwl
62
- ; BWOFF: movw
63
- ; store:
64
- ; CHECK: movw
65
- ; load:
66
- ; BWON: movzwl
67
- ; BWOFF: movw
68
- ; store:
69
- ; CHECK: movw
70
- ; CHECK: ret
71
- define void @foo2 (i32 %count ,
72
- ptr noalias nocapture %q ,
73
- ptr noalias nocapture %p )
74
- nounwind uwtable noinline ssp {
76
+ define void @foo2 (i32 %count , ptr noalias nocapture %q , ptr noalias nocapture %p ) nounwind uwtable noinline ssp {
77
+ ; BWON-LABEL: foo2:
78
+ ; BWON: ## %bb.0:
79
+ ; BWON-NEXT: testl %edi, %edi
80
+ ; BWON-NEXT: jle LBB1_2
81
+ ; BWON-NEXT: .p2align 4
82
+ ; BWON-NEXT: LBB1_1: ## %a4
83
+ ; BWON-NEXT: ## =>This Inner Loop Header: Depth=1
84
+ ; BWON-NEXT: movzwl (%rsi), %eax
85
+ ; BWON-NEXT: movw %ax, (%rdx)
86
+ ; BWON-NEXT: movzwl 2(%rsi), %eax
87
+ ; BWON-NEXT: movw %ax, 2(%rdx)
88
+ ; BWON-NEXT: addq $16, %rdx
89
+ ; BWON-NEXT: decl %edi
90
+ ; BWON-NEXT: jne LBB1_1
91
+ ; BWON-NEXT: LBB1_2: ## %._crit_edge
92
+ ; BWON-NEXT: retq
93
+ ;
94
+ ; BWOFF-LABEL: foo2:
95
+ ; BWOFF: ## %bb.0:
96
+ ; BWOFF-NEXT: testl %edi, %edi
97
+ ; BWOFF-NEXT: jle LBB1_2
98
+ ; BWOFF-NEXT: .p2align 4
99
+ ; BWOFF-NEXT: LBB1_1: ## %a4
100
+ ; BWOFF-NEXT: ## =>This Inner Loop Header: Depth=1
101
+ ; BWOFF-NEXT: movw (%rsi), %ax
102
+ ; BWOFF-NEXT: movw %ax, (%rdx)
103
+ ; BWOFF-NEXT: movw 2(%rsi), %ax
104
+ ; BWOFF-NEXT: movw %ax, 2(%rdx)
105
+ ; BWOFF-NEXT: addq $16, %rdx
106
+ ; BWOFF-NEXT: decl %edi
107
+ ; BWOFF-NEXT: jne LBB1_1
108
+ ; BWOFF-NEXT: LBB1_2: ## %._crit_edge
109
+ ; BWOFF-NEXT: retq
75
110
%1 = icmp sgt i32 %count , 0
76
111
br i1 %1 , label %.lr.ph , label %._crit_edge
77
112
@@ -98,11 +133,18 @@ a4: ; preds = %3, %.lr.ph
98
133
99
134
; This test contains nothing but a simple byte load and store.
100
135
; movb encodes smaller, but with -fixup-byte-word-insts=1 we use movzbl for the load for better perf.
101
- ; CHECK-LABEL: foo3:
102
- ; BWON: movzbl
103
- ; BWOFF: movb
104
- ; CHECK: movb
105
136
define void @foo3 (ptr %dst , ptr %src ) {
137
+ ; BWON-LABEL: foo3:
138
+ ; BWON: ## %bb.0:
139
+ ; BWON-NEXT: movzbl (%rsi), %eax
140
+ ; BWON-NEXT: movb %al, (%rdi)
141
+ ; BWON-NEXT: retq
142
+ ;
143
+ ; BWOFF-LABEL: foo3:
144
+ ; BWOFF: ## %bb.0:
145
+ ; BWOFF-NEXT: movb (%rsi), %al
146
+ ; BWOFF-NEXT: movb %al, (%rdi)
147
+ ; BWOFF-NEXT: retq
106
148
%t0 = load i8 , ptr %src , align 1
107
149
store i8 %t0 , ptr %dst , align 1
108
150
ret void
@@ -111,11 +153,18 @@ define void @foo3(ptr%dst, ptr%src) {
111
153
; This test contains nothing but a simple word load and store. Since
112
154
; movw and movzwl are the same size, we should always choose to use
113
155
; movzwl instead (when -fixup-byte-word-insts=1; with =0 the movw load is kept).
114
- ; CHECK-LABEL: foo4:
115
- ; BWON: movzwl
116
- ; BWOFF: movw
117
- ; CHECK: movw
118
156
define void @foo4 (ptr %dst , ptr %src ) {
157
+ ; BWON-LABEL: foo4:
158
+ ; BWON: ## %bb.0:
159
+ ; BWON-NEXT: movzwl (%rsi), %eax
160
+ ; BWON-NEXT: movw %ax, (%rdi)
161
+ ; BWON-NEXT: retq
162
+ ;
163
+ ; BWOFF-LABEL: foo4:
164
+ ; BWOFF: ## %bb.0:
165
+ ; BWOFF-NEXT: movw (%rsi), %ax
166
+ ; BWOFF-NEXT: movw %ax, (%rdi)
167
+ ; BWOFF-NEXT: retq
119
168
%t0 = load i16 , ptr %src , align 2
120
169
store i16 %t0 , ptr %dst , align 2
121
170
ret void
0 commit comments