1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-64
3 ; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-32
; test_add1: extracts an i8 element of <16 x i8> %a at the run-time position
; %index, zero-extends it and %c to i32, adds them, and truncates the sum back
; to i8. The function is declared to return zeroext i8.
; Expected 64-bit code: clear the upper 32 bits of the index register, extract
; the byte with vextublx, add, then keep only the low 8 bits (clrldi ..., 56).
; Expected 32-bit code: mask the index to its low 4 bits, store the vector to
; the stack with stxv, load the byte with lbzx, add, then keep the low 8 bits.
5 define zeroext i8 @test_add1(<16 x i8> %a, i32 signext %index, i8 zeroext %c) {
6 ; CHECK-64-LABEL: test_add1:
7 ; CHECK-64: # %bb.0: # %entry
8 ; CHECK-64-NEXT: clrldi 3, 3, 32
9 ; CHECK-64-NEXT: vextublx 3, 3, 2
10 ; CHECK-64-NEXT: add 3, 3, 4
11 ; CHECK-64-NEXT: clrldi 3, 3, 56
14 ; CHECK-32-LABEL: test_add1:
15 ; CHECK-32: # %bb.0: # %entry
16 ; CHECK-32-NEXT: addi 5, 1, -16
17 ; CHECK-32-NEXT: clrlwi 3, 3, 28
18 ; CHECK-32-NEXT: stxv 34, -16(1)
19 ; CHECK-32-NEXT: lbzx 3, 5, 3
20 ; CHECK-32-NEXT: add 3, 3, 4
21 ; CHECK-32-NEXT: clrlwi 3, 3, 24
24 %vecext = extractelement <16 x i8> %a, i32 %index
25 %conv = zext i8 %vecext to i32
26 %conv1 = zext i8 %c to i32
27 %add = add nuw nsw i32 %conv, %conv1
28 %conv2 = trunc i32 %add to i8
; test_add2: same shape as a variable-index byte extract plus add, but the
; function and %c are declared signext i8. The IR still zero-extends both the
; extracted element and %c to i32 before the add and truncates the sum to i8.
; Expected 64-bit code: vextublx on the index with its upper 32 bits cleared,
; add, then sign-extend the low byte with extsb.
; Expected 32-bit code: store the vector to the stack (stxv), load the byte at
; the 4-bit-masked index with lbzx, add, then extsb.
32 define signext i8 @test_add2(<16 x i8> %a, i32 signext %index, i8 signext %c) {
33 ; CHECK-64-LABEL: test_add2:
34 ; CHECK-64: # %bb.0: # %entry
35 ; CHECK-64-NEXT: clrldi 3, 3, 32
36 ; CHECK-64-NEXT: vextublx 3, 3, 2
37 ; CHECK-64-NEXT: add 3, 3, 4
38 ; CHECK-64-NEXT: extsb 3, 3
41 ; CHECK-32-LABEL: test_add2:
42 ; CHECK-32: # %bb.0: # %entry
43 ; CHECK-32-NEXT: addi 5, 1, -16
44 ; CHECK-32-NEXT: clrlwi 3, 3, 28
45 ; CHECK-32-NEXT: stxv 34, -16(1)
46 ; CHECK-32-NEXT: lbzx 3, 5, 3
47 ; CHECK-32-NEXT: add 3, 3, 4
48 ; CHECK-32-NEXT: extsb 3, 3
51 %vecext = extractelement <16 x i8> %a, i32 %index
52 %conv3 = zext i8 %vecext to i32
53 %conv14 = zext i8 %c to i32
54 %add = add nuw nsw i32 %conv3, %conv14
55 %conv2 = trunc i32 %add to i8
; test_add3: extracts an i16 element of <8 x i16> %a at the run-time position
; %index, zero-extends it and %c to i32, adds them, and truncates the sum to
; i16. The function is declared to return zeroext i16.
; Expected 64-bit code: the index is scaled by 2 and masked with rlwinm, the
; halfword is extracted with vextuhlx, and the sum is cleared to its low 16
; bits (clrldi ..., 48).
; Expected 32-bit code: the same rlwinm scaling, a stack store of the vector
; (stxv), a halfword load with lhzx, the add, and clrlwi ..., 16.
59 define zeroext i16 @test_add3(<8 x i16> %a, i32 signext %index, i16 zeroext %c) {
60 ; CHECK-64-LABEL: test_add3:
61 ; CHECK-64: # %bb.0: # %entry
62 ; CHECK-64-NEXT: clrldi 3, 3, 32
63 ; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30
64 ; CHECK-64-NEXT: vextuhlx 3, 3, 2
65 ; CHECK-64-NEXT: add 3, 3, 4
66 ; CHECK-64-NEXT: clrldi 3, 3, 48
69 ; CHECK-32-LABEL: test_add3:
70 ; CHECK-32: # %bb.0: # %entry
71 ; CHECK-32-NEXT: addi 5, 1, -16
72 ; CHECK-32-NEXT: rlwinm 3, 3, 1, 28, 30
73 ; CHECK-32-NEXT: stxv 34, -16(1)
74 ; CHECK-32-NEXT: lhzx 3, 5, 3
75 ; CHECK-32-NEXT: add 3, 3, 4
76 ; CHECK-32-NEXT: clrlwi 3, 3, 16
79 %vecext = extractelement <8 x i16> %a, i32 %index
80 %conv = zext i16 %vecext to i32
81 %conv1 = zext i16 %c to i32
82 %add = add nuw nsw i32 %conv, %conv1
83 %conv2 = trunc i32 %add to i16
; test_add4: variable-index halfword extract plus add where the function and
; %c are declared signext i16. The IR zero-extends both the extracted element
; and %c to i32 before the add and truncates the sum to i16.
; Expected 64-bit code: rlwinm scales and masks the index, vextuhlx extracts
; the halfword, and extsh sign-extends the sum.
; Expected 32-bit code: rlwinm on the index, stxv of the vector to the stack,
; lhzx to load the halfword, the add, and extsh.
87 define signext i16 @test_add4(<8 x i16> %a, i32 signext %index, i16 signext %c) {
88 ; CHECK-64-LABEL: test_add4:
89 ; CHECK-64: # %bb.0: # %entry
90 ; CHECK-64-NEXT: clrldi 3, 3, 32
91 ; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30
92 ; CHECK-64-NEXT: vextuhlx 3, 3, 2
93 ; CHECK-64-NEXT: add 3, 3, 4
94 ; CHECK-64-NEXT: extsh 3, 3
97 ; CHECK-32-LABEL: test_add4:
98 ; CHECK-32: # %bb.0: # %entry
99 ; CHECK-32-NEXT: addi 5, 1, -16
100 ; CHECK-32-NEXT: rlwinm 3, 3, 1, 28, 30
101 ; CHECK-32-NEXT: stxv 34, -16(1)
102 ; CHECK-32-NEXT: lhzx 3, 5, 3
103 ; CHECK-32-NEXT: add 3, 3, 4
104 ; CHECK-32-NEXT: extsh 3, 3
107 %vecext = extractelement <8 x i16> %a, i32 %index
108 %conv5 = zext i16 %vecext to i32
109 %conv16 = zext i16 %c to i32
110 %add = add nuw nsw i32 %conv5, %conv16
111 %conv2 = trunc i32 %add to i16
; test_add5: extracts an i32 element of <4 x i32> %a at the run-time position
; %index and adds %c to it. The function is declared to return zeroext i32.
; Expected 64-bit code: the index is scaled by 4 and masked with rlwinm, the
; word is extracted with vextuwlx, and the upper 32 bits of the sum are
; cleared (clrldi ..., 32).
; Expected 32-bit code: rlwinm on the index, stxv of the vector to the stack,
; lwzx to load the word, and the add with no extension afterwards.
115 define zeroext i32 @test_add5(<4 x i32> %a, i32 signext %index, i32 zeroext %c) {
116 ; CHECK-64-LABEL: test_add5:
117 ; CHECK-64: # %bb.0: # %entry
118 ; CHECK-64-NEXT: clrldi 3, 3, 32
119 ; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29
120 ; CHECK-64-NEXT: vextuwlx 3, 3, 2
121 ; CHECK-64-NEXT: add 3, 3, 4
122 ; CHECK-64-NEXT: clrldi 3, 3, 32
125 ; CHECK-32-LABEL: test_add5:
126 ; CHECK-32: # %bb.0: # %entry
127 ; CHECK-32-NEXT: addi 5, 1, -16
128 ; CHECK-32-NEXT: rlwinm 3, 3, 2, 28, 29
129 ; CHECK-32-NEXT: stxv 34, -16(1)
130 ; CHECK-32-NEXT: lwzx 3, 5, 3
131 ; CHECK-32-NEXT: add 3, 3, 4
134 %vecext = extractelement <4 x i32> %a, i32 %index
135 %add = add i32 %vecext, %c
; test_add6: extracts an i32 element of <4 x i32> %a at the run-time position
; %index and adds %c with an nsw add. The function is declared to return
; signext i32.
; Expected 64-bit code: rlwinm scales and masks the index, vextuwlx extracts
; the word, and extsw sign-extends the sum.
; Expected 32-bit code: rlwinm on the index, stxv of the vector to the stack,
; lwzx to load the word, and the add with no extension afterwards.
139 define signext i32 @test_add6(<4 x i32> %a, i32 signext %index, i32 signext %c) {
140 ; CHECK-64-LABEL: test_add6:
141 ; CHECK-64: # %bb.0: # %entry
142 ; CHECK-64-NEXT: clrldi 3, 3, 32
143 ; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29
144 ; CHECK-64-NEXT: vextuwlx 3, 3, 2
145 ; CHECK-64-NEXT: add 3, 3, 4
146 ; CHECK-64-NEXT: extsw 3, 3
149 ; CHECK-32-LABEL: test_add6:
150 ; CHECK-32: # %bb.0: # %entry
151 ; CHECK-32-NEXT: addi 5, 1, -16
152 ; CHECK-32-NEXT: rlwinm 3, 3, 2, 28, 29
153 ; CHECK-32-NEXT: stxv 34, -16(1)
154 ; CHECK-32-NEXT: lwzx 3, 5, 3
155 ; CHECK-32-NEXT: add 3, 3, 4
158 %vecext = extractelement <4 x i32> %a, i32 %index
159 %add = add nsw i32 %vecext, %c
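163 ; When extracting word element 2 on LE, it's better to use mfvsrwz rather than vextuwrx. Both llc invocations at the top of this file target AIX, so the expectations below instead show vextuwlx with a byte offset of 8 (64-bit) or a stack store followed by lwz (32-bit).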
; test7: extracts the constant element 2 of <4 x i32> %a. The function is
; declared to return zeroext i32.
; Expected 64-bit code: load the byte offset 8 into a GPR with li and extract
; the word with vextuwlx.
; Expected 32-bit code: store the vector at -16(1) with stxv and load the word
; at -8(1) with lwz.
164 define zeroext i32 @test7(<4 x i32> %a) {
165 ; CHECK-64-LABEL: test7:
166 ; CHECK-64: # %bb.0: # %entry
167 ; CHECK-64-NEXT: li 3, 8
168 ; CHECK-64-NEXT: vextuwlx 3, 3, 2
171 ; CHECK-32-LABEL: test7:
172 ; CHECK-32: # %bb.0: # %entry
173 ; CHECK-32-NEXT: stxv 34, -16(1)
174 ; CHECK-32-NEXT: lwz 3, -8(1)
177 %vecext = extractelement <4 x i32> %a, i32 2
; testadd_7: extracts the constant element 2 of <4 x i32> %a and adds %c to
; it. The function is declared to return zeroext i32.
; Expected 64-bit code: li of byte offset 8, vextuwlx to extract the word, the
; add, and clrldi ..., 32 to clear the upper 32 bits of the sum.
; Expected 32-bit code: stxv of the vector to -16(1), lwz from -8(1), and the
; add with no extension afterwards.
181 define zeroext i32 @testadd_7(<4 x i32> %a, i32 zeroext %c) {
182 ; CHECK-64-LABEL: testadd_7:
183 ; CHECK-64: # %bb.0: # %entry
184 ; CHECK-64-NEXT: li 4, 8
185 ; CHECK-64-NEXT: vextuwlx 4, 4, 2
186 ; CHECK-64-NEXT: add 3, 4, 3
187 ; CHECK-64-NEXT: clrldi 3, 3, 32
190 ; CHECK-32-LABEL: testadd_7:
191 ; CHECK-32: # %bb.0: # %entry
192 ; CHECK-32-NEXT: stxv 34, -16(1)
193 ; CHECK-32-NEXT: lwz 4, -8(1)
194 ; CHECK-32-NEXT: add 3, 4, 3
197 %vecext = extractelement <4 x i32> %a, i32 2
198 %add = add i32 %vecext, %c
; test8: extracts the constant element 2 of <4 x i32> %a. The function is
; declared to return signext i32.
; Expected 64-bit code: li of byte offset 8, vextuwlx to extract the word, and
; extsw to sign-extend it.
; Expected 32-bit code: stxv of the vector to -16(1) and lwz from -8(1).
202 define signext i32 @test8(<4 x i32> %a) {
203 ; CHECK-64-LABEL: test8:
204 ; CHECK-64: # %bb.0: # %entry
205 ; CHECK-64-NEXT: li 3, 8
206 ; CHECK-64-NEXT: vextuwlx 3, 3, 2
207 ; CHECK-64-NEXT: extsw 3, 3
210 ; CHECK-32-LABEL: test8:
211 ; CHECK-32: # %bb.0: # %entry
212 ; CHECK-32-NEXT: stxv 34, -16(1)
213 ; CHECK-32-NEXT: lwz 3, -8(1)
216 %vecext = extractelement <4 x i32> %a, i32 2
; testadd_8: extracts the constant element 2 of <4 x i32> %a and adds %c with
; an nsw add. The function is declared to return signext i32.
; Expected 64-bit code: li of byte offset 8, vextuwlx to extract the word, the
; add, and extsw to sign-extend the sum.
; Expected 32-bit code: stxv of the vector to -16(1), lwz from -8(1), and the
; add with no extension afterwards.
220 define signext i32 @testadd_8(<4 x i32> %a, i32 signext %c) {
221 ; CHECK-64-LABEL: testadd_8:
222 ; CHECK-64: # %bb.0: # %entry
223 ; CHECK-64-NEXT: li 4, 8
224 ; CHECK-64-NEXT: vextuwlx 4, 4, 2
225 ; CHECK-64-NEXT: add 3, 4, 3
226 ; CHECK-64-NEXT: extsw 3, 3
229 ; CHECK-32-LABEL: testadd_8:
230 ; CHECK-32: # %bb.0: # %entry
231 ; CHECK-32-NEXT: stxv 34, -16(1)
232 ; CHECK-32-NEXT: lwz 4, -8(1)
233 ; CHECK-32-NEXT: add 3, 4, 3
236 %vecext = extractelement <4 x i32> %a, i32 2
237 %add = add nsw i32 %vecext, %c
241 ; When extracting word element 1 on BE, it's better to use mfvsrwz rather than vextuwlx
; test9: extracts the constant element 1 of <4 x i32> %a. The function is
; declared to return signext i32.
; Expected 64-bit code: move the word directly out of the vector register
; with mfvsrwz, then sign-extend it with extsw.
; Expected 32-bit code: stxv of the vector to -16(1) and lwz from -12(1).
242 define signext i32 @test9(<4 x i32> %a) {
243 ; CHECK-64-LABEL: test9:
244 ; CHECK-64: # %bb.0: # %entry
245 ; CHECK-64-NEXT: mfvsrwz 3, 34
246 ; CHECK-64-NEXT: extsw 3, 3
249 ; CHECK-32-LABEL: test9:
250 ; CHECK-32: # %bb.0: # %entry
251 ; CHECK-32-NEXT: stxv 34, -16(1)
252 ; CHECK-32-NEXT: lwz 3, -12(1)
255 %vecext = extractelement <4 x i32> %a, i32 1
; testadd_9: extracts the constant element 1 of <4 x i32> %a and adds %c with
; an nsw add. The function is declared to return signext i32.
; Expected 64-bit code: mfvsrwz to move the word out of the vector register,
; the add, and extsw to sign-extend the sum.
; Expected 32-bit code: stxv of the vector to -16(1), lwz from -12(1), and the
; add with no extension afterwards.
259 define signext i32 @testadd_9(<4 x i32> %a, i32 signext %c) {
260 ; CHECK-64-LABEL: testadd_9:
261 ; CHECK-64: # %bb.0: # %entry
262 ; CHECK-64-NEXT: mfvsrwz 4, 34
263 ; CHECK-64-NEXT: add 3, 4, 3
264 ; CHECK-64-NEXT: extsw 3, 3
267 ; CHECK-32-LABEL: testadd_9:
268 ; CHECK-32: # %bb.0: # %entry
269 ; CHECK-32-NEXT: stxv 34, -16(1)
270 ; CHECK-32-NEXT: lwz 4, -12(1)
271 ; CHECK-32-NEXT: add 3, 4, 3
274 %vecext = extractelement <4 x i32> %a, i32 1
275 %add = add nsw i32 %vecext, %c