1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: opt -mtriple=powerpc-unknown-linux-gnu < %s -instcombine | \
3 ; RUN: llc -mtriple=ppc32-- -mcpu=g5 | not grep vperm
4 ; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 | FileCheck %s
; VSLDOI_xy: result bytes 0-10 are bytes 5-15 of the vector loaded from %A and
; result bytes 11-15 are bytes 0-4 of the vector loaded from %B, i.e. the A:B
; concatenation read from byte offset 5. The CHECK lines expect this to be
; selected as one two-operand "vsldoi ..., 5". The result is stored back to %A.
define void @VSLDOI_xy(<8 x i16>* %A, <8 x i16>* %B) {
; CHECK-LABEL: VSLDOI_xy:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    lvx 3, 0, 4
; CHECK-NEXT:    vsldoi 2, 2, 3, 5
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
entry:
  %tmp = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  ; Work on bytes: reinterpret both halfword vectors as <16 x i8>.
  %tmp.upgrd.1 = bitcast <8 x i16> %tmp to <16 x i8>
  %tmp2.upgrd.2 = bitcast <8 x i16> %tmp2 to <16 x i8>
  ; Bytes 5..15 of A.
  %tmp.upgrd.3 = extractelement <16 x i8> %tmp.upgrd.1, i32 5
  %tmp3 = extractelement <16 x i8> %tmp.upgrd.1, i32 6
  %tmp4 = extractelement <16 x i8> %tmp.upgrd.1, i32 7
  %tmp5 = extractelement <16 x i8> %tmp.upgrd.1, i32 8
  %tmp6 = extractelement <16 x i8> %tmp.upgrd.1, i32 9
  %tmp7 = extractelement <16 x i8> %tmp.upgrd.1, i32 10
  %tmp8 = extractelement <16 x i8> %tmp.upgrd.1, i32 11
  %tmp9 = extractelement <16 x i8> %tmp.upgrd.1, i32 12
  %tmp10 = extractelement <16 x i8> %tmp.upgrd.1, i32 13
  %tmp11 = extractelement <16 x i8> %tmp.upgrd.1, i32 14
  %tmp12 = extractelement <16 x i8> %tmp.upgrd.1, i32 15
  ; Bytes 0..4 of B.
  %tmp13 = extractelement <16 x i8> %tmp2.upgrd.2, i32 0
  %tmp14 = extractelement <16 x i8> %tmp2.upgrd.2, i32 1
  %tmp15 = extractelement <16 x i8> %tmp2.upgrd.2, i32 2
  %tmp16 = extractelement <16 x i8> %tmp2.upgrd.2, i32 3
  %tmp17 = extractelement <16 x i8> %tmp2.upgrd.2, i32 4
  ; Rebuild the result one lane at a time, in extraction order.
  %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.3, i32 0
  %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15
  %tmp33.upgrd.4 = bitcast <16 x i8> %tmp33 to <8 x i16>
  store <8 x i16> %tmp33.upgrd.4, <8 x i16>* %A
  ret void
}
; VSLDOI_xx: same byte pattern as VSLDOI_xy (bytes 5-15 followed by bytes
; 0-4), but both source vectors are loaded from %A; %B is unused. The CHECK
; lines expect a single load and a "vsldoi" whose two inputs are the same
; register. The first basic block is unnamed, so the block comment checked
; here is "# %bb.0:" with no "# %entry" suffix.
define void @VSLDOI_xx(<8 x i16>* %A, <8 x i16>* %B) {
; CHECK-LABEL: VSLDOI_xx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    vsldoi 2, 2, 2, 5
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
  %tmp = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %A
  ; Work on bytes: reinterpret both halfword vectors as <16 x i8>.
  %tmp.upgrd.5 = bitcast <8 x i16> %tmp to <16 x i8>
  %tmp2.upgrd.6 = bitcast <8 x i16> %tmp2 to <16 x i8>
  ; Bytes 5..15 of the first load.
  %tmp.upgrd.7 = extractelement <16 x i8> %tmp.upgrd.5, i32 5
  %tmp3 = extractelement <16 x i8> %tmp.upgrd.5, i32 6
  %tmp4 = extractelement <16 x i8> %tmp.upgrd.5, i32 7
  %tmp5 = extractelement <16 x i8> %tmp.upgrd.5, i32 8
  %tmp6 = extractelement <16 x i8> %tmp.upgrd.5, i32 9
  %tmp7 = extractelement <16 x i8> %tmp.upgrd.5, i32 10
  %tmp8 = extractelement <16 x i8> %tmp.upgrd.5, i32 11
  %tmp9 = extractelement <16 x i8> %tmp.upgrd.5, i32 12
  %tmp10 = extractelement <16 x i8> %tmp.upgrd.5, i32 13
  %tmp11 = extractelement <16 x i8> %tmp.upgrd.5, i32 14
  %tmp12 = extractelement <16 x i8> %tmp.upgrd.5, i32 15
  ; Bytes 0..4 of the second load (same memory, %A).
  %tmp13 = extractelement <16 x i8> %tmp2.upgrd.6, i32 0
  %tmp14 = extractelement <16 x i8> %tmp2.upgrd.6, i32 1
  %tmp15 = extractelement <16 x i8> %tmp2.upgrd.6, i32 2
  %tmp16 = extractelement <16 x i8> %tmp2.upgrd.6, i32 3
  %tmp17 = extractelement <16 x i8> %tmp2.upgrd.6, i32 4
  ; Rebuild the result one lane at a time, in extraction order.
  %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.7, i32 0
  %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15
  %tmp33.upgrd.8 = bitcast <16 x i8> %tmp33 to <8 x i16>
  store <8 x i16> %tmp33.upgrd.8, <8 x i16>* %A
  ret void
}
; VPERM_promote: calls the AltiVec vperm intrinsic on the vectors loaded from
; %A and %B with a constant control vector whose 16 bytes are all 14. For the
; plain llc run the CHECK lines expect the control vector to be materialized
; with "vspltisb 4, 14" and a real "vperm" to be emitted. The opt -instcombine
; RUN pipeline at the top of the file additionally requires that no "vperm"
; appears in the output once instcombine has run first.
define void @VPERM_promote(<8 x i16>* %A, <8 x i16>* %B) {
; CHECK-LABEL: VPERM_promote:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    vspltisb 4, 14
; CHECK-NEXT:    lvx 3, 0, 4
; CHECK-NEXT:    vperm 2, 2, 3, 4
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
entry:
  %tmp = load <8 x i16>, <8 x i16>* %A
  ; The intrinsic is declared on <4 x i32>, so both operands are bitcast.
  %tmp.upgrd.9 = bitcast <8 x i16> %tmp to <4 x i32>
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp2.upgrd.10 = bitcast <8 x i16> %tmp2 to <4 x i32>
  %tmp3 = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %tmp.upgrd.9, <4 x i32> %tmp2.upgrd.10, <16 x i8> <i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14>)
  %tmp3.upgrd.11 = bitcast <4 x i32> %tmp3 to <8 x i16>
  store <8 x i16> %tmp3.upgrd.11, <8 x i16>* %A
  ret void
}

; AltiVec permute intrinsic used by VPERM_promote: two <4 x i32> data operands
; and a <16 x i8> control vector.
declare <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32>, <4 x i32>, <16 x i8>)
; tb_l: interleaves bytes 8-15 of the vectors loaded from %A and %B
; (A8, B8, A9, B9, ..., A15, B15). The CHECK lines expect a single "vmrglb"
; with A as the first input. The result is stored back to %A.
define void @tb_l(<16 x i8>* %A, <16 x i8>* %B) {
; CHECK-LABEL: tb_l:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    lvx 3, 0, 4
; CHECK-NEXT:    vmrglb 2, 2, 3
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
entry:
  %tmp = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  ; Alternate A/B extracts for lanes 8..15.
  %tmp.upgrd.12 = extractelement <16 x i8> %tmp, i32 8
  %tmp3 = extractelement <16 x i8> %tmp2, i32 8
  %tmp4 = extractelement <16 x i8> %tmp, i32 9
  %tmp5 = extractelement <16 x i8> %tmp2, i32 9
  %tmp6 = extractelement <16 x i8> %tmp, i32 10
  %tmp7 = extractelement <16 x i8> %tmp2, i32 10
  %tmp8 = extractelement <16 x i8> %tmp, i32 11
  %tmp9 = extractelement <16 x i8> %tmp2, i32 11
  %tmp10 = extractelement <16 x i8> %tmp, i32 12
  %tmp11 = extractelement <16 x i8> %tmp2, i32 12
  %tmp12 = extractelement <16 x i8> %tmp, i32 13
  %tmp13 = extractelement <16 x i8> %tmp2, i32 13
  %tmp14 = extractelement <16 x i8> %tmp, i32 14
  %tmp15 = extractelement <16 x i8> %tmp2, i32 14
  %tmp16 = extractelement <16 x i8> %tmp, i32 15
  %tmp17 = extractelement <16 x i8> %tmp2, i32 15
  ; Rebuild the result one lane at a time, in extraction order.
  %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.12, i32 0
  %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15
  store <16 x i8> %tmp33, <16 x i8>* %A
  ret void
}
; th_l: interleaves halfwords 4-7 of the vectors loaded from %A and %B
; (A4, B4, A5, B5, A6, B6, A7, B7). The CHECK lines expect a single "vmrglh"
; with A as the first input. The result is stored back to %A.
define void @th_l(<8 x i16>* %A, <8 x i16>* %B) {
; CHECK-LABEL: th_l:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    lvx 3, 0, 4
; CHECK-NEXT:    vmrglh 2, 2, 3
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
entry:
  %tmp = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  ; Alternate A/B extracts for lanes 4..7.
  %tmp.upgrd.13 = extractelement <8 x i16> %tmp, i32 4
  %tmp3 = extractelement <8 x i16> %tmp2, i32 4
  %tmp4 = extractelement <8 x i16> %tmp, i32 5
  %tmp5 = extractelement <8 x i16> %tmp2, i32 5
  %tmp6 = extractelement <8 x i16> %tmp, i32 6
  %tmp7 = extractelement <8 x i16> %tmp2, i32 6
  %tmp8 = extractelement <8 x i16> %tmp, i32 7
  %tmp9 = extractelement <8 x i16> %tmp2, i32 7
  ; Rebuild the result one lane at a time, in extraction order.
  %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.13, i32 0
  %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1
  %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2
  %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3
  %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4
  %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5
  %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6
  %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7
  store <8 x i16> %tmp17, <8 x i16>* %A
  ret void
}
; tw_l: interleaves words 2-3 of the vectors loaded from %A and %B
; (A2, B2, A3, B3). The CHECK lines expect a single "vmrglw" with A as the
; first input. The result is stored back to %A.
define void @tw_l(<4 x i32>* %A, <4 x i32>* %B) {
; CHECK-LABEL: tw_l:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    lvx 3, 0, 4
; CHECK-NEXT:    vmrglw 2, 2, 3
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
entry:
  %tmp = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  ; Alternate A/B extracts for lanes 2..3.
  %tmp.upgrd.14 = extractelement <4 x i32> %tmp, i32 2
  %tmp3 = extractelement <4 x i32> %tmp2, i32 2
  %tmp4 = extractelement <4 x i32> %tmp, i32 3
  %tmp5 = extractelement <4 x i32> %tmp2, i32 3
  ; Rebuild the result one lane at a time, in extraction order.
  %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.14, i32 0
  %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1
  %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2
  %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3
  store <4 x i32> %tmp9, <4 x i32>* %A
  ret void
}
; tb_h: interleaves bytes 0-7 of the vectors loaded from %A and %B
; (A0, B0, A1, B1, ..., A7, B7). The CHECK lines expect a single "vmrghb"
; with A as the first input. The result is stored back to %A.
define void @tb_h(<16 x i8>* %A, <16 x i8>* %B) {
; CHECK-LABEL: tb_h:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    lvx 3, 0, 4
; CHECK-NEXT:    vmrghb 2, 2, 3
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
entry:
  %tmp = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  ; Alternate A/B extracts for lanes 0..7.
  %tmp.upgrd.15 = extractelement <16 x i8> %tmp, i32 0
  %tmp3 = extractelement <16 x i8> %tmp2, i32 0
  %tmp4 = extractelement <16 x i8> %tmp, i32 1
  %tmp5 = extractelement <16 x i8> %tmp2, i32 1
  %tmp6 = extractelement <16 x i8> %tmp, i32 2
  %tmp7 = extractelement <16 x i8> %tmp2, i32 2
  %tmp8 = extractelement <16 x i8> %tmp, i32 3
  %tmp9 = extractelement <16 x i8> %tmp2, i32 3
  %tmp10 = extractelement <16 x i8> %tmp, i32 4
  %tmp11 = extractelement <16 x i8> %tmp2, i32 4
  %tmp12 = extractelement <16 x i8> %tmp, i32 5
  %tmp13 = extractelement <16 x i8> %tmp2, i32 5
  %tmp14 = extractelement <16 x i8> %tmp, i32 6
  %tmp15 = extractelement <16 x i8> %tmp2, i32 6
  %tmp16 = extractelement <16 x i8> %tmp, i32 7
  %tmp17 = extractelement <16 x i8> %tmp2, i32 7
  ; Rebuild the result one lane at a time, in extraction order.
  %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.15, i32 0
  %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15
  store <16 x i8> %tmp33, <16 x i8>* %A
  ret void
}
; th_h: interleaves halfwords 0-3 of the vectors loaded from %A and %B
; (A0, B0, A1, B1, A2, B2, A3, B3). The CHECK lines expect a single "vmrghh"
; with A as the first input. The result is stored back to %A.
define void @th_h(<8 x i16>* %A, <8 x i16>* %B) {
; CHECK-LABEL: th_h:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    lvx 3, 0, 4
; CHECK-NEXT:    vmrghh 2, 2, 3
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
entry:
  %tmp = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  ; Alternate A/B extracts for lanes 0..3.
  %tmp.upgrd.16 = extractelement <8 x i16> %tmp, i32 0
  %tmp3 = extractelement <8 x i16> %tmp2, i32 0
  %tmp4 = extractelement <8 x i16> %tmp, i32 1
  %tmp5 = extractelement <8 x i16> %tmp2, i32 1
  %tmp6 = extractelement <8 x i16> %tmp, i32 2
  %tmp7 = extractelement <8 x i16> %tmp2, i32 2
  %tmp8 = extractelement <8 x i16> %tmp, i32 3
  %tmp9 = extractelement <8 x i16> %tmp2, i32 3
  ; Rebuild the result one lane at a time, in extraction order.
  %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.16, i32 0
  %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1
  %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2
  %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3
  %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4
  %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5
  %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6
  %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7
  store <8 x i16> %tmp17, <8 x i16>* %A
  ret void
}
; tw_h: interleaves words 0-1 of the two vectors with %B's element first
; (B0, A0, B1, A1). The CHECK lines therefore expect "vmrghw" with its inputs
; swapped relative to load order (B's register first). The result is stored
; back to %A.
define void @tw_h(<4 x i32>* %A, <4 x i32>* %B) {
; CHECK-LABEL: tw_h:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    lvx 3, 0, 4
; CHECK-NEXT:    vmrghw 2, 3, 2
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
entry:
  %tmp = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  ; Note the order: B's lane is extracted before A's for each index.
  %tmp.upgrd.17 = extractelement <4 x i32> %tmp2, i32 0
  %tmp3 = extractelement <4 x i32> %tmp, i32 0
  %tmp4 = extractelement <4 x i32> %tmp2, i32 1
  %tmp5 = extractelement <4 x i32> %tmp, i32 1
  ; Rebuild the result one lane at a time, in extraction order.
  %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.17, i32 0
  %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1
  %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2
  %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3
  store <4 x i32> %tmp9, <4 x i32>* %A
  ret void
}
; tw_h_flop: counterpart of tw_h with %A's element first (A0, B0, A1, B1).
; The CHECK lines expect "vmrghw" with its inputs in load order. The first
; basic block is unnamed, so the block comment checked here is "# %bb.0:"
; with no "# %entry" suffix. The result is stored back to %A.
define void @tw_h_flop(<4 x i32>* %A, <4 x i32>* %B) {
; CHECK-LABEL: tw_h_flop:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    lvx 3, 0, 4
; CHECK-NEXT:    vmrghw 2, 2, 3
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
  %tmp = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  ; Alternate A/B extracts for lanes 0..1.
  %tmp.upgrd.18 = extractelement <4 x i32> %tmp, i32 0
  %tmp3 = extractelement <4 x i32> %tmp2, i32 0
  %tmp4 = extractelement <4 x i32> %tmp, i32 1
  %tmp5 = extractelement <4 x i32> %tmp2, i32 1
  ; Rebuild the result one lane at a time, in extraction order.
  %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.18, i32 0
  %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1
  %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2
  %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3
  store <4 x i32> %tmp9, <4 x i32>* %A
  ret void
}
; VMRG_UNARY_tb_l: single-input form of tb_l. Each of bytes 8-15 of the
; vector loaded from %A is duplicated into two adjacent result lanes
; (A8, A8, A9, A9, ..., A15, A15); %B is unused. The CHECK lines expect
; "vmrglb" with the same register for both inputs.
define void @VMRG_UNARY_tb_l(<16 x i8>* %A, <16 x i8>* %B) {
; CHECK-LABEL: VMRG_UNARY_tb_l:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    vmrglb 2, 2, 2
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
entry:
  %tmp = load <16 x i8>, <16 x i8>* %A
  ; Every lane 8..15 is extracted twice from the same vector.
  %tmp.upgrd.19 = extractelement <16 x i8> %tmp, i32 8
  %tmp3 = extractelement <16 x i8> %tmp, i32 8
  %tmp4 = extractelement <16 x i8> %tmp, i32 9
  %tmp5 = extractelement <16 x i8> %tmp, i32 9
  %tmp6 = extractelement <16 x i8> %tmp, i32 10
  %tmp7 = extractelement <16 x i8> %tmp, i32 10
  %tmp8 = extractelement <16 x i8> %tmp, i32 11
  %tmp9 = extractelement <16 x i8> %tmp, i32 11
  %tmp10 = extractelement <16 x i8> %tmp, i32 12
  %tmp11 = extractelement <16 x i8> %tmp, i32 12
  %tmp12 = extractelement <16 x i8> %tmp, i32 13
  %tmp13 = extractelement <16 x i8> %tmp, i32 13
  %tmp14 = extractelement <16 x i8> %tmp, i32 14
  %tmp15 = extractelement <16 x i8> %tmp, i32 14
  %tmp16 = extractelement <16 x i8> %tmp, i32 15
  %tmp17 = extractelement <16 x i8> %tmp, i32 15
  ; Rebuild the result one lane at a time, in extraction order.
  %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.19, i32 0
  %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15
  store <16 x i8> %tmp33, <16 x i8>* %A
  ret void
}
; VMRG_UNARY_th_l: single-input form of th_l. Each of halfwords 4-7 of the
; vector loaded from %A is duplicated into two adjacent result lanes
; (A4, A4, A5, A5, A6, A6, A7, A7); %B is unused. The CHECK lines expect
; "vmrglh" with the same register for both inputs.
define void @VMRG_UNARY_th_l(<8 x i16>* %A, <8 x i16>* %B) {
; CHECK-LABEL: VMRG_UNARY_th_l:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    vmrglh 2, 2, 2
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
entry:
  %tmp = load <8 x i16>, <8 x i16>* %A
  ; Every lane 4..7 is extracted twice from the same vector.
  %tmp.upgrd.20 = extractelement <8 x i16> %tmp, i32 4
  %tmp3 = extractelement <8 x i16> %tmp, i32 4
  %tmp4 = extractelement <8 x i16> %tmp, i32 5
  %tmp5 = extractelement <8 x i16> %tmp, i32 5
  %tmp6 = extractelement <8 x i16> %tmp, i32 6
  %tmp7 = extractelement <8 x i16> %tmp, i32 6
  %tmp8 = extractelement <8 x i16> %tmp, i32 7
  %tmp9 = extractelement <8 x i16> %tmp, i32 7
  ; Rebuild the result one lane at a time, in extraction order.
  %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.20, i32 0
  %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1
  %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2
  %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3
  %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4
  %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5
  %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6
  %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7
  store <8 x i16> %tmp17, <8 x i16>* %A
  ret void
}
; VMRG_UNARY_tw_l: single-input form of tw_l. Words 2 and 3 of the vector
; loaded from %A are each duplicated (A2, A2, A3, A3); %B is unused. The
; CHECK lines expect "vmrglw" with the same register for both inputs.
define void @VMRG_UNARY_tw_l(<4 x i32>* %A, <4 x i32>* %B) {
; CHECK-LABEL: VMRG_UNARY_tw_l:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    vmrglw 2, 2, 2
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
entry:
  %tmp = load <4 x i32>, <4 x i32>* %A
  ; Lanes 2 and 3 are each extracted twice from the same vector.
  %tmp.upgrd.21 = extractelement <4 x i32> %tmp, i32 2
  %tmp3 = extractelement <4 x i32> %tmp, i32 2
  %tmp4 = extractelement <4 x i32> %tmp, i32 3
  %tmp5 = extractelement <4 x i32> %tmp, i32 3
  ; Rebuild the result one lane at a time, in extraction order.
  %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.21, i32 0
  %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1
  %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2
  %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3
  store <4 x i32> %tmp9, <4 x i32>* %A
  ret void
}
; VMRG_UNARY_tb_h: single-input form of tb_h. Each of bytes 0-7 of the vector
; loaded from %A is duplicated into two adjacent result lanes
; (A0, A0, A1, A1, ..., A7, A7); %B is unused. The CHECK lines expect
; "vmrghb" with the same register for both inputs.
define void @VMRG_UNARY_tb_h(<16 x i8>* %A, <16 x i8>* %B) {
; CHECK-LABEL: VMRG_UNARY_tb_h:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    vmrghb 2, 2, 2
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
entry:
  %tmp = load <16 x i8>, <16 x i8>* %A
  ; Every lane 0..7 is extracted twice from the same vector.
  %tmp.upgrd.22 = extractelement <16 x i8> %tmp, i32 0
  %tmp3 = extractelement <16 x i8> %tmp, i32 0
  %tmp4 = extractelement <16 x i8> %tmp, i32 1
  %tmp5 = extractelement <16 x i8> %tmp, i32 1
  %tmp6 = extractelement <16 x i8> %tmp, i32 2
  %tmp7 = extractelement <16 x i8> %tmp, i32 2
  %tmp8 = extractelement <16 x i8> %tmp, i32 3
  %tmp9 = extractelement <16 x i8> %tmp, i32 3
  %tmp10 = extractelement <16 x i8> %tmp, i32 4
  %tmp11 = extractelement <16 x i8> %tmp, i32 4
  %tmp12 = extractelement <16 x i8> %tmp, i32 5
  %tmp13 = extractelement <16 x i8> %tmp, i32 5
  %tmp14 = extractelement <16 x i8> %tmp, i32 6
  %tmp15 = extractelement <16 x i8> %tmp, i32 6
  %tmp16 = extractelement <16 x i8> %tmp, i32 7
  %tmp17 = extractelement <16 x i8> %tmp, i32 7
  ; Rebuild the result one lane at a time, in extraction order.
  %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.22, i32 0
  %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15
  store <16 x i8> %tmp33, <16 x i8>* %A
  ret void
}
; VMRG_UNARY_th_h: single-input form of th_h. Each of halfwords 0-3 of the
; vector loaded from %A is duplicated into two adjacent result lanes
; (A0, A0, A1, A1, A2, A2, A3, A3); %B is unused. The CHECK lines expect
; "vmrghh" with the same register for both inputs.
define void @VMRG_UNARY_th_h(<8 x i16>* %A, <8 x i16>* %B) {
; CHECK-LABEL: VMRG_UNARY_th_h:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    vmrghh 2, 2, 2
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
entry:
  %tmp = load <8 x i16>, <8 x i16>* %A
  ; Every lane 0..3 is extracted twice from the same vector.
  %tmp.upgrd.23 = extractelement <8 x i16> %tmp, i32 0
  %tmp3 = extractelement <8 x i16> %tmp, i32 0
  %tmp4 = extractelement <8 x i16> %tmp, i32 1
  %tmp5 = extractelement <8 x i16> %tmp, i32 1
  %tmp6 = extractelement <8 x i16> %tmp, i32 2
  %tmp7 = extractelement <8 x i16> %tmp, i32 2
  %tmp8 = extractelement <8 x i16> %tmp, i32 3
  %tmp9 = extractelement <8 x i16> %tmp, i32 3
  ; Rebuild the result one lane at a time, in extraction order.
  %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.23, i32 0
  %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1
  %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2
  %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3
  %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4
  %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5
  %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6
  %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7
  store <8 x i16> %tmp17, <8 x i16>* %A
  ret void
}
; VMRG_UNARY_tw_h: single-input form of tw_h. Words 0 and 1 of the vector
; loaded from %A are each duplicated (A0, A0, A1, A1); %B is unused. The
; CHECK lines expect "vmrghw" with the same register for both inputs.
define void @VMRG_UNARY_tw_h(<4 x i32>* %A, <4 x i32>* %B) {
; CHECK-LABEL: VMRG_UNARY_tw_h:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    vmrghw 2, 2, 2
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
entry:
  %tmp = load <4 x i32>, <4 x i32>* %A
  ; Lanes 0 and 1 are each extracted twice from the same vector.
  %tmp.upgrd.24 = extractelement <4 x i32> %tmp, i32 0
  %tmp3 = extractelement <4 x i32> %tmp, i32 0
  %tmp4 = extractelement <4 x i32> %tmp, i32 1
  %tmp5 = extractelement <4 x i32> %tmp, i32 1
  ; Rebuild the result one lane at a time, in extraction order.
  %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.24, i32 0
  %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1
  %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2
  %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3
  store <4 x i32> %tmp9, <4 x i32>* %A
  ret void
}
; VPCKUHUM_unary: views the <8 x i16> loaded from %A as 16 bytes and gathers
; the odd-indexed bytes (1, 3, ..., 15) twice: once into result bytes 0-7 and
; once into result bytes 8-15. %B is unused. The CHECK lines expect a single
; "vpkuhum" with the same register for both inputs.
define void @VPCKUHUM_unary(<8 x i16>* %A, <8 x i16>* %B) {
; CHECK-LABEL: VPCKUHUM_unary:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    vpkuhum 2, 2, 2
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
entry:
  %tmp = load <8 x i16>, <8 x i16>* %A
  ; Two separate byte views of the same loaded value.
  %tmp.upgrd.25 = bitcast <8 x i16> %tmp to <16 x i8>
  %tmp3 = bitcast <8 x i16> %tmp to <16 x i8>
  ; Odd bytes of the first view.
  %tmp.upgrd.26 = extractelement <16 x i8> %tmp.upgrd.25, i32 1
  %tmp4 = extractelement <16 x i8> %tmp.upgrd.25, i32 3
  %tmp5 = extractelement <16 x i8> %tmp.upgrd.25, i32 5
  %tmp6 = extractelement <16 x i8> %tmp.upgrd.25, i32 7
  %tmp7 = extractelement <16 x i8> %tmp.upgrd.25, i32 9
  %tmp8 = extractelement <16 x i8> %tmp.upgrd.25, i32 11
  %tmp9 = extractelement <16 x i8> %tmp.upgrd.25, i32 13
  %tmp10 = extractelement <16 x i8> %tmp.upgrd.25, i32 15
  ; Odd bytes of the second view.
  %tmp11 = extractelement <16 x i8> %tmp3, i32 1
  %tmp12 = extractelement <16 x i8> %tmp3, i32 3
  %tmp13 = extractelement <16 x i8> %tmp3, i32 5
  %tmp14 = extractelement <16 x i8> %tmp3, i32 7
  %tmp15 = extractelement <16 x i8> %tmp3, i32 9
  %tmp16 = extractelement <16 x i8> %tmp3, i32 11
  %tmp17 = extractelement <16 x i8> %tmp3, i32 13
  %tmp18 = extractelement <16 x i8> %tmp3, i32 15
  ; Rebuild the result one lane at a time, in extraction order.
  %tmp19 = insertelement <16 x i8> undef, i8 %tmp.upgrd.26, i32 0
  %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 1
  %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 2
  %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 3
  %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 4
  %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 5
  %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 6
  %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 7
  %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 8
  %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 9
  %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 10
  %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 11
  %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 12
  %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 13
  %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 14
  %tmp34 = insertelement <16 x i8> %tmp33, i8 %tmp18, i32 15
  %tmp34.upgrd.27 = bitcast <16 x i8> %tmp34 to <8 x i16>
  store <8 x i16> %tmp34.upgrd.27, <8 x i16>* %A
  ret void
}
; VPCKUWUM_unary: views the <4 x i32> loaded from %A as 8 halfwords and
; gathers the odd-indexed halfwords (1, 3, 5, 7) twice: once into result
; halfwords 0-3 and once into result halfwords 4-7. %B is unused. The CHECK
; lines expect a single "vpkuwum" with the same register for both inputs.
define void @VPCKUWUM_unary(<4 x i32>* %A, <4 x i32>* %B) {
; CHECK-LABEL: VPCKUWUM_unary:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lvx 2, 0, 3
; CHECK-NEXT:    vpkuwum 2, 2, 2
; CHECK-NEXT:    stvx 2, 0, 3
; CHECK-NEXT:    blr
entry:
  %tmp = load <4 x i32>, <4 x i32>* %A
  ; Two separate halfword views of the same loaded value.
  %tmp.upgrd.28 = bitcast <4 x i32> %tmp to <8 x i16>
  %tmp3 = bitcast <4 x i32> %tmp to <8 x i16>
  ; Odd halfwords of the first view.
  %tmp.upgrd.29 = extractelement <8 x i16> %tmp.upgrd.28, i32 1
  %tmp4 = extractelement <8 x i16> %tmp.upgrd.28, i32 3
  %tmp5 = extractelement <8 x i16> %tmp.upgrd.28, i32 5
  %tmp6 = extractelement <8 x i16> %tmp.upgrd.28, i32 7
  ; Odd halfwords of the second view.
  %tmp7 = extractelement <8 x i16> %tmp3, i32 1
  %tmp8 = extractelement <8 x i16> %tmp3, i32 3
  %tmp9 = extractelement <8 x i16> %tmp3, i32 5
  %tmp10 = extractelement <8 x i16> %tmp3, i32 7
  ; Rebuild the result one lane at a time, in extraction order.
  %tmp11 = insertelement <8 x i16> undef, i16 %tmp.upgrd.29, i32 0
  %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 1
  %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 2
  %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 3
  %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 4
  %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 5
  %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 6
  %tmp18 = insertelement <8 x i16> %tmp17, i16 %tmp10, i32 7
  %tmp18.upgrd.30 = bitcast <8 x i16> %tmp18 to <4 x i32>
  store <4 x i32> %tmp18.upgrd.30, <4 x i32>* %A
  ret void
}