1 ; RUN: opt -mtriple=powerpc-unknown-linux-gnu < %s -instcombine | \
2 ; RUN: llc -mtriple=ppc32-- -mcpu=g5 | not grep vperm
3 ; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 > %t
4 ; RUN: grep vsldoi %t | count 2
5 ; RUN: grep vmrgh %t | count 7
6 ; RUN: grep vmrgl %t | count 6
7 ; RUN: grep vpkuhum %t | count 1
8 ; RUN: grep vpkuwum %t | count 1
; VSLDOI_xy: two-input byte-window shuffle.
; Loads <8 x i16> vectors from %A and %B, views both as <16 x i8>, and builds
; a result whose lanes 0..10 are bytes 5..15 of %A's vector and whose lanes
; 11..15 are bytes 0..4 of %B's vector. The result is bitcast back to
; <8 x i16> and stored to %A.
; NOTE(review): going by the function name and the "grep vsldoi | count 2"
; RUN line, this is presumably one of the two functions expected to select
; vsldoi -- confirm against the llc output.
10 define void @VSLDOI_xy(<8 x i16>* %A, <8 x i16>* %B) {
12 %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=1]
13 %tmp2 = load <8 x i16>, <8 x i16>* %B ; <<8 x i16>> [#uses=1]
14 %tmp.upgrd.1 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=11]
15 %tmp2.upgrd.2 = bitcast <8 x i16> %tmp2 to <16 x i8> ; <<16 x i8>> [#uses=5]
; Bytes 5..15 of the first input.
16 %tmp.upgrd.3 = extractelement <16 x i8> %tmp.upgrd.1, i32 5 ; <i8> [#uses=1]
17 %tmp3 = extractelement <16 x i8> %tmp.upgrd.1, i32 6 ; <i8> [#uses=1]
18 %tmp4 = extractelement <16 x i8> %tmp.upgrd.1, i32 7 ; <i8> [#uses=1]
19 %tmp5 = extractelement <16 x i8> %tmp.upgrd.1, i32 8 ; <i8> [#uses=1]
20 %tmp6 = extractelement <16 x i8> %tmp.upgrd.1, i32 9 ; <i8> [#uses=1]
21 %tmp7 = extractelement <16 x i8> %tmp.upgrd.1, i32 10 ; <i8> [#uses=1]
22 %tmp8 = extractelement <16 x i8> %tmp.upgrd.1, i32 11 ; <i8> [#uses=1]
23 %tmp9 = extractelement <16 x i8> %tmp.upgrd.1, i32 12 ; <i8> [#uses=1]
24 %tmp10 = extractelement <16 x i8> %tmp.upgrd.1, i32 13 ; <i8> [#uses=1]
25 %tmp11 = extractelement <16 x i8> %tmp.upgrd.1, i32 14 ; <i8> [#uses=1]
26 %tmp12 = extractelement <16 x i8> %tmp.upgrd.1, i32 15 ; <i8> [#uses=1]
; Bytes 0..4 of the second input.
27 %tmp13 = extractelement <16 x i8> %tmp2.upgrd.2, i32 0 ; <i8> [#uses=1]
28 %tmp14 = extractelement <16 x i8> %tmp2.upgrd.2, i32 1 ; <i8> [#uses=1]
29 %tmp15 = extractelement <16 x i8> %tmp2.upgrd.2, i32 2 ; <i8> [#uses=1]
30 %tmp16 = extractelement <16 x i8> %tmp2.upgrd.2, i32 3 ; <i8> [#uses=1]
31 %tmp17 = extractelement <16 x i8> %tmp2.upgrd.2, i32 4 ; <i8> [#uses=1]
; Reassemble the 16 extracted bytes, in extraction order, into lanes 0..15.
32 %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.3, i32 0 ; <<16 x i8>> [#uses=1]
33 %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1]
34 %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1]
35 %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1]
36 %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1]
37 %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1]
38 %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1]
39 %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1]
40 %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1]
41 %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1]
42 %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1]
43 %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1]
44 %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1]
45 %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1]
46 %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1]
47 %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1]
48 %tmp33.upgrd.4 = bitcast <16 x i8> %tmp33 to <8 x i16> ; <<8 x i16>> [#uses=1]
49 store <8 x i16> %tmp33.upgrd.4, <8 x i16>* %A
; VSLDOI_xx: single-input variant of the byte-window shuffle.
; Both loads read from %A (%B is not used), so the result is bytes 5..15 of
; the vector followed by bytes 0..4 of the same vector, i.e. the 16 bytes
; rotated so that byte 5 lands in lane 0. The result is stored back to %A.
; NOTE(review): presumably the second function counted by the
; "grep vsldoi | count 2" RUN line (same register used for both operands) --
; confirm against the llc output.
53 define void @VSLDOI_xx(<8 x i16>* %A, <8 x i16>* %B) {
54 %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=1]
; Second load is also from %A: this makes both shuffle inputs the same value.
55 %tmp2 = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=1]
56 %tmp.upgrd.5 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=11]
57 %tmp2.upgrd.6 = bitcast <8 x i16> %tmp2 to <16 x i8> ; <<16 x i8>> [#uses=5]
; Bytes 5..15 of the first copy.
58 %tmp.upgrd.7 = extractelement <16 x i8> %tmp.upgrd.5, i32 5 ; <i8> [#uses=1]
59 %tmp3 = extractelement <16 x i8> %tmp.upgrd.5, i32 6 ; <i8> [#uses=1]
60 %tmp4 = extractelement <16 x i8> %tmp.upgrd.5, i32 7 ; <i8> [#uses=1]
61 %tmp5 = extractelement <16 x i8> %tmp.upgrd.5, i32 8 ; <i8> [#uses=1]
62 %tmp6 = extractelement <16 x i8> %tmp.upgrd.5, i32 9 ; <i8> [#uses=1]
63 %tmp7 = extractelement <16 x i8> %tmp.upgrd.5, i32 10 ; <i8> [#uses=1]
64 %tmp8 = extractelement <16 x i8> %tmp.upgrd.5, i32 11 ; <i8> [#uses=1]
65 %tmp9 = extractelement <16 x i8> %tmp.upgrd.5, i32 12 ; <i8> [#uses=1]
66 %tmp10 = extractelement <16 x i8> %tmp.upgrd.5, i32 13 ; <i8> [#uses=1]
67 %tmp11 = extractelement <16 x i8> %tmp.upgrd.5, i32 14 ; <i8> [#uses=1]
68 %tmp12 = extractelement <16 x i8> %tmp.upgrd.5, i32 15 ; <i8> [#uses=1]
; Bytes 0..4 of the second copy.
69 %tmp13 = extractelement <16 x i8> %tmp2.upgrd.6, i32 0 ; <i8> [#uses=1]
70 %tmp14 = extractelement <16 x i8> %tmp2.upgrd.6, i32 1 ; <i8> [#uses=1]
71 %tmp15 = extractelement <16 x i8> %tmp2.upgrd.6, i32 2 ; <i8> [#uses=1]
72 %tmp16 = extractelement <16 x i8> %tmp2.upgrd.6, i32 3 ; <i8> [#uses=1]
73 %tmp17 = extractelement <16 x i8> %tmp2.upgrd.6, i32 4 ; <i8> [#uses=1]
; Reassemble the 16 extracted bytes, in extraction order, into lanes 0..15.
74 %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.7, i32 0 ; <<16 x i8>> [#uses=1]
75 %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1]
76 %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1]
77 %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1]
78 %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1]
79 %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1]
80 %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1]
81 %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1]
82 %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1]
83 %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1]
84 %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1]
85 %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1]
86 %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1]
87 %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1]
88 %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1]
89 %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1]
90 %tmp33.upgrd.8 = bitcast <16 x i8> %tmp33 to <8 x i16> ; <<8 x i16>> [#uses=1]
91 store <8 x i16> %tmp33.upgrd.8, <8 x i16>* %A
; VPERM_promote: explicit call to the llvm.ppc.altivec.vperm intrinsic with a
; constant permute mask.
; Loads <8 x i16> vectors from %A and %B, bitcasts both to <4 x i32>, and
; passes them to the intrinsic with a mask whose 16 entries are all 14. The
; result is bitcast back to <8 x i16> and stored to %A.
; NOTE(review): the first RUN line pipes this file through opt -instcombine
; and then llc, and requires that no "vperm" appears in the output;
; presumably instcombine is expected to rewrite this constant-mask call into
; a form llc can select without vperm -- confirm.
95 define void @VPERM_promote(<8 x i16>* %A, <8 x i16>* %B) {
97 %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=1]
98 %tmp.upgrd.9 = bitcast <8 x i16> %tmp to <4 x i32> ; <<4 x i32>> [#uses=1]
99 %tmp2 = load <8 x i16>, <8 x i16>* %B ; <<8 x i16>> [#uses=1]
100 %tmp2.upgrd.10 = bitcast <8 x i16> %tmp2 to <4 x i32> ; <<4 x i32>> [#uses=1]
; Every mask byte is the constant 14.
101 %tmp3 = call <4 x i32> @llvm.ppc.altivec.vperm( <4 x i32> %tmp.upgrd.9, <4 x i32> %tmp2.upgrd.10, <16 x i8> < i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14 > ) ; <<4 x i32>> [#uses=1]
102 %tmp3.upgrd.11 = bitcast <4 x i32> %tmp3 to <8 x i16> ; <<8 x i16>> [#uses=1]
103 store <8 x i16> %tmp3.upgrd.11, <8 x i16>* %A
; Declaration of the AltiVec permute intrinsic called by @VPERM_promote:
; two <4 x i32> operands plus a <16 x i8> mask operand, returning <4 x i32>.
107 declare <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32>, <4 x i32>, <16 x i8>)
; tb_l: two-input byte interleave of elements 8..15.
; Loads <16 x i8> vectors from %A and %B and builds a result that alternates
; A[8], B[8], A[9], B[9], ..., A[15], B[15]. The result is stored to %A.
; NOTE(review): presumably one of the six functions counted by the
; "grep vmrgl | count 6" RUN line (byte merge-low) -- confirm.
109 define void @tb_l(<16 x i8>* %A, <16 x i8>* %B) {
111 %tmp = load <16 x i8>, <16 x i8>* %A ; <<16 x i8>> [#uses=8]
112 %tmp2 = load <16 x i8>, <16 x i8>* %B ; <<16 x i8>> [#uses=8]
; Extract pairs (A[i], B[i]) for i = 8..15.
113 %tmp.upgrd.12 = extractelement <16 x i8> %tmp, i32 8 ; <i8> [#uses=1]
114 %tmp3 = extractelement <16 x i8> %tmp2, i32 8 ; <i8> [#uses=1]
115 %tmp4 = extractelement <16 x i8> %tmp, i32 9 ; <i8> [#uses=1]
116 %tmp5 = extractelement <16 x i8> %tmp2, i32 9 ; <i8> [#uses=1]
117 %tmp6 = extractelement <16 x i8> %tmp, i32 10 ; <i8> [#uses=1]
118 %tmp7 = extractelement <16 x i8> %tmp2, i32 10 ; <i8> [#uses=1]
119 %tmp8 = extractelement <16 x i8> %tmp, i32 11 ; <i8> [#uses=1]
120 %tmp9 = extractelement <16 x i8> %tmp2, i32 11 ; <i8> [#uses=1]
121 %tmp10 = extractelement <16 x i8> %tmp, i32 12 ; <i8> [#uses=1]
122 %tmp11 = extractelement <16 x i8> %tmp2, i32 12 ; <i8> [#uses=1]
123 %tmp12 = extractelement <16 x i8> %tmp, i32 13 ; <i8> [#uses=1]
124 %tmp13 = extractelement <16 x i8> %tmp2, i32 13 ; <i8> [#uses=1]
125 %tmp14 = extractelement <16 x i8> %tmp, i32 14 ; <i8> [#uses=1]
126 %tmp15 = extractelement <16 x i8> %tmp2, i32 14 ; <i8> [#uses=1]
127 %tmp16 = extractelement <16 x i8> %tmp, i32 15 ; <i8> [#uses=1]
128 %tmp17 = extractelement <16 x i8> %tmp2, i32 15 ; <i8> [#uses=1]
; Place the extracted bytes, in extraction order, into lanes 0..15.
129 %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.12, i32 0 ; <<16 x i8>> [#uses=1]
130 %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1]
131 %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1]
132 %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1]
133 %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1]
134 %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1]
135 %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1]
136 %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1]
137 %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1]
138 %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1]
139 %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1]
140 %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1]
141 %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1]
142 %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1]
143 %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1]
144 %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1]
145 store <16 x i8> %tmp33, <16 x i8>* %A
; th_l: two-input halfword interleave of elements 4..7.
; Loads <8 x i16> vectors from %A and %B and builds
; A[4], B[4], A[5], B[5], A[6], B[6], A[7], B[7]; stores the result to %A.
; NOTE(review): presumably counted by the "grep vmrgl | count 6" RUN line
; (halfword merge-low) -- confirm.
149 define void @th_l(<8 x i16>* %A, <8 x i16>* %B) {
151 %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=4]
152 %tmp2 = load <8 x i16>, <8 x i16>* %B ; <<8 x i16>> [#uses=4]
; Extract pairs (A[i], B[i]) for i = 4..7.
153 %tmp.upgrd.13 = extractelement <8 x i16> %tmp, i32 4 ; <i16> [#uses=1]
154 %tmp3 = extractelement <8 x i16> %tmp2, i32 4 ; <i16> [#uses=1]
155 %tmp4 = extractelement <8 x i16> %tmp, i32 5 ; <i16> [#uses=1]
156 %tmp5 = extractelement <8 x i16> %tmp2, i32 5 ; <i16> [#uses=1]
157 %tmp6 = extractelement <8 x i16> %tmp, i32 6 ; <i16> [#uses=1]
158 %tmp7 = extractelement <8 x i16> %tmp2, i32 6 ; <i16> [#uses=1]
159 %tmp8 = extractelement <8 x i16> %tmp, i32 7 ; <i16> [#uses=1]
160 %tmp9 = extractelement <8 x i16> %tmp2, i32 7 ; <i16> [#uses=1]
; Place the extracted halfwords, in extraction order, into lanes 0..7.
161 %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.13, i32 0 ; <<8 x i16>> [#uses=1]
162 %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1 ; <<8 x i16>> [#uses=1]
163 %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2 ; <<8 x i16>> [#uses=1]
164 %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3 ; <<8 x i16>> [#uses=1]
165 %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4 ; <<8 x i16>> [#uses=1]
166 %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5 ; <<8 x i16>> [#uses=1]
167 %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6 ; <<8 x i16>> [#uses=1]
168 %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7 ; <<8 x i16>> [#uses=1]
169 store <8 x i16> %tmp17, <8 x i16>* %A
; tw_l: two-input word interleave of elements 2..3.
; Loads <4 x i32> vectors from %A and %B and builds A[2], B[2], A[3], B[3];
; stores the result to %A.
; NOTE(review): presumably counted by the "grep vmrgl | count 6" RUN line
; (word merge-low) -- confirm.
173 define void @tw_l(<4 x i32>* %A, <4 x i32>* %B) {
175 %tmp = load <4 x i32>, <4 x i32>* %A ; <<4 x i32>> [#uses=2]
176 %tmp2 = load <4 x i32>, <4 x i32>* %B ; <<4 x i32>> [#uses=2]
177 %tmp.upgrd.14 = extractelement <4 x i32> %tmp, i32 2 ; <i32> [#uses=1]
178 %tmp3 = extractelement <4 x i32> %tmp2, i32 2 ; <i32> [#uses=1]
179 %tmp4 = extractelement <4 x i32> %tmp, i32 3 ; <i32> [#uses=1]
180 %tmp5 = extractelement <4 x i32> %tmp2, i32 3 ; <i32> [#uses=1]
; Place the extracted words, in extraction order, into lanes 0..3.
181 %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.14, i32 0 ; <<4 x i32>> [#uses=1]
182 %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
183 %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1]
184 %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1]
185 store <4 x i32> %tmp9, <4 x i32>* %A
; tb_h: two-input byte interleave of elements 0..7.
; Loads <16 x i8> vectors from %A and %B and builds a result that alternates
; A[0], B[0], A[1], B[1], ..., A[7], B[7]. The result is stored to %A.
; NOTE(review): presumably one of the seven functions counted by the
; "grep vmrgh | count 7" RUN line (byte merge-high) -- confirm.
189 define void @tb_h(<16 x i8>* %A, <16 x i8>* %B) {
191 %tmp = load <16 x i8>, <16 x i8>* %A ; <<16 x i8>> [#uses=8]
192 %tmp2 = load <16 x i8>, <16 x i8>* %B ; <<16 x i8>> [#uses=8]
; Extract pairs (A[i], B[i]) for i = 0..7.
193 %tmp.upgrd.15 = extractelement <16 x i8> %tmp, i32 0 ; <i8> [#uses=1]
194 %tmp3 = extractelement <16 x i8> %tmp2, i32 0 ; <i8> [#uses=1]
195 %tmp4 = extractelement <16 x i8> %tmp, i32 1 ; <i8> [#uses=1]
196 %tmp5 = extractelement <16 x i8> %tmp2, i32 1 ; <i8> [#uses=1]
197 %tmp6 = extractelement <16 x i8> %tmp, i32 2 ; <i8> [#uses=1]
198 %tmp7 = extractelement <16 x i8> %tmp2, i32 2 ; <i8> [#uses=1]
199 %tmp8 = extractelement <16 x i8> %tmp, i32 3 ; <i8> [#uses=1]
200 %tmp9 = extractelement <16 x i8> %tmp2, i32 3 ; <i8> [#uses=1]
201 %tmp10 = extractelement <16 x i8> %tmp, i32 4 ; <i8> [#uses=1]
202 %tmp11 = extractelement <16 x i8> %tmp2, i32 4 ; <i8> [#uses=1]
203 %tmp12 = extractelement <16 x i8> %tmp, i32 5 ; <i8> [#uses=1]
204 %tmp13 = extractelement <16 x i8> %tmp2, i32 5 ; <i8> [#uses=1]
205 %tmp14 = extractelement <16 x i8> %tmp, i32 6 ; <i8> [#uses=1]
206 %tmp15 = extractelement <16 x i8> %tmp2, i32 6 ; <i8> [#uses=1]
207 %tmp16 = extractelement <16 x i8> %tmp, i32 7 ; <i8> [#uses=1]
208 %tmp17 = extractelement <16 x i8> %tmp2, i32 7 ; <i8> [#uses=1]
; Place the extracted bytes, in extraction order, into lanes 0..15.
209 %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.15, i32 0 ; <<16 x i8>> [#uses=1]
210 %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1]
211 %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1]
212 %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1]
213 %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1]
214 %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1]
215 %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1]
216 %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1]
217 %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1]
218 %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1]
219 %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1]
220 %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1]
221 %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1]
222 %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1]
223 %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1]
224 %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1]
225 store <16 x i8> %tmp33, <16 x i8>* %A
; th_h: two-input halfword interleave of elements 0..3.
; Loads <8 x i16> vectors from %A and %B and builds
; A[0], B[0], A[1], B[1], A[2], B[2], A[3], B[3]; stores the result to %A.
; NOTE(review): presumably counted by the "grep vmrgh | count 7" RUN line
; (halfword merge-high) -- confirm.
229 define void @th_h(<8 x i16>* %A, <8 x i16>* %B) {
231 %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=4]
232 %tmp2 = load <8 x i16>, <8 x i16>* %B ; <<8 x i16>> [#uses=4]
; Extract pairs (A[i], B[i]) for i = 0..3.
233 %tmp.upgrd.16 = extractelement <8 x i16> %tmp, i32 0 ; <i16> [#uses=1]
234 %tmp3 = extractelement <8 x i16> %tmp2, i32 0 ; <i16> [#uses=1]
235 %tmp4 = extractelement <8 x i16> %tmp, i32 1 ; <i16> [#uses=1]
236 %tmp5 = extractelement <8 x i16> %tmp2, i32 1 ; <i16> [#uses=1]
237 %tmp6 = extractelement <8 x i16> %tmp, i32 2 ; <i16> [#uses=1]
238 %tmp7 = extractelement <8 x i16> %tmp2, i32 2 ; <i16> [#uses=1]
239 %tmp8 = extractelement <8 x i16> %tmp, i32 3 ; <i16> [#uses=1]
240 %tmp9 = extractelement <8 x i16> %tmp2, i32 3 ; <i16> [#uses=1]
; Place the extracted halfwords, in extraction order, into lanes 0..7.
241 %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.16, i32 0 ; <<8 x i16>> [#uses=1]
242 %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1 ; <<8 x i16>> [#uses=1]
243 %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2 ; <<8 x i16>> [#uses=1]
244 %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3 ; <<8 x i16>> [#uses=1]
245 %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4 ; <<8 x i16>> [#uses=1]
246 %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5 ; <<8 x i16>> [#uses=1]
247 %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6 ; <<8 x i16>> [#uses=1]
248 %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7 ; <<8 x i16>> [#uses=1]
249 store <8 x i16> %tmp17, <8 x i16>* %A
; tw_h: two-input word interleave of elements 0..1, with %B's element first.
; Loads <4 x i32> vectors from %A and %B and builds B[0], A[0], B[1], A[1];
; stores the result to %A. Note the operand order is the reverse of
; @tw_h_flop, which builds A[0], B[0], A[1], B[1].
; NOTE(review): presumably counted by the "grep vmrgh | count 7" RUN line
; (word merge-high) -- confirm.
253 define void @tw_h(<4 x i32>* %A, <4 x i32>* %B) {
255 %tmp = load <4 x i32>, <4 x i32>* %A ; <<4 x i32>> [#uses=2]
256 %tmp2 = load <4 x i32>, <4 x i32>* %B ; <<4 x i32>> [#uses=2]
; Each pair takes the element from %tmp2 (loaded from %B) before %tmp (from %A).
257 %tmp.upgrd.17 = extractelement <4 x i32> %tmp2, i32 0 ; <i32> [#uses=1]
258 %tmp3 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1]
259 %tmp4 = extractelement <4 x i32> %tmp2, i32 1 ; <i32> [#uses=1]
260 %tmp5 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1]
; Place the extracted words, in extraction order, into lanes 0..3.
261 %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.17, i32 0 ; <<4 x i32>> [#uses=1]
262 %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
263 %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1]
264 %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1]
265 store <4 x i32> %tmp9, <4 x i32>* %A
; tw_h_flop: two-input word interleave of elements 0..1, with %A's element
; first. Loads <4 x i32> vectors from %A and %B and builds
; A[0], B[0], A[1], B[1]; stores the result to %A. This is @tw_h with the two
; inputs taken in the opposite order.
; NOTE(review): presumably counted by the "grep vmrgh | count 7" RUN line
; (word merge-high) -- confirm.
269 define void @tw_h_flop(<4 x i32>* %A, <4 x i32>* %B) {
270 %tmp = load <4 x i32>, <4 x i32>* %A ; <<4 x i32>> [#uses=2]
271 %tmp2 = load <4 x i32>, <4 x i32>* %B ; <<4 x i32>> [#uses=2]
272 %tmp.upgrd.18 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1]
273 %tmp3 = extractelement <4 x i32> %tmp2, i32 0 ; <i32> [#uses=1]
274 %tmp4 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1]
275 %tmp5 = extractelement <4 x i32> %tmp2, i32 1 ; <i32> [#uses=1]
; Place the extracted words, in extraction order, into lanes 0..3.
276 %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.18, i32 0 ; <<4 x i32>> [#uses=1]
277 %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
278 %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1]
279 %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1]
280 store <4 x i32> %tmp9, <4 x i32>* %A
; VMRG_UNARY_tb_l: single-input byte interleave of elements 8..15.
; Only %A is loaded (%B is not used). Each of bytes 8..15 is extracted twice
; and the result is A[8], A[8], A[9], A[9], ..., A[15], A[15]; it is stored
; back to %A.
; NOTE(review): presumably counted by the "grep vmrgl | count 6" RUN line
; (byte merge-low with the same register for both operands) -- confirm.
284 define void @VMRG_UNARY_tb_l(<16 x i8>* %A, <16 x i8>* %B) {
286 %tmp = load <16 x i8>, <16 x i8>* %A ; <<16 x i8>> [#uses=16]
; Each index 8..15 is extracted twice from the same vector.
287 %tmp.upgrd.19 = extractelement <16 x i8> %tmp, i32 8 ; <i8> [#uses=1]
288 %tmp3 = extractelement <16 x i8> %tmp, i32 8 ; <i8> [#uses=1]
289 %tmp4 = extractelement <16 x i8> %tmp, i32 9 ; <i8> [#uses=1]
290 %tmp5 = extractelement <16 x i8> %tmp, i32 9 ; <i8> [#uses=1]
291 %tmp6 = extractelement <16 x i8> %tmp, i32 10 ; <i8> [#uses=1]
292 %tmp7 = extractelement <16 x i8> %tmp, i32 10 ; <i8> [#uses=1]
293 %tmp8 = extractelement <16 x i8> %tmp, i32 11 ; <i8> [#uses=1]
294 %tmp9 = extractelement <16 x i8> %tmp, i32 11 ; <i8> [#uses=1]
295 %tmp10 = extractelement <16 x i8> %tmp, i32 12 ; <i8> [#uses=1]
296 %tmp11 = extractelement <16 x i8> %tmp, i32 12 ; <i8> [#uses=1]
297 %tmp12 = extractelement <16 x i8> %tmp, i32 13 ; <i8> [#uses=1]
298 %tmp13 = extractelement <16 x i8> %tmp, i32 13 ; <i8> [#uses=1]
299 %tmp14 = extractelement <16 x i8> %tmp, i32 14 ; <i8> [#uses=1]
300 %tmp15 = extractelement <16 x i8> %tmp, i32 14 ; <i8> [#uses=1]
301 %tmp16 = extractelement <16 x i8> %tmp, i32 15 ; <i8> [#uses=1]
302 %tmp17 = extractelement <16 x i8> %tmp, i32 15 ; <i8> [#uses=1]
; Place the extracted bytes, in extraction order, into lanes 0..15.
303 %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.19, i32 0 ; <<16 x i8>> [#uses=1]
304 %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1]
305 %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1]
306 %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1]
307 %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1]
308 %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1]
309 %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1]
310 %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1]
311 %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1]
312 %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1]
313 %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1]
314 %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1]
315 %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1]
316 %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1]
317 %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1]
318 %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1]
319 store <16 x i8> %tmp33, <16 x i8>* %A
; VMRG_UNARY_th_l: single-input halfword interleave of elements 4..7.
; Only %A is loaded (%B is not used). The result is
; A[4], A[4], A[5], A[5], A[6], A[6], A[7], A[7]; it is stored back to %A.
; NOTE(review): presumably counted by the "grep vmrgl | count 6" RUN line
; (halfword merge-low with the same register for both operands) -- confirm.
323 define void @VMRG_UNARY_th_l(<8 x i16>* %A, <8 x i16>* %B) {
325 %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=8]
; Each index 4..7 is extracted twice from the same vector.
326 %tmp.upgrd.20 = extractelement <8 x i16> %tmp, i32 4 ; <i16> [#uses=1]
327 %tmp3 = extractelement <8 x i16> %tmp, i32 4 ; <i16> [#uses=1]
328 %tmp4 = extractelement <8 x i16> %tmp, i32 5 ; <i16> [#uses=1]
329 %tmp5 = extractelement <8 x i16> %tmp, i32 5 ; <i16> [#uses=1]
330 %tmp6 = extractelement <8 x i16> %tmp, i32 6 ; <i16> [#uses=1]
331 %tmp7 = extractelement <8 x i16> %tmp, i32 6 ; <i16> [#uses=1]
332 %tmp8 = extractelement <8 x i16> %tmp, i32 7 ; <i16> [#uses=1]
333 %tmp9 = extractelement <8 x i16> %tmp, i32 7 ; <i16> [#uses=1]
; Place the extracted halfwords, in extraction order, into lanes 0..7.
334 %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.20, i32 0 ; <<8 x i16>> [#uses=1]
335 %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1 ; <<8 x i16>> [#uses=1]
336 %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2 ; <<8 x i16>> [#uses=1]
337 %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3 ; <<8 x i16>> [#uses=1]
338 %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4 ; <<8 x i16>> [#uses=1]
339 %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5 ; <<8 x i16>> [#uses=1]
340 %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6 ; <<8 x i16>> [#uses=1]
341 %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7 ; <<8 x i16>> [#uses=1]
342 store <8 x i16> %tmp17, <8 x i16>* %A
; VMRG_UNARY_tw_l: single-input word interleave of elements 2..3.
; Only %A is loaded (%B is not used). The result is A[2], A[2], A[3], A[3];
; it is stored back to %A.
; NOTE(review): presumably counted by the "grep vmrgl | count 6" RUN line
; (word merge-low with the same register for both operands) -- confirm.
346 define void @VMRG_UNARY_tw_l(<4 x i32>* %A, <4 x i32>* %B) {
348 %tmp = load <4 x i32>, <4 x i32>* %A ; <<4 x i32>> [#uses=4]
; Each index 2..3 is extracted twice from the same vector.
349 %tmp.upgrd.21 = extractelement <4 x i32> %tmp, i32 2 ; <i32> [#uses=1]
350 %tmp3 = extractelement <4 x i32> %tmp, i32 2 ; <i32> [#uses=1]
351 %tmp4 = extractelement <4 x i32> %tmp, i32 3 ; <i32> [#uses=1]
352 %tmp5 = extractelement <4 x i32> %tmp, i32 3 ; <i32> [#uses=1]
; Place the extracted words, in extraction order, into lanes 0..3.
353 %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.21, i32 0 ; <<4 x i32>> [#uses=1]
354 %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
355 %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1]
356 %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1]
357 store <4 x i32> %tmp9, <4 x i32>* %A
; VMRG_UNARY_tb_h: single-input byte interleave of elements 0..7.
; Only %A is loaded (%B is not used). Each of bytes 0..7 is extracted twice
; and the result is A[0], A[0], A[1], A[1], ..., A[7], A[7]; it is stored
; back to %A.
; NOTE(review): presumably counted by the "grep vmrgh | count 7" RUN line
; (byte merge-high with the same register for both operands) -- confirm.
361 define void @VMRG_UNARY_tb_h(<16 x i8>* %A, <16 x i8>* %B) {
363 %tmp = load <16 x i8>, <16 x i8>* %A ; <<16 x i8>> [#uses=16]
; Each index 0..7 is extracted twice from the same vector.
364 %tmp.upgrd.22 = extractelement <16 x i8> %tmp, i32 0 ; <i8> [#uses=1]
365 %tmp3 = extractelement <16 x i8> %tmp, i32 0 ; <i8> [#uses=1]
366 %tmp4 = extractelement <16 x i8> %tmp, i32 1 ; <i8> [#uses=1]
367 %tmp5 = extractelement <16 x i8> %tmp, i32 1 ; <i8> [#uses=1]
368 %tmp6 = extractelement <16 x i8> %tmp, i32 2 ; <i8> [#uses=1]
369 %tmp7 = extractelement <16 x i8> %tmp, i32 2 ; <i8> [#uses=1]
370 %tmp8 = extractelement <16 x i8> %tmp, i32 3 ; <i8> [#uses=1]
371 %tmp9 = extractelement <16 x i8> %tmp, i32 3 ; <i8> [#uses=1]
372 %tmp10 = extractelement <16 x i8> %tmp, i32 4 ; <i8> [#uses=1]
373 %tmp11 = extractelement <16 x i8> %tmp, i32 4 ; <i8> [#uses=1]
374 %tmp12 = extractelement <16 x i8> %tmp, i32 5 ; <i8> [#uses=1]
375 %tmp13 = extractelement <16 x i8> %tmp, i32 5 ; <i8> [#uses=1]
376 %tmp14 = extractelement <16 x i8> %tmp, i32 6 ; <i8> [#uses=1]
377 %tmp15 = extractelement <16 x i8> %tmp, i32 6 ; <i8> [#uses=1]
378 %tmp16 = extractelement <16 x i8> %tmp, i32 7 ; <i8> [#uses=1]
379 %tmp17 = extractelement <16 x i8> %tmp, i32 7 ; <i8> [#uses=1]
; Place the extracted bytes, in extraction order, into lanes 0..15.
380 %tmp18 = insertelement <16 x i8> undef, i8 %tmp.upgrd.22, i32 0 ; <<16 x i8>> [#uses=1]
381 %tmp19 = insertelement <16 x i8> %tmp18, i8 %tmp3, i32 1 ; <<16 x i8>> [#uses=1]
382 %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 2 ; <<16 x i8>> [#uses=1]
383 %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 3 ; <<16 x i8>> [#uses=1]
384 %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 4 ; <<16 x i8>> [#uses=1]
385 %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 5 ; <<16 x i8>> [#uses=1]
386 %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 6 ; <<16 x i8>> [#uses=1]
387 %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 7 ; <<16 x i8>> [#uses=1]
388 %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 8 ; <<16 x i8>> [#uses=1]
389 %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 9 ; <<16 x i8>> [#uses=1]
390 %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 10 ; <<16 x i8>> [#uses=1]
391 %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 11 ; <<16 x i8>> [#uses=1]
392 %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 12 ; <<16 x i8>> [#uses=1]
393 %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 13 ; <<16 x i8>> [#uses=1]
394 %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 14 ; <<16 x i8>> [#uses=1]
395 %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 15 ; <<16 x i8>> [#uses=1]
396 store <16 x i8> %tmp33, <16 x i8>* %A
; VMRG_UNARY_th_h: single-input halfword interleave of elements 0..3.
; Only %A is loaded (%B is not used). The result is
; A[0], A[0], A[1], A[1], A[2], A[2], A[3], A[3]; it is stored back to %A.
; NOTE(review): presumably counted by the "grep vmrgh | count 7" RUN line
; (halfword merge-high with the same register for both operands) -- confirm.
400 define void @VMRG_UNARY_th_h(<8 x i16>* %A, <8 x i16>* %B) {
402 %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=8]
; Each index 0..3 is extracted twice from the same vector.
403 %tmp.upgrd.23 = extractelement <8 x i16> %tmp, i32 0 ; <i16> [#uses=1]
404 %tmp3 = extractelement <8 x i16> %tmp, i32 0 ; <i16> [#uses=1]
405 %tmp4 = extractelement <8 x i16> %tmp, i32 1 ; <i16> [#uses=1]
406 %tmp5 = extractelement <8 x i16> %tmp, i32 1 ; <i16> [#uses=1]
407 %tmp6 = extractelement <8 x i16> %tmp, i32 2 ; <i16> [#uses=1]
408 %tmp7 = extractelement <8 x i16> %tmp, i32 2 ; <i16> [#uses=1]
409 %tmp8 = extractelement <8 x i16> %tmp, i32 3 ; <i16> [#uses=1]
410 %tmp9 = extractelement <8 x i16> %tmp, i32 3 ; <i16> [#uses=1]
; Place the extracted halfwords, in extraction order, into lanes 0..7.
411 %tmp10 = insertelement <8 x i16> undef, i16 %tmp.upgrd.23, i32 0 ; <<8 x i16>> [#uses=1]
412 %tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 1 ; <<8 x i16>> [#uses=1]
413 %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 2 ; <<8 x i16>> [#uses=1]
414 %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 3 ; <<8 x i16>> [#uses=1]
415 %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 4 ; <<8 x i16>> [#uses=1]
416 %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 5 ; <<8 x i16>> [#uses=1]
417 %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 6 ; <<8 x i16>> [#uses=1]
418 %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 7 ; <<8 x i16>> [#uses=1]
419 store <8 x i16> %tmp17, <8 x i16>* %A
; VMRG_UNARY_tw_h: single-input word interleave of elements 0..1.
; Only %A is loaded (%B is not used). The result is A[0], A[0], A[1], A[1];
; it is stored back to %A.
; NOTE(review): presumably counted by the "grep vmrgh | count 7" RUN line
; (word merge-high with the same register for both operands) -- confirm.
423 define void @VMRG_UNARY_tw_h(<4 x i32>* %A, <4 x i32>* %B) {
425 %tmp = load <4 x i32>, <4 x i32>* %A ; <<4 x i32>> [#uses=4]
; Each index 0..1 is extracted twice from the same vector.
426 %tmp.upgrd.24 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1]
427 %tmp3 = extractelement <4 x i32> %tmp, i32 0 ; <i32> [#uses=1]
428 %tmp4 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1]
429 %tmp5 = extractelement <4 x i32> %tmp, i32 1 ; <i32> [#uses=1]
; Place the extracted words, in extraction order, into lanes 0..3.
430 %tmp6 = insertelement <4 x i32> undef, i32 %tmp.upgrd.24, i32 0 ; <<4 x i32>> [#uses=1]
431 %tmp7 = insertelement <4 x i32> %tmp6, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
432 %tmp8 = insertelement <4 x i32> %tmp7, i32 %tmp4, i32 2 ; <<4 x i32>> [#uses=1]
433 %tmp9 = insertelement <4 x i32> %tmp8, i32 %tmp5, i32 3 ; <<4 x i32>> [#uses=1]
434 store <4 x i32> %tmp9, <4 x i32>* %A
; VPCKUHUM_unary: single-input "take every odd byte" shuffle.
; Loads one <8 x i16> vector from %A (%B is not used) and bitcasts it to
; <16 x i8> twice. The odd-indexed bytes 1, 3, ..., 15 of the first bitcast
; fill result lanes 0..7, and the same odd-indexed bytes of the second
; bitcast fill lanes 8..15. The result is bitcast back to <8 x i16> and
; stored to %A.
; NOTE(review): presumably the function counted by the
; "grep vpkuhum | count 1" RUN line -- confirm.
438 define void @VPCKUHUM_unary(<8 x i16>* %A, <8 x i16>* %B) {
440 %tmp = load <8 x i16>, <8 x i16>* %A ; <<8 x i16>> [#uses=2]
; Two bitcasts of the same loaded value stand in for the two shuffle inputs.
441 %tmp.upgrd.25 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=8]
442 %tmp3 = bitcast <8 x i16> %tmp to <16 x i8> ; <<16 x i8>> [#uses=8]
; Odd bytes of the first copy.
443 %tmp.upgrd.26 = extractelement <16 x i8> %tmp.upgrd.25, i32 1 ; <i8> [#uses=1]
444 %tmp4 = extractelement <16 x i8> %tmp.upgrd.25, i32 3 ; <i8> [#uses=1]
445 %tmp5 = extractelement <16 x i8> %tmp.upgrd.25, i32 5 ; <i8> [#uses=1]
446 %tmp6 = extractelement <16 x i8> %tmp.upgrd.25, i32 7 ; <i8> [#uses=1]
447 %tmp7 = extractelement <16 x i8> %tmp.upgrd.25, i32 9 ; <i8> [#uses=1]
448 %tmp8 = extractelement <16 x i8> %tmp.upgrd.25, i32 11 ; <i8> [#uses=1]
449 %tmp9 = extractelement <16 x i8> %tmp.upgrd.25, i32 13 ; <i8> [#uses=1]
450 %tmp10 = extractelement <16 x i8> %tmp.upgrd.25, i32 15 ; <i8> [#uses=1]
; Odd bytes of the second copy.
451 %tmp11 = extractelement <16 x i8> %tmp3, i32 1 ; <i8> [#uses=1]
452 %tmp12 = extractelement <16 x i8> %tmp3, i32 3 ; <i8> [#uses=1]
453 %tmp13 = extractelement <16 x i8> %tmp3, i32 5 ; <i8> [#uses=1]
454 %tmp14 = extractelement <16 x i8> %tmp3, i32 7 ; <i8> [#uses=1]
455 %tmp15 = extractelement <16 x i8> %tmp3, i32 9 ; <i8> [#uses=1]
456 %tmp16 = extractelement <16 x i8> %tmp3, i32 11 ; <i8> [#uses=1]
457 %tmp17 = extractelement <16 x i8> %tmp3, i32 13 ; <i8> [#uses=1]
458 %tmp18 = extractelement <16 x i8> %tmp3, i32 15 ; <i8> [#uses=1]
; Place the extracted bytes, in extraction order, into lanes 0..15.
459 %tmp19 = insertelement <16 x i8> undef, i8 %tmp.upgrd.26, i32 0 ; <<16 x i8>> [#uses=1]
460 %tmp20 = insertelement <16 x i8> %tmp19, i8 %tmp4, i32 1 ; <<16 x i8>> [#uses=1]
461 %tmp21 = insertelement <16 x i8> %tmp20, i8 %tmp5, i32 2 ; <<16 x i8>> [#uses=1]
462 %tmp22 = insertelement <16 x i8> %tmp21, i8 %tmp6, i32 3 ; <<16 x i8>> [#uses=1]
463 %tmp23 = insertelement <16 x i8> %tmp22, i8 %tmp7, i32 4 ; <<16 x i8>> [#uses=1]
464 %tmp24 = insertelement <16 x i8> %tmp23, i8 %tmp8, i32 5 ; <<16 x i8>> [#uses=1]
465 %tmp25 = insertelement <16 x i8> %tmp24, i8 %tmp9, i32 6 ; <<16 x i8>> [#uses=1]
466 %tmp26 = insertelement <16 x i8> %tmp25, i8 %tmp10, i32 7 ; <<16 x i8>> [#uses=1]
467 %tmp27 = insertelement <16 x i8> %tmp26, i8 %tmp11, i32 8 ; <<16 x i8>> [#uses=1]
468 %tmp28 = insertelement <16 x i8> %tmp27, i8 %tmp12, i32 9 ; <<16 x i8>> [#uses=1]
469 %tmp29 = insertelement <16 x i8> %tmp28, i8 %tmp13, i32 10 ; <<16 x i8>> [#uses=1]
470 %tmp30 = insertelement <16 x i8> %tmp29, i8 %tmp14, i32 11 ; <<16 x i8>> [#uses=1]
471 %tmp31 = insertelement <16 x i8> %tmp30, i8 %tmp15, i32 12 ; <<16 x i8>> [#uses=1]
472 %tmp32 = insertelement <16 x i8> %tmp31, i8 %tmp16, i32 13 ; <<16 x i8>> [#uses=1]
473 %tmp33 = insertelement <16 x i8> %tmp32, i8 %tmp17, i32 14 ; <<16 x i8>> [#uses=1]
474 %tmp34 = insertelement <16 x i8> %tmp33, i8 %tmp18, i32 15 ; <<16 x i8>> [#uses=1]
475 %tmp34.upgrd.27 = bitcast <16 x i8> %tmp34 to <8 x i16> ; <<8 x i16>> [#uses=1]
476 store <8 x i16> %tmp34.upgrd.27, <8 x i16>* %A
; VPCKUWUM_unary: single-input "take every odd halfword" shuffle.
; Loads one <4 x i32> vector from %A (%B is not used) and bitcasts it to
; <8 x i16> twice. The odd-indexed halfwords 1, 3, 5, 7 of the first bitcast
; fill result lanes 0..3, and the same odd-indexed halfwords of the second
; bitcast fill lanes 4..7. The result is bitcast back to <4 x i32> and stored
; to %A.
; NOTE(review): presumably the function counted by the
; "grep vpkuwum | count 1" RUN line -- confirm.
480 define void @VPCKUWUM_unary(<4 x i32>* %A, <4 x i32>* %B) {
482 %tmp = load <4 x i32>, <4 x i32>* %A ; <<4 x i32>> [#uses=2]
; Two bitcasts of the same loaded value stand in for the two shuffle inputs.
483 %tmp.upgrd.28 = bitcast <4 x i32> %tmp to <8 x i16> ; <<8 x i16>> [#uses=4]
484 %tmp3 = bitcast <4 x i32> %tmp to <8 x i16> ; <<8 x i16>> [#uses=4]
; Odd halfwords of the first copy.
485 %tmp.upgrd.29 = extractelement <8 x i16> %tmp.upgrd.28, i32 1 ; <i16> [#uses=1]
486 %tmp4 = extractelement <8 x i16> %tmp.upgrd.28, i32 3 ; <i16> [#uses=1]
487 %tmp5 = extractelement <8 x i16> %tmp.upgrd.28, i32 5 ; <i16> [#uses=1]
488 %tmp6 = extractelement <8 x i16> %tmp.upgrd.28, i32 7 ; <i16> [#uses=1]
; Odd halfwords of the second copy.
489 %tmp7 = extractelement <8 x i16> %tmp3, i32 1 ; <i16> [#uses=1]
490 %tmp8 = extractelement <8 x i16> %tmp3, i32 3 ; <i16> [#uses=1]
491 %tmp9 = extractelement <8 x i16> %tmp3, i32 5 ; <i16> [#uses=1]
492 %tmp10 = extractelement <8 x i16> %tmp3, i32 7 ; <i16> [#uses=1]
; Place the extracted halfwords, in extraction order, into lanes 0..7.
493 %tmp11 = insertelement <8 x i16> undef, i16 %tmp.upgrd.29, i32 0 ; <<8 x i16>> [#uses=1]
494 %tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 1 ; <<8 x i16>> [#uses=1]
495 %tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 2 ; <<8 x i16>> [#uses=1]
496 %tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 3 ; <<8 x i16>> [#uses=1]
497 %tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 4 ; <<8 x i16>> [#uses=1]
498 %tmp16 = insertelement <8 x i16> %tmp15, i16 %tmp8, i32 5 ; <<8 x i16>> [#uses=1]
499 %tmp17 = insertelement <8 x i16> %tmp16, i16 %tmp9, i32 6 ; <<8 x i16>> [#uses=1]
500 %tmp18 = insertelement <8 x i16> %tmp17, i16 %tmp10, i32 7 ; <<8 x i16>> [#uses=1]
501 %tmp18.upgrd.30 = bitcast <8 x i16> %tmp18 to <4 x i32> ; <<4 x i32>> [#uses=1]
502 store <4 x i32> %tmp18.upgrd.30, <4 x i32>* %A