; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=icelake-server -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
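
; The RUN lines cover a generic SSE baseline plus corei7 (SSE4.2),
; corei7-avx (AVX1), core-avx2 (AVX2) and icelake-server (AVX512) cost
; models. Each test below loads consecutive array elements, applies scalar
; llvm.cttz.* calls, and stores the results; the SLP vectorizer should
; collapse such a run into a single vector llvm.cttz call whenever the
; active cost model considers the vector form profitable, and otherwise
; leave the scalar sequence untouched.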
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

@src64 = common global [4 x i64] zeroinitializer, align 32
@dst64 = common global [4 x i64] zeroinitializer, align 32
@src32 = common global [8 x i32] zeroinitializer, align 32
@dst32 = common global [8 x i32] zeroinitializer, align 32
@src16 = common global [16 x i16] zeroinitializer, align 32
@dst16 = common global [16 x i16] zeroinitializer, align 32
@src8 = common global [32 x i8] zeroinitializer, align 32
@dst8 = common global [32 x i8] zeroinitializer, align 32

declare i64 @llvm.cttz.i64(i64, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i16 @llvm.cttz.i16(i16, i1)
declare i8 @llvm.cttz.i8(i8, i1)

;
; CTTZ
;
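
; llvm.cttz counts the number of trailing zero bits. The second (i1)
; argument states whether a zero input is poison: with "i1 false" the
; result for zero is defined to be the operand's bit width, while "i1 true"
; lets the backend assume a non-zero input and choose cheaper lowerings.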
define void @cttz_2i64() #0 {
; SSE-LABEL: @cttz_2i64(
; SSE-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 8
; SSE-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
; SSE-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
; SSE-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
; SSE-NEXT: store i64 [[CTTZ0]], ptr @dst64, align 8
; SSE-NEXT: store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
; SSE-NEXT: ret void
;
; AVX1-LABEL: @cttz_2i64(
; AVX1-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 8
; AVX1-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX1-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
; AVX1-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
; AVX1-NEXT: store i64 [[CTTZ0]], ptr @dst64, align 8
; AVX1-NEXT: store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @cttz_2i64(
; AVX2-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 8
; AVX2-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX2-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
; AVX2-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
; AVX2-NEXT: store i64 [[CTTZ0]], ptr @dst64, align 8
; AVX2-NEXT: store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @cttz_2i64(
; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 8
; AVX512-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[TMP1]], i1 false)
; AVX512-NEXT: store <2 x i64> [[TMP2]], ptr @dst64, align 8
; AVX512-NEXT: ret void
;
  %ld0 = load i64, ptr @src64, align 8
  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
  %cttz0 = call i64 @llvm.cttz.i64(i64 %ld0, i1 0)
  %cttz1 = call i64 @llvm.cttz.i64(i64 %ld1, i1 0)
  store i64 %cttz0, ptr @dst64, align 8
  store i64 %cttz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
  ret void
}
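
; Widening to 4 x i64 makes the vector form profitable earlier: AVX2 and
; AVX512 are expected to use llvm.cttz.v4i64, while the SSE and AVX1 cost
; models keep the scalar calls.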
define void @cttz_4i64() #0 {
; SSE-LABEL: @cttz_4i64(
; SSE-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 4
; SSE-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
; SSE-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
; SSE-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
; SSE-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
; SSE-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
; SSE-NEXT: [[CTTZ2:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD2]], i1 false)
; SSE-NEXT: [[CTTZ3:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD3]], i1 false)
; SSE-NEXT: store i64 [[CTTZ0]], ptr @dst64, align 4
; SSE-NEXT: store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
; SSE-NEXT: store i64 [[CTTZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
; SSE-NEXT: store i64 [[CTTZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
; SSE-NEXT: ret void
;
; AVX1-LABEL: @cttz_4i64(
; AVX1-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 4
; AVX1-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
; AVX1-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
; AVX1-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
; AVX1-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
; AVX1-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
; AVX1-NEXT: [[CTTZ2:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD2]], i1 false)
; AVX1-NEXT: [[CTTZ3:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD3]], i1 false)
; AVX1-NEXT: store i64 [[CTTZ0]], ptr @dst64, align 4
; AVX1-NEXT: store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
; AVX1-NEXT: store i64 [[CTTZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
; AVX1-NEXT: store i64 [[CTTZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @cttz_4i64(
; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
; AVX2-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> [[TMP1]], i1 false)
; AVX2-NEXT: store <4 x i64> [[TMP2]], ptr @dst64, align 4
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @cttz_4i64(
; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
; AVX512-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> [[TMP1]], i1 false)
; AVX512-NEXT: store <4 x i64> [[TMP2]], ptr @dst64, align 4
; AVX512-NEXT: ret void
;
  %ld0 = load i64, ptr @src64, align 4
  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
  %ld2 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
  %ld3 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
  %cttz0 = call i64 @llvm.cttz.i64(i64 %ld0, i1 0)
  %cttz1 = call i64 @llvm.cttz.i64(i64 %ld1, i1 0)
  %cttz2 = call i64 @llvm.cttz.i64(i64 %ld2, i1 0)
  %cttz3 = call i64 @llvm.cttz.i64(i64 %ld3, i1 0)
  store i64 %cttz0, ptr @dst64, align 4
  store i64 %cttz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
  store i64 %cttz2, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
  store i64 %cttz3, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
  ret void
}
define void @cttz_4i32() #0 {
; SSE-LABEL: @cttz_4i32(
; SSE-NEXT: [[LD0:%.*]] = load i32, ptr @src32, align 4
; SSE-NEXT: [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
; SSE-NEXT: [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
; SSE-NEXT: [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
; SSE-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false)
; SSE-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false)
; SSE-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false)
; SSE-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 false)
; SSE-NEXT: store i32 [[CTTZ0]], ptr @dst32, align 4
; SSE-NEXT: store i32 [[CTTZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
; SSE-NEXT: store i32 [[CTTZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
; SSE-NEXT: store i32 [[CTTZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
; SSE-NEXT: ret void
;
; AVX-LABEL: @cttz_4i32(
; AVX-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
; AVX-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP1]], i1 false)
; AVX-NEXT: store <4 x i32> [[TMP2]], ptr @dst32, align 4
; AVX-NEXT: ret void
;
  %ld0 = load i32, ptr @src32, align 4
  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
  %cttz0 = call i32 @llvm.cttz.i32(i32 %ld0, i1 0)
  %cttz1 = call i32 @llvm.cttz.i32(i32 %ld1, i1 0)
  %cttz2 = call i32 @llvm.cttz.i32(i32 %ld2, i1 0)
  %cttz3 = call i32 @llvm.cttz.i32(i32 %ld3, i1 0)
  store i32 %cttz0, ptr @dst32, align 4
  store i32 %cttz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
  store i32 %cttz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
  store i32 %cttz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
  ret void
}
define void @cttz_8i32() #0 {
; SSE-LABEL: @cttz_8i32(
; SSE-NEXT: [[LD0:%.*]] = load i32, ptr @src32, align 2
; SSE-NEXT: [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
; SSE-NEXT: [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
; SSE-NEXT: [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
; SSE-NEXT: [[LD4:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
; SSE-NEXT: [[LD5:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
; SSE-NEXT: [[LD6:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
; SSE-NEXT: [[LD7:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
; SSE-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false)
; SSE-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false)
; SSE-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false)
; SSE-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 false)
; SSE-NEXT: [[CTTZ4:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD4]], i1 false)
; SSE-NEXT: [[CTTZ5:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD5]], i1 false)
; SSE-NEXT: [[CTTZ6:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD6]], i1 false)
; SSE-NEXT: [[CTTZ7:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD7]], i1 false)
; SSE-NEXT: store i32 [[CTTZ0]], ptr @dst32, align 2
; SSE-NEXT: store i32 [[CTTZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
; SSE-NEXT: store i32 [[CTTZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
; SSE-NEXT: store i32 [[CTTZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
; SSE-NEXT: store i32 [[CTTZ4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
; SSE-NEXT: store i32 [[CTTZ5]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
; SSE-NEXT: store i32 [[CTTZ6]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
; SSE-NEXT: store i32 [[CTTZ7]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
; SSE-NEXT: ret void
;
; AVX-LABEL: @cttz_8i32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
; AVX-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> [[TMP1]], i1 false)
; AVX-NEXT: store <8 x i32> [[TMP2]], ptr @dst32, align 2
; AVX-NEXT: ret void
;
  %ld0 = load i32, ptr @src32, align 2
  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
  %ld4 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
  %ld5 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
  %ld6 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
  %ld7 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
  %cttz0 = call i32 @llvm.cttz.i32(i32 %ld0, i1 0)
  %cttz1 = call i32 @llvm.cttz.i32(i32 %ld1, i1 0)
  %cttz2 = call i32 @llvm.cttz.i32(i32 %ld2, i1 0)
  %cttz3 = call i32 @llvm.cttz.i32(i32 %ld3, i1 0)
  %cttz4 = call i32 @llvm.cttz.i32(i32 %ld4, i1 0)
  %cttz5 = call i32 @llvm.cttz.i32(i32 %ld5, i1 0)
  %cttz6 = call i32 @llvm.cttz.i32(i32 %ld6, i1 0)
  %cttz7 = call i32 @llvm.cttz.i32(i32 %ld7, i1 0)
  store i32 %cttz0, ptr @dst32, align 2
  store i32 %cttz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
  store i32 %cttz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
  store i32 %cttz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
  store i32 %cttz4, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
  store i32 %cttz5, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
  store i32 %cttz6, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
  store i32 %cttz7, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
  ret void
}
define void @cttz_8i16() #0 {
; CHECK-LABEL: @cttz_8i16(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[TMP1]], i1 false)
; CHECK-NEXT: store <8 x i16> [[TMP2]], ptr @dst16, align 2
; CHECK-NEXT: ret void
;
  %ld0 = load i16, ptr @src16, align 2
  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
  %cttz0 = call i16 @llvm.cttz.i16(i16 %ld0, i1 0)
  %cttz1 = call i16 @llvm.cttz.i16(i16 %ld1, i1 0)
  %cttz2 = call i16 @llvm.cttz.i16(i16 %ld2, i1 0)
  %cttz3 = call i16 @llvm.cttz.i16(i16 %ld3, i1 0)
  %cttz4 = call i16 @llvm.cttz.i16(i16 %ld4, i1 0)
  %cttz5 = call i16 @llvm.cttz.i16(i16 %ld5, i1 0)
  %cttz6 = call i16 @llvm.cttz.i16(i16 %ld6, i1 0)
  %cttz7 = call i16 @llvm.cttz.i16(i16 %ld7, i1 0)
  store i16 %cttz0, ptr @dst16, align 2
  store i16 %cttz1, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
  store i16 %cttz2, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
  store i16 %cttz3, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
  store i16 %cttz4, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
  store i16 %cttz5, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
  store i16 %cttz6, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
  store i16 %cttz7, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
  ret void
}
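
; 16 x i16 no longer fits in a 128-bit register, so the SSE run is expected
; to split the work into two <8 x i16> halves, while the AVX targets handle
; it as a single <16 x i16> operation.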
define void @cttz_16i16() #0 {
; SSE-LABEL: @cttz_16i16(
; SSE-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
; SSE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[TMP1]], i1 false)
; SSE-NEXT: store <8 x i16> [[TMP2]], ptr @dst16, align 2
; SSE-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
; SSE-NEXT: [[TMP4:%.*]] = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[TMP3]], i1 false)
; SSE-NEXT: store <8 x i16> [[TMP4]], ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
; SSE-NEXT: ret void
;
; AVX-LABEL: @cttz_16i16(
; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 2
; AVX-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> [[TMP1]], i1 false)
; AVX-NEXT: store <16 x i16> [[TMP2]], ptr @dst16, align 2
; AVX-NEXT: ret void
;
  %ld0 = load i16, ptr @src16, align 2
  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
  %ld8 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
  %ld9 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 9), align 2
  %ld10 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 10), align 2
  %ld11 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 11), align 2
  %ld12 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 12), align 2
  %ld13 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 13), align 2
  %ld14 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 14), align 2
  %ld15 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 15), align 2
  %cttz0 = call i16 @llvm.cttz.i16(i16 %ld0, i1 0)
  %cttz1 = call i16 @llvm.cttz.i16(i16 %ld1, i1 0)
  %cttz2 = call i16 @llvm.cttz.i16(i16 %ld2, i1 0)
  %cttz3 = call i16 @llvm.cttz.i16(i16 %ld3, i1 0)
  %cttz4 = call i16 @llvm.cttz.i16(i16 %ld4, i1 0)
  %cttz5 = call i16 @llvm.cttz.i16(i16 %ld5, i1 0)
  %cttz6 = call i16 @llvm.cttz.i16(i16 %ld6, i1 0)
  %cttz7 = call i16 @llvm.cttz.i16(i16 %ld7, i1 0)
  %cttz8 = call i16 @llvm.cttz.i16(i16 %ld8, i1 0)
  %cttz9 = call i16 @llvm.cttz.i16(i16 %ld9, i1 0)
  %cttz10 = call i16 @llvm.cttz.i16(i16 %ld10, i1 0)
  %cttz11 = call i16 @llvm.cttz.i16(i16 %ld11, i1 0)
  %cttz12 = call i16 @llvm.cttz.i16(i16 %ld12, i1 0)
  %cttz13 = call i16 @llvm.cttz.i16(i16 %ld13, i1 0)
  %cttz14 = call i16 @llvm.cttz.i16(i16 %ld14, i1 0)
  %cttz15 = call i16 @llvm.cttz.i16(i16 %ld15, i1 0)
  store i16 %cttz0 , ptr @dst16, align 2
  store i16 %cttz1 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
  store i16 %cttz2 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
  store i16 %cttz3 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
  store i16 %cttz4 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
  store i16 %cttz5 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
  store i16 %cttz6 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
  store i16 %cttz7 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
  store i16 %cttz8 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
  store i16 %cttz9 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 9), align 2
  store i16 %cttz10, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 10), align 2
  store i16 %cttz11, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 11), align 2
  store i16 %cttz12, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 12), align 2
  store i16 %cttz13, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 13), align 2
  store i16 %cttz14, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 14), align 2
  store i16 %cttz15, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 15), align 2
  ret void
}
define void @cttz_16i8() #0 {
; CHECK-LABEL: @cttz_16i8(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> [[TMP1]], i1 false)
; CHECK-NEXT: store <16 x i8> [[TMP2]], ptr @dst8, align 1
; CHECK-NEXT: ret void
;
  %ld0 = load i8, ptr @src8, align 1
  %ld1 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 2), align 1
  %ld3 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 3), align 1
  %ld4 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 4), align 1
  %ld5 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 5), align 1
  %ld6 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 6), align 1
  %ld7 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 7), align 1
  %ld8 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 8), align 1
  %ld9 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 9), align 1
  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
  %cttz0 = call i8 @llvm.cttz.i8(i8 %ld0, i1 0)
  %cttz1 = call i8 @llvm.cttz.i8(i8 %ld1, i1 0)
  %cttz2 = call i8 @llvm.cttz.i8(i8 %ld2, i1 0)
  %cttz3 = call i8 @llvm.cttz.i8(i8 %ld3, i1 0)
  %cttz4 = call i8 @llvm.cttz.i8(i8 %ld4, i1 0)
  %cttz5 = call i8 @llvm.cttz.i8(i8 %ld5, i1 0)
  %cttz6 = call i8 @llvm.cttz.i8(i8 %ld6, i1 0)
  %cttz7 = call i8 @llvm.cttz.i8(i8 %ld7, i1 0)
  %cttz8 = call i8 @llvm.cttz.i8(i8 %ld8, i1 0)
  %cttz9 = call i8 @llvm.cttz.i8(i8 %ld9, i1 0)
  %cttz10 = call i8 @llvm.cttz.i8(i8 %ld10, i1 0)
  %cttz11 = call i8 @llvm.cttz.i8(i8 %ld11, i1 0)
  %cttz12 = call i8 @llvm.cttz.i8(i8 %ld12, i1 0)
  %cttz13 = call i8 @llvm.cttz.i8(i8 %ld13, i1 0)
  %cttz14 = call i8 @llvm.cttz.i8(i8 %ld14, i1 0)
  %cttz15 = call i8 @llvm.cttz.i8(i8 %ld15, i1 0)
  store i8 %cttz0 , ptr @dst8, align 1
  store i8 %cttz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 1), align 1
  store i8 %cttz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 2), align 1
  store i8 %cttz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 3), align 1
  store i8 %cttz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 4), align 1
  store i8 %cttz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 5), align 1
  store i8 %cttz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 6), align 1
  store i8 %cttz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 7), align 1
  store i8 %cttz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 8), align 1
  store i8 %cttz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 9), align 1
  store i8 %cttz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
  store i8 %cttz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
  store i8 %cttz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
  store i8 %cttz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
  store i8 %cttz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
  store i8 %cttz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
  ret void
}
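
; Likewise for 32 x i8: SSE splits into two <16 x i8> halves, while the AVX
; targets use a single llvm.cttz.v32i8 call.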
define void @cttz_32i8() #0 {
; SSE-LABEL: @cttz_32i8(
; SSE-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
; SSE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> [[TMP1]], i1 false)
; SSE-NEXT: store <16 x i8> [[TMP2]], ptr @dst8, align 1
; SSE-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
; SSE-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> [[TMP3]], i1 false)
; SSE-NEXT: store <16 x i8> [[TMP4]], ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
; SSE-NEXT: ret void
;
; AVX-LABEL: @cttz_32i8(
; AVX-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @src8, align 1
; AVX-NEXT: [[TMP2:%.*]] = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> [[TMP1]], i1 false)
; AVX-NEXT: store <32 x i8> [[TMP2]], ptr @dst8, align 1
; AVX-NEXT: ret void
;
  %ld0 = load i8, ptr @src8, align 1
  %ld1 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 2), align 1
  %ld3 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 3), align 1
  %ld4 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 4), align 1
  %ld5 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 5), align 1
  %ld6 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 6), align 1
  %ld7 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 7), align 1
  %ld8 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 8), align 1
  %ld9 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 9), align 1
  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
  %ld16 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
  %ld17 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 17), align 1
  %ld18 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 18), align 1
  %ld19 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 19), align 1
  %ld20 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 20), align 1
  %ld21 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 21), align 1
  %ld22 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 22), align 1
  %ld23 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 23), align 1
  %ld24 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 24), align 1
  %ld25 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 25), align 1
  %ld26 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 26), align 1
  %ld27 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 27), align 1
  %ld28 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 28), align 1
  %ld29 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 29), align 1
  %ld30 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 30), align 1
  %ld31 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 31), align 1
  %cttz0 = call i8 @llvm.cttz.i8(i8 %ld0, i1 0)
  %cttz1 = call i8 @llvm.cttz.i8(i8 %ld1, i1 0)
  %cttz2 = call i8 @llvm.cttz.i8(i8 %ld2, i1 0)
  %cttz3 = call i8 @llvm.cttz.i8(i8 %ld3, i1 0)
  %cttz4 = call i8 @llvm.cttz.i8(i8 %ld4, i1 0)
  %cttz5 = call i8 @llvm.cttz.i8(i8 %ld5, i1 0)
  %cttz6 = call i8 @llvm.cttz.i8(i8 %ld6, i1 0)
  %cttz7 = call i8 @llvm.cttz.i8(i8 %ld7, i1 0)
  %cttz8 = call i8 @llvm.cttz.i8(i8 %ld8, i1 0)
  %cttz9 = call i8 @llvm.cttz.i8(i8 %ld9, i1 0)
  %cttz10 = call i8 @llvm.cttz.i8(i8 %ld10, i1 0)
  %cttz11 = call i8 @llvm.cttz.i8(i8 %ld11, i1 0)
  %cttz12 = call i8 @llvm.cttz.i8(i8 %ld12, i1 0)
  %cttz13 = call i8 @llvm.cttz.i8(i8 %ld13, i1 0)
  %cttz14 = call i8 @llvm.cttz.i8(i8 %ld14, i1 0)
  %cttz15 = call i8 @llvm.cttz.i8(i8 %ld15, i1 0)
  %cttz16 = call i8 @llvm.cttz.i8(i8 %ld16, i1 0)
  %cttz17 = call i8 @llvm.cttz.i8(i8 %ld17, i1 0)
  %cttz18 = call i8 @llvm.cttz.i8(i8 %ld18, i1 0)
  %cttz19 = call i8 @llvm.cttz.i8(i8 %ld19, i1 0)
  %cttz20 = call i8 @llvm.cttz.i8(i8 %ld20, i1 0)
  %cttz21 = call i8 @llvm.cttz.i8(i8 %ld21, i1 0)
  %cttz22 = call i8 @llvm.cttz.i8(i8 %ld22, i1 0)
  %cttz23 = call i8 @llvm.cttz.i8(i8 %ld23, i1 0)
  %cttz24 = call i8 @llvm.cttz.i8(i8 %ld24, i1 0)
  %cttz25 = call i8 @llvm.cttz.i8(i8 %ld25, i1 0)
  %cttz26 = call i8 @llvm.cttz.i8(i8 %ld26, i1 0)
  %cttz27 = call i8 @llvm.cttz.i8(i8 %ld27, i1 0)
  %cttz28 = call i8 @llvm.cttz.i8(i8 %ld28, i1 0)
  %cttz29 = call i8 @llvm.cttz.i8(i8 %ld29, i1 0)
  %cttz30 = call i8 @llvm.cttz.i8(i8 %ld30, i1 0)
  %cttz31 = call i8 @llvm.cttz.i8(i8 %ld31, i1 0)
  store i8 %cttz0 , ptr @dst8, align 1
  store i8 %cttz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 1), align 1
  store i8 %cttz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 2), align 1
  store i8 %cttz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 3), align 1
  store i8 %cttz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 4), align 1
  store i8 %cttz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 5), align 1
  store i8 %cttz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 6), align 1
  store i8 %cttz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 7), align 1
  store i8 %cttz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 8), align 1
  store i8 %cttz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 9), align 1
  store i8 %cttz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
  store i8 %cttz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
  store i8 %cttz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
  store i8 %cttz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
  store i8 %cttz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
  store i8 %cttz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
  store i8 %cttz16, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
  store i8 %cttz17, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 17), align 1
  store i8 %cttz18, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 18), align 1
  store i8 %cttz19, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 19), align 1
  store i8 %cttz20, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 20), align 1
  store i8 %cttz21, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 21), align 1
  store i8 %cttz22, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 22), align 1
  store i8 %cttz23, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 23), align 1
  store i8 %cttz24, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 24), align 1
  store i8 %cttz25, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 25), align 1
  store i8 %cttz26, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 26), align 1
  store i8 %cttz27, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 27), align 1
  store i8 %cttz28, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 28), align 1
  store i8 %cttz29, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 29), align 1
  store i8 %cttz30, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 30), align 1
  store i8 %cttz31, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 31), align 1
  ret void
}

;
; CTTZ_ZERO_UNDEF
;
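; The _undef variants pass "i1 true": a zero input produces an undefined
; (poison) result, so a scalar cttz can lower to a plain BSF/TZCNT. That
; cheaper scalar baseline plausibly explains why AVX1 keeps the 4i32/8i32
; cases scalar below even though it vectorized the "i1 false" forms above.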
define void @cttz_undef_2i64() #0 {
; SSE-LABEL: @cttz_undef_2i64(
; SSE-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 8
; SSE-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
; SSE-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
; SSE-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
; SSE-NEXT: store i64 [[CTTZ0]], ptr @dst64, align 8
; SSE-NEXT: store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
; SSE-NEXT: ret void
;
; AVX1-LABEL: @cttz_undef_2i64(
; AVX1-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 8
; AVX1-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX1-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
; AVX1-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
; AVX1-NEXT: store i64 [[CTTZ0]], ptr @dst64, align 8
; AVX1-NEXT: store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @cttz_undef_2i64(
; AVX2-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 8
; AVX2-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX2-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
; AVX2-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
; AVX2-NEXT: store i64 [[CTTZ0]], ptr @dst64, align 8
; AVX2-NEXT: store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @cttz_undef_2i64(
; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 8
; AVX512-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[TMP1]], i1 true)
; AVX512-NEXT: store <2 x i64> [[TMP2]], ptr @dst64, align 8
; AVX512-NEXT: ret void
;
  %ld0 = load i64, ptr @src64, align 8
  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
  %cttz0 = call i64 @llvm.cttz.i64(i64 %ld0, i1 -1)
  %cttz1 = call i64 @llvm.cttz.i64(i64 %ld1, i1 -1)
  store i64 %cttz0, ptr @dst64, align 8
  store i64 %cttz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
  ret void
}
define void @cttz_undef_4i64() #0 {
; SSE-LABEL: @cttz_undef_4i64(
; SSE-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 4
; SSE-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
; SSE-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
; SSE-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
; SSE-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
; SSE-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
; SSE-NEXT: [[CTTZ2:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD2]], i1 true)
; SSE-NEXT: [[CTTZ3:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD3]], i1 true)
; SSE-NEXT: store i64 [[CTTZ0]], ptr @dst64, align 4
; SSE-NEXT: store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
; SSE-NEXT: store i64 [[CTTZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
; SSE-NEXT: store i64 [[CTTZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
; SSE-NEXT: ret void
;
; AVX1-LABEL: @cttz_undef_4i64(
; AVX1-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 4
; AVX1-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
; AVX1-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
; AVX1-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
; AVX1-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
; AVX1-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
; AVX1-NEXT: [[CTTZ2:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD2]], i1 true)
; AVX1-NEXT: [[CTTZ3:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD3]], i1 true)
; AVX1-NEXT: store i64 [[CTTZ0]], ptr @dst64, align 4
; AVX1-NEXT: store i64 [[CTTZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
; AVX1-NEXT: store i64 [[CTTZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
; AVX1-NEXT: store i64 [[CTTZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @cttz_undef_4i64(
; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
; AVX2-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> [[TMP1]], i1 true)
; AVX2-NEXT: store <4 x i64> [[TMP2]], ptr @dst64, align 4
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @cttz_undef_4i64(
; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
; AVX512-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> [[TMP1]], i1 true)
; AVX512-NEXT: store <4 x i64> [[TMP2]], ptr @dst64, align 4
; AVX512-NEXT: ret void
;
  %ld0 = load i64, ptr @src64, align 4
  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
  %ld2 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
  %ld3 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
  %cttz0 = call i64 @llvm.cttz.i64(i64 %ld0, i1 -1)
  %cttz1 = call i64 @llvm.cttz.i64(i64 %ld1, i1 -1)
  %cttz2 = call i64 @llvm.cttz.i64(i64 %ld2, i1 -1)
  %cttz3 = call i64 @llvm.cttz.i64(i64 %ld3, i1 -1)
  store i64 %cttz0, ptr @dst64, align 4
  store i64 %cttz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
  store i64 %cttz2, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
  store i64 %cttz3, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
  ret void
}
define void @cttz_undef_4i32() #0 {
; SSE-LABEL: @cttz_undef_4i32(
; SSE-NEXT: [[LD0:%.*]] = load i32, ptr @src32, align 4
; SSE-NEXT: [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
; SSE-NEXT: [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
; SSE-NEXT: [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
; SSE-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 true)
; SSE-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 true)
; SSE-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 true)
; SSE-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 true)
; SSE-NEXT: store i32 [[CTTZ0]], ptr @dst32, align 4
; SSE-NEXT: store i32 [[CTTZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
; SSE-NEXT: store i32 [[CTTZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
; SSE-NEXT: store i32 [[CTTZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
; SSE-NEXT: ret void
;
; AVX1-LABEL: @cttz_undef_4i32(
; AVX1-NEXT: [[LD0:%.*]] = load i32, ptr @src32, align 4
; AVX1-NEXT: [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
; AVX1-NEXT: [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
; AVX1-NEXT: [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
; AVX1-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 true)
; AVX1-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 true)
; AVX1-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 true)
; AVX1-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 true)
; AVX1-NEXT: store i32 [[CTTZ0]], ptr @dst32, align 4
; AVX1-NEXT: store i32 [[CTTZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
; AVX1-NEXT: store i32 [[CTTZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
; AVX1-NEXT: store i32 [[CTTZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @cttz_undef_4i32(
; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
; AVX2-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP1]], i1 true)
; AVX2-NEXT: store <4 x i32> [[TMP2]], ptr @dst32, align 4
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @cttz_undef_4i32(
; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
; AVX512-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> [[TMP1]], i1 true)
; AVX512-NEXT: store <4 x i32> [[TMP2]], ptr @dst32, align 4
; AVX512-NEXT: ret void
;
  %ld0 = load i32, ptr @src32, align 4
  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
  %cttz0 = call i32 @llvm.cttz.i32(i32 %ld0, i1 -1)
  %cttz1 = call i32 @llvm.cttz.i32(i32 %ld1, i1 -1)
  %cttz2 = call i32 @llvm.cttz.i32(i32 %ld2, i1 -1)
  %cttz3 = call i32 @llvm.cttz.i32(i32 %ld3, i1 -1)
  store i32 %cttz0, ptr @dst32, align 4
  store i32 %cttz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
  store i32 %cttz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
  store i32 %cttz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
  ret void
}
define void @cttz_undef_8i32() #0 {
; SSE-LABEL: @cttz_undef_8i32(
; SSE-NEXT: [[LD0:%.*]] = load i32, ptr @src32, align 2
; SSE-NEXT: [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
; SSE-NEXT: [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
; SSE-NEXT: [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
; SSE-NEXT: [[LD4:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
; SSE-NEXT: [[LD5:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
; SSE-NEXT: [[LD6:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
; SSE-NEXT: [[LD7:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
; SSE-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 true)
; SSE-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 true)
; SSE-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 true)
; SSE-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 true)
; SSE-NEXT: [[CTTZ4:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD4]], i1 true)
; SSE-NEXT: [[CTTZ5:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD5]], i1 true)
; SSE-NEXT: [[CTTZ6:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD6]], i1 true)
; SSE-NEXT: [[CTTZ7:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD7]], i1 true)
; SSE-NEXT: store i32 [[CTTZ0]], ptr @dst32, align 2
; SSE-NEXT: store i32 [[CTTZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
; SSE-NEXT: store i32 [[CTTZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
; SSE-NEXT: store i32 [[CTTZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
; SSE-NEXT: store i32 [[CTTZ4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
; SSE-NEXT: store i32 [[CTTZ5]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
; SSE-NEXT: store i32 [[CTTZ6]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
; SSE-NEXT: store i32 [[CTTZ7]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
; SSE-NEXT: ret void
;
; AVX1-LABEL: @cttz_undef_8i32(
; AVX1-NEXT: [[LD0:%.*]] = load i32, ptr @src32, align 2
; AVX1-NEXT: [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
; AVX1-NEXT: [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
; AVX1-NEXT: [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
; AVX1-NEXT: [[LD4:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
; AVX1-NEXT: [[LD5:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
; AVX1-NEXT: [[LD6:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
; AVX1-NEXT: [[LD7:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
; AVX1-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 true)
; AVX1-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 true)
; AVX1-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 true)
; AVX1-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 true)
; AVX1-NEXT: [[CTTZ4:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD4]], i1 true)
; AVX1-NEXT: [[CTTZ5:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD5]], i1 true)
; AVX1-NEXT: [[CTTZ6:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD6]], i1 true)
; AVX1-NEXT: [[CTTZ7:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD7]], i1 true)
; AVX1-NEXT: store i32 [[CTTZ0]], ptr @dst32, align 2
; AVX1-NEXT: store i32 [[CTTZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
; AVX1-NEXT: store i32 [[CTTZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
; AVX1-NEXT: store i32 [[CTTZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
; AVX1-NEXT: store i32 [[CTTZ4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
; AVX1-NEXT: store i32 [[CTTZ5]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
; AVX1-NEXT: store i32 [[CTTZ6]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
; AVX1-NEXT: store i32 [[CTTZ7]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @cttz_undef_8i32(
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
; AVX2-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> [[TMP1]], i1 true)
; AVX2-NEXT: store <8 x i32> [[TMP2]], ptr @dst32, align 2
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @cttz_undef_8i32(
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
; AVX512-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> [[TMP1]], i1 true)
; AVX512-NEXT: store <8 x i32> [[TMP2]], ptr @dst32, align 2
; AVX512-NEXT: ret void
;
  %ld0 = load i32, ptr @src32, align 2
  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
  %ld4 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
  %ld5 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
  %ld6 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
  %ld7 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
  %cttz0 = call i32 @llvm.cttz.i32(i32 %ld0, i1 -1)
  %cttz1 = call i32 @llvm.cttz.i32(i32 %ld1, i1 -1)
  %cttz2 = call i32 @llvm.cttz.i32(i32 %ld2, i1 -1)
  %cttz3 = call i32 @llvm.cttz.i32(i32 %ld3, i1 -1)
  %cttz4 = call i32 @llvm.cttz.i32(i32 %ld4, i1 -1)
  %cttz5 = call i32 @llvm.cttz.i32(i32 %ld5, i1 -1)
  %cttz6 = call i32 @llvm.cttz.i32(i32 %ld6, i1 -1)
  %cttz7 = call i32 @llvm.cttz.i32(i32 %ld7, i1 -1)
  store i32 %cttz0, ptr @dst32, align 2
  store i32 %cttz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
  store i32 %cttz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
  store i32 %cttz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
  store i32 %cttz4, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
  store i32 %cttz5, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
  store i32 %cttz6, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
  store i32 %cttz7, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
  ret void
}
define void @cttz_undef_8i16() #0 {
; CHECK-LABEL: @cttz_undef_8i16(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[TMP1]], i1 true)
; CHECK-NEXT: store <8 x i16> [[TMP2]], ptr @dst16, align 2
; CHECK-NEXT: ret void
;
  %ld0 = load i16, ptr @src16, align 2
  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
  %cttz0 = call i16 @llvm.cttz.i16(i16 %ld0, i1 -1)
  %cttz1 = call i16 @llvm.cttz.i16(i16 %ld1, i1 -1)
  %cttz2 = call i16 @llvm.cttz.i16(i16 %ld2, i1 -1)
  %cttz3 = call i16 @llvm.cttz.i16(i16 %ld3, i1 -1)
  %cttz4 = call i16 @llvm.cttz.i16(i16 %ld4, i1 -1)
  %cttz5 = call i16 @llvm.cttz.i16(i16 %ld5, i1 -1)
  %cttz6 = call i16 @llvm.cttz.i16(i16 %ld6, i1 -1)
  %cttz7 = call i16 @llvm.cttz.i16(i16 %ld7, i1 -1)
  store i16 %cttz0, ptr @dst16, align 2
  store i16 %cttz1, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
  store i16 %cttz2, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
  store i16 %cttz3, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
  store i16 %cttz4, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
  store i16 %cttz5, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
  store i16 %cttz6, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
  store i16 %cttz7, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
  ret void
}
792 define void @cttz_undef_16i16() #0 {
793 ; SSE-LABEL: @cttz_undef_16i16(
794 ; SSE-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
795 ; SSE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[TMP1]], i1 true)
796 ; SSE-NEXT: store <8 x i16> [[TMP2]], ptr @dst16, align 2
797 ; SSE-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
798 ; SSE-NEXT: [[TMP4:%.*]] = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> [[TMP3]], i1 true)
799 ; SSE-NEXT: store <8 x i16> [[TMP4]], ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
802 ; AVX-LABEL: @cttz_undef_16i16(
803 ; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 2
804 ; AVX-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> [[TMP1]], i1 true)
805 ; AVX-NEXT: store <16 x i16> [[TMP2]], ptr @dst16, align 2
%ld0 = load i16, ptr @src16, align 2
%ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
%ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
%ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
%ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
%ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
%ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
%ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
%ld8 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
%ld9 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 9), align 2
%ld10 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 10), align 2
%ld11 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 11), align 2
%ld12 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 12), align 2
%ld13 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 13), align 2
%ld14 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 14), align 2
%ld15 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 15), align 2
%cttz0 = call i16 @llvm.cttz.i16(i16 %ld0, i1 -1)
%cttz1 = call i16 @llvm.cttz.i16(i16 %ld1, i1 -1)
%cttz2 = call i16 @llvm.cttz.i16(i16 %ld2, i1 -1)
%cttz3 = call i16 @llvm.cttz.i16(i16 %ld3, i1 -1)
%cttz4 = call i16 @llvm.cttz.i16(i16 %ld4, i1 -1)
%cttz5 = call i16 @llvm.cttz.i16(i16 %ld5, i1 -1)
%cttz6 = call i16 @llvm.cttz.i16(i16 %ld6, i1 -1)
%cttz7 = call i16 @llvm.cttz.i16(i16 %ld7, i1 -1)
%cttz8 = call i16 @llvm.cttz.i16(i16 %ld8, i1 -1)
%cttz9 = call i16 @llvm.cttz.i16(i16 %ld9, i1 -1)
%cttz10 = call i16 @llvm.cttz.i16(i16 %ld10, i1 -1)
%cttz11 = call i16 @llvm.cttz.i16(i16 %ld11, i1 -1)
%cttz12 = call i16 @llvm.cttz.i16(i16 %ld12, i1 -1)
%cttz13 = call i16 @llvm.cttz.i16(i16 %ld13, i1 -1)
%cttz14 = call i16 @llvm.cttz.i16(i16 %ld14, i1 -1)
%cttz15 = call i16 @llvm.cttz.i16(i16 %ld15, i1 -1)
store i16 %cttz0 , ptr @dst16, align 2
store i16 %cttz1 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
store i16 %cttz2 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
store i16 %cttz3 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
store i16 %cttz4 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
store i16 %cttz5 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
store i16 %cttz6 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
store i16 %cttz7 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
store i16 %cttz8 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
store i16 %cttz9 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 9), align 2
store i16 %cttz10, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 10), align 2
store i16 %cttz11, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 11), align 2
store i16 %cttz12, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 12), align 2
store i16 %cttz13, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 13), align 2
store i16 %cttz14, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 14), align 2
store i16 %cttz15, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 15), align 2
ret void
}

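; The i8 tests below repeat the pattern with the is_zero_poison flag set
; (i1 -1 in the source, printed as i1 true in the checks). Sixteen i8
; elements fit in a single 128-bit register, so the shared CHECK prefix
; expects one <16 x i8> @llvm.cttz.v16i8 call on every tested target.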
define void @cttz_undef_16i8() #0 {
; CHECK-LABEL: @cttz_undef_16i8(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> [[TMP1]], i1 true)
; CHECK-NEXT: store <16 x i8> [[TMP2]], ptr @dst8, align 1
; CHECK-NEXT: ret void
;
%ld0 = load i8, ptr @src8, align 1
%ld1 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 1), align 1
%ld2 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 2), align 1
%ld3 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 3), align 1
%ld4 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 4), align 1
%ld5 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 5), align 1
%ld6 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 6), align 1
%ld7 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 7), align 1
%ld8 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 8), align 1
%ld9 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 9), align 1
%ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
%ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
%ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
%ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
%ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
%ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
%cttz0 = call i8 @llvm.cttz.i8(i8 %ld0, i1 -1)
%cttz1 = call i8 @llvm.cttz.i8(i8 %ld1, i1 -1)
%cttz2 = call i8 @llvm.cttz.i8(i8 %ld2, i1 -1)
%cttz3 = call i8 @llvm.cttz.i8(i8 %ld3, i1 -1)
%cttz4 = call i8 @llvm.cttz.i8(i8 %ld4, i1 -1)
%cttz5 = call i8 @llvm.cttz.i8(i8 %ld5, i1 -1)
%cttz6 = call i8 @llvm.cttz.i8(i8 %ld6, i1 -1)
%cttz7 = call i8 @llvm.cttz.i8(i8 %ld7, i1 -1)
%cttz8 = call i8 @llvm.cttz.i8(i8 %ld8, i1 -1)
%cttz9 = call i8 @llvm.cttz.i8(i8 %ld9, i1 -1)
%cttz10 = call i8 @llvm.cttz.i8(i8 %ld10, i1 -1)
%cttz11 = call i8 @llvm.cttz.i8(i8 %ld11, i1 -1)
%cttz12 = call i8 @llvm.cttz.i8(i8 %ld12, i1 -1)
%cttz13 = call i8 @llvm.cttz.i8(i8 %ld13, i1 -1)
%cttz14 = call i8 @llvm.cttz.i8(i8 %ld14, i1 -1)
%cttz15 = call i8 @llvm.cttz.i8(i8 %ld15, i1 -1)
store i8 %cttz0 , ptr @dst8, align 1
store i8 %cttz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 1), align 1
store i8 %cttz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 2), align 1
store i8 %cttz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 3), align 1
store i8 %cttz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 4), align 1
store i8 %cttz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 5), align 1
store i8 %cttz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 6), align 1
store i8 %cttz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 7), align 1
store i8 %cttz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 8), align 1
store i8 %cttz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 9), align 1
store i8 %cttz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
store i8 %cttz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
store i8 %cttz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
store i8 %cttz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
store i8 %cttz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
store i8 %cttz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
ret void
}

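; For 32 x i8, the SSE checks expect the block to be split into two
; <16 x i8> halves (two 128-bit vectors), while the AVX prefixes expect a
; single 256-bit <32 x i8> @llvm.cttz.v32i8 call.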
define void @cttz_undef_32i8() #0 {
; SSE-LABEL: @cttz_undef_32i8(
; SSE-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
; SSE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> [[TMP1]], i1 true)
; SSE-NEXT: store <16 x i8> [[TMP2]], ptr @dst8, align 1
; SSE-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
; SSE-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> [[TMP3]], i1 true)
; SSE-NEXT: store <16 x i8> [[TMP4]], ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
; SSE-NEXT: ret void
;
; AVX-LABEL: @cttz_undef_32i8(
; AVX-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @src8, align 1
; AVX-NEXT: [[TMP2:%.*]] = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> [[TMP1]], i1 true)
; AVX-NEXT: store <32 x i8> [[TMP2]], ptr @dst8, align 1
; AVX-NEXT: ret void
;
%ld0 = load i8, ptr @src8, align 1
%ld1 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 1), align 1
%ld2 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 2), align 1
%ld3 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 3), align 1
%ld4 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 4), align 1
%ld5 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 5), align 1
%ld6 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 6), align 1
%ld7 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 7), align 1
%ld8 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 8), align 1
%ld9 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 9), align 1
%ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
%ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
%ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
%ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
%ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
%ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
%ld16 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
%ld17 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 17), align 1
%ld18 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 18), align 1
%ld19 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 19), align 1
%ld20 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 20), align 1
%ld21 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 21), align 1
%ld22 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 22), align 1
%ld23 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 23), align 1
%ld24 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 24), align 1
%ld25 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 25), align 1
%ld26 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 26), align 1
%ld27 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 27), align 1
%ld28 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 28), align 1
%ld29 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 29), align 1
%ld30 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 30), align 1
%ld31 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 31), align 1
%cttz0 = call i8 @llvm.cttz.i8(i8 %ld0, i1 -1)
%cttz1 = call i8 @llvm.cttz.i8(i8 %ld1, i1 -1)
%cttz2 = call i8 @llvm.cttz.i8(i8 %ld2, i1 -1)
%cttz3 = call i8 @llvm.cttz.i8(i8 %ld3, i1 -1)
%cttz4 = call i8 @llvm.cttz.i8(i8 %ld4, i1 -1)
%cttz5 = call i8 @llvm.cttz.i8(i8 %ld5, i1 -1)
%cttz6 = call i8 @llvm.cttz.i8(i8 %ld6, i1 -1)
%cttz7 = call i8 @llvm.cttz.i8(i8 %ld7, i1 -1)
%cttz8 = call i8 @llvm.cttz.i8(i8 %ld8, i1 -1)
%cttz9 = call i8 @llvm.cttz.i8(i8 %ld9, i1 -1)
%cttz10 = call i8 @llvm.cttz.i8(i8 %ld10, i1 -1)
%cttz11 = call i8 @llvm.cttz.i8(i8 %ld11, i1 -1)
%cttz12 = call i8 @llvm.cttz.i8(i8 %ld12, i1 -1)
%cttz13 = call i8 @llvm.cttz.i8(i8 %ld13, i1 -1)
%cttz14 = call i8 @llvm.cttz.i8(i8 %ld14, i1 -1)
%cttz15 = call i8 @llvm.cttz.i8(i8 %ld15, i1 -1)
%cttz16 = call i8 @llvm.cttz.i8(i8 %ld16, i1 -1)
%cttz17 = call i8 @llvm.cttz.i8(i8 %ld17, i1 -1)
%cttz18 = call i8 @llvm.cttz.i8(i8 %ld18, i1 -1)
%cttz19 = call i8 @llvm.cttz.i8(i8 %ld19, i1 -1)
%cttz20 = call i8 @llvm.cttz.i8(i8 %ld20, i1 -1)
%cttz21 = call i8 @llvm.cttz.i8(i8 %ld21, i1 -1)
%cttz22 = call i8 @llvm.cttz.i8(i8 %ld22, i1 -1)
%cttz23 = call i8 @llvm.cttz.i8(i8 %ld23, i1 -1)
%cttz24 = call i8 @llvm.cttz.i8(i8 %ld24, i1 -1)
%cttz25 = call i8 @llvm.cttz.i8(i8 %ld25, i1 -1)
%cttz26 = call i8 @llvm.cttz.i8(i8 %ld26, i1 -1)
%cttz27 = call i8 @llvm.cttz.i8(i8 %ld27, i1 -1)
%cttz28 = call i8 @llvm.cttz.i8(i8 %ld28, i1 -1)
%cttz29 = call i8 @llvm.cttz.i8(i8 %ld29, i1 -1)
%cttz30 = call i8 @llvm.cttz.i8(i8 %ld30, i1 -1)
%cttz31 = call i8 @llvm.cttz.i8(i8 %ld31, i1 -1)
store i8 %cttz0 , ptr @dst8, align 1
store i8 %cttz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 1), align 1
store i8 %cttz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 2), align 1
store i8 %cttz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 3), align 1
store i8 %cttz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 4), align 1
store i8 %cttz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 5), align 1
store i8 %cttz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 6), align 1
store i8 %cttz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 7), align 1
store i8 %cttz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 8), align 1
store i8 %cttz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 9), align 1
store i8 %cttz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
store i8 %cttz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
store i8 %cttz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
store i8 %cttz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
store i8 %cttz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
store i8 %cttz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
store i8 %cttz16, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
store i8 %cttz17, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 17), align 1
store i8 %cttz18, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 18), align 1
store i8 %cttz19, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 19), align 1
store i8 %cttz20, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 20), align 1
store i8 %cttz21, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 21), align 1
store i8 %cttz22, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 22), align 1
store i8 %cttz23, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 23), align 1
store i8 %cttz24, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 24), align 1
store i8 %cttz25, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 25), align 1
store i8 %cttz26, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 26), align 1
store i8 %cttz27, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 27), align 1
store i8 %cttz28, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 28), align 1
store i8 %cttz29, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 29), align 1
store i8 %cttz30, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 30), align 1
store i8 %cttz31, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 31), align 1
ret void
}

attributes #0 = { nounwind }