; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=icelake-server -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

@src64 = common global [4 x i64] zeroinitializer, align 32
@dst64 = common global [4 x i64] zeroinitializer, align 32
@src32 = common global [8 x i32] zeroinitializer, align 32
@dst32 = common global [8 x i32] zeroinitializer, align 32
@src16 = common global [16 x i16] zeroinitializer, align 32
@dst16 = common global [16 x i16] zeroinitializer, align 32
@src8 = common global [32 x i8] zeroinitializer, align 32
@dst8 = common global [32 x i8] zeroinitializer, align 32

declare i64 @llvm.ctlz.i64(i64, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i16 @llvm.ctlz.i16(i16, i1)
declare i8 @llvm.ctlz.i8(i8, i1)

;
; CTLZ
;
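
; In the check blocks below, a run of scalar @llvm.ctlz.iN calls for a given
; prefix means the SLP vectorizer left that target's code scalar, while a
; single <N x iM> @llvm.ctlz.vNiM call means the sequence was vectorized.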
define void @ctlz_2i64() #0 {
; SSE-LABEL: @ctlz_2i64(
; SSE-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 8
; SSE-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
; SSE-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
; SSE-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
; SSE-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 8
; SSE-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
; SSE-NEXT: ret void
;
; AVX1-LABEL: @ctlz_2i64(
; AVX1-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 8
; AVX1-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX1-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
; AVX1-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
; AVX1-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 8
; AVX1-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @ctlz_2i64(
; AVX2-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 8
; AVX2-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX2-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
; AVX2-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
; AVX2-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 8
; AVX2-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @ctlz_2i64(
; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 8
; AVX512-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[TMP1]], i1 false)
; AVX512-NEXT: store <2 x i64> [[TMP2]], ptr @dst64, align 8
; AVX512-NEXT: ret void
;
  %ld0 = load i64, ptr @src64, align 8
  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
  %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 0)
  %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 0)
  store i64 %ctlz0, ptr @dst64, align 8
  store i64 %ctlz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
  ret void
}

define void @ctlz_4i64() #0 {
; SSE-LABEL: @ctlz_4i64(
; SSE-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 4
; SSE-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
; SSE-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
; SSE-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
; SSE-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
; SSE-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
; SSE-NEXT: [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 false)
; SSE-NEXT: [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 false)
; SSE-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 4
; SSE-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
; SSE-NEXT: store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
; SSE-NEXT: store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
; SSE-NEXT: ret void
;
; AVX1-LABEL: @ctlz_4i64(
; AVX1-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 4
; AVX1-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
; AVX1-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
; AVX1-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
; AVX1-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
; AVX1-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
; AVX1-NEXT: [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 false)
; AVX1-NEXT: [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 false)
; AVX1-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 4
; AVX1-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
; AVX1-NEXT: store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
; AVX1-NEXT: store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @ctlz_4i64(
; AVX2-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 4
; AVX2-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
; AVX2-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
; AVX2-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
; AVX2-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
; AVX2-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
; AVX2-NEXT: [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 false)
; AVX2-NEXT: [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 false)
; AVX2-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 4
; AVX2-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
; AVX2-NEXT: store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
; AVX2-NEXT: store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @ctlz_4i64(
; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
; AVX512-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> [[TMP1]], i1 false)
; AVX512-NEXT: store <4 x i64> [[TMP2]], ptr @dst64, align 4
; AVX512-NEXT: ret void
;
  %ld0 = load i64, ptr @src64, align 4
  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
  %ld2 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
  %ld3 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
  %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 0)
  %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 0)
  %ctlz2 = call i64 @llvm.ctlz.i64(i64 %ld2, i1 0)
  %ctlz3 = call i64 @llvm.ctlz.i64(i64 %ld3, i1 0)
  store i64 %ctlz0, ptr @dst64, align 4
  store i64 %ctlz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
  store i64 %ctlz2, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
  store i64 %ctlz3, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
  ret void
}
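
; 4 x i32 vectorizes on every tested target, so a single CHECK block covers
; all of the run lines.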
define void @ctlz_4i32() #0 {
; CHECK-LABEL: @ctlz_4i32(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 false)
; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr @dst32, align 4
; CHECK-NEXT: ret void
;
  %ld0 = load i32, ptr @src32, align 4
  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
  %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 0)
  %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 0)
  %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 0)
  %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 0)
  store i32 %ctlz0, ptr @dst32, align 4
  store i32 %ctlz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
  store i32 %ctlz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
  store i32 %ctlz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
  ret void
}
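
; For 8 x i32 the SSE runs split the work into two <4 x i32> halves, while
; the AVX runs use a single <8 x i32> operation.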
define void @ctlz_8i32() #0 {
; SSE-LABEL: @ctlz_8i32(
; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 2
; SSE-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 false)
; SSE-NEXT: store <4 x i32> [[TMP2]], ptr @dst32, align 2
; SSE-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
; SSE-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP3]], i1 false)
; SSE-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
; SSE-NEXT: ret void
;
; AVX-LABEL: @ctlz_8i32(
; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
; AVX-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> [[TMP1]], i1 false)
; AVX-NEXT: store <8 x i32> [[TMP2]], ptr @dst32, align 2
; AVX-NEXT: ret void
;
  %ld0 = load i32, ptr @src32, align 2
  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
  %ld4 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
  %ld5 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
  %ld6 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
  %ld7 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
  %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 0)
  %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 0)
  %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 0)
  %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 0)
  %ctlz4 = call i32 @llvm.ctlz.i32(i32 %ld4, i1 0)
  %ctlz5 = call i32 @llvm.ctlz.i32(i32 %ld5, i1 0)
  %ctlz6 = call i32 @llvm.ctlz.i32(i32 %ld6, i1 0)
  %ctlz7 = call i32 @llvm.ctlz.i32(i32 %ld7, i1 0)
  store i32 %ctlz0, ptr @dst32, align 2
  store i32 %ctlz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
  store i32 %ctlz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
  store i32 %ctlz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
  store i32 %ctlz4, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
  store i32 %ctlz5, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
  store i32 %ctlz6, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
  store i32 %ctlz7, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
  ret void
}

define void @ctlz_8i16() #0 {
; CHECK-LABEL: @ctlz_8i16(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP1]], i1 false)
; CHECK-NEXT: store <8 x i16> [[TMP2]], ptr @dst16, align 2
; CHECK-NEXT: ret void
;
  %ld0 = load i16, ptr @src16, align 2
  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
  %ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 0)
  %ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 0)
  %ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 0)
  %ctlz3 = call i16 @llvm.ctlz.i16(i16 %ld3, i1 0)
  %ctlz4 = call i16 @llvm.ctlz.i16(i16 %ld4, i1 0)
  %ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 0)
  %ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 0)
  %ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 0)
  store i16 %ctlz0, ptr @dst16, align 2
  store i16 %ctlz1, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
  store i16 %ctlz2, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
  store i16 %ctlz3, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
  store i16 %ctlz4, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
  store i16 %ctlz5, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
  store i16 %ctlz6, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
  store i16 %ctlz7, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
  ret void
}

define void @ctlz_16i16() #0 {
; SSE-LABEL: @ctlz_16i16(
; SSE-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
; SSE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP1]], i1 false)
; SSE-NEXT: store <8 x i16> [[TMP2]], ptr @dst16, align 2
; SSE-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
; SSE-NEXT: [[TMP4:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP3]], i1 false)
; SSE-NEXT: store <8 x i16> [[TMP4]], ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
; SSE-NEXT: ret void
;
; AVX-LABEL: @ctlz_16i16(
; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 2
; AVX-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> [[TMP1]], i1 false)
; AVX-NEXT: store <16 x i16> [[TMP2]], ptr @dst16, align 2
; AVX-NEXT: ret void
;
  %ld0 = load i16, ptr @src16, align 2
  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
  %ld8 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
  %ld9 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 9), align 2
  %ld10 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 10), align 2
  %ld11 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 11), align 2
  %ld12 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 12), align 2
  %ld13 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 13), align 2
  %ld14 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 14), align 2
  %ld15 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 15), align 2
  %ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 0)
  %ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 0)
  %ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 0)
  %ctlz3 = call i16 @llvm.ctlz.i16(i16 %ld3, i1 0)
  %ctlz4 = call i16 @llvm.ctlz.i16(i16 %ld4, i1 0)
  %ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 0)
  %ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 0)
  %ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 0)
  %ctlz8 = call i16 @llvm.ctlz.i16(i16 %ld8, i1 0)
  %ctlz9 = call i16 @llvm.ctlz.i16(i16 %ld9, i1 0)
  %ctlz10 = call i16 @llvm.ctlz.i16(i16 %ld10, i1 0)
  %ctlz11 = call i16 @llvm.ctlz.i16(i16 %ld11, i1 0)
  %ctlz12 = call i16 @llvm.ctlz.i16(i16 %ld12, i1 0)
  %ctlz13 = call i16 @llvm.ctlz.i16(i16 %ld13, i1 0)
  %ctlz14 = call i16 @llvm.ctlz.i16(i16 %ld14, i1 0)
  %ctlz15 = call i16 @llvm.ctlz.i16(i16 %ld15, i1 0)
  store i16 %ctlz0 , ptr @dst16, align 2
  store i16 %ctlz1 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
  store i16 %ctlz2 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
  store i16 %ctlz3 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
  store i16 %ctlz4 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
  store i16 %ctlz5 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
  store i16 %ctlz6 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
  store i16 %ctlz7 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
  store i16 %ctlz8 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
  store i16 %ctlz9 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 9), align 2
  store i16 %ctlz10, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 10), align 2
  store i16 %ctlz11, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 11), align 2
  store i16 %ctlz12, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 12), align 2
  store i16 %ctlz13, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 13), align 2
  store i16 %ctlz14, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 14), align 2
  store i16 %ctlz15, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 15), align 2
  ret void
}

define void @ctlz_16i8() #0 {
; CHECK-LABEL: @ctlz_16i8(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP1]], i1 false)
; CHECK-NEXT: store <16 x i8> [[TMP2]], ptr @dst8, align 1
; CHECK-NEXT: ret void
;
  %ld0 = load i8, ptr @src8, align 1
  %ld1 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 2), align 1
  %ld3 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 3), align 1
  %ld4 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 4), align 1
  %ld5 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 5), align 1
  %ld6 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 6), align 1
  %ld7 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 7), align 1
  %ld8 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 8), align 1
  %ld9 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 9), align 1
  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
  %ctlz0 = call i8 @llvm.ctlz.i8(i8 %ld0, i1 0)
  %ctlz1 = call i8 @llvm.ctlz.i8(i8 %ld1, i1 0)
  %ctlz2 = call i8 @llvm.ctlz.i8(i8 %ld2, i1 0)
  %ctlz3 = call i8 @llvm.ctlz.i8(i8 %ld3, i1 0)
  %ctlz4 = call i8 @llvm.ctlz.i8(i8 %ld4, i1 0)
  %ctlz5 = call i8 @llvm.ctlz.i8(i8 %ld5, i1 0)
  %ctlz6 = call i8 @llvm.ctlz.i8(i8 %ld6, i1 0)
  %ctlz7 = call i8 @llvm.ctlz.i8(i8 %ld7, i1 0)
  %ctlz8 = call i8 @llvm.ctlz.i8(i8 %ld8, i1 0)
  %ctlz9 = call i8 @llvm.ctlz.i8(i8 %ld9, i1 0)
  %ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 0)
  %ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 0)
  %ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 0)
  %ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 0)
  %ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 0)
  %ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 0)
  store i8 %ctlz0 , ptr @dst8, align 1
  store i8 %ctlz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 1), align 1
  store i8 %ctlz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 2), align 1
  store i8 %ctlz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 3), align 1
  store i8 %ctlz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 4), align 1
  store i8 %ctlz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 5), align 1
  store i8 %ctlz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 6), align 1
  store i8 %ctlz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 7), align 1
  store i8 %ctlz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 8), align 1
  store i8 %ctlz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 9), align 1
  store i8 %ctlz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
  store i8 %ctlz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
  store i8 %ctlz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
  store i8 %ctlz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
  store i8 %ctlz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
  store i8 %ctlz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
  ret void
}

define void @ctlz_32i8() #0 {
; SSE-LABEL: @ctlz_32i8(
; SSE-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
; SSE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP1]], i1 false)
; SSE-NEXT: store <16 x i8> [[TMP2]], ptr @dst8, align 1
; SSE-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
; SSE-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP3]], i1 false)
; SSE-NEXT: store <16 x i8> [[TMP4]], ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
; SSE-NEXT: ret void
;
; AVX-LABEL: @ctlz_32i8(
; AVX-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @src8, align 1
; AVX-NEXT: [[TMP2:%.*]] = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> [[TMP1]], i1 false)
; AVX-NEXT: store <32 x i8> [[TMP2]], ptr @dst8, align 1
; AVX-NEXT: ret void
;
  %ld0 = load i8, ptr @src8, align 1
  %ld1 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 1), align 1
  %ld2 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 2), align 1
  %ld3 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 3), align 1
  %ld4 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 4), align 1
  %ld5 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 5), align 1
  %ld6 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 6), align 1
  %ld7 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 7), align 1
  %ld8 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 8), align 1
  %ld9 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 9), align 1
  %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
  %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
  %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
  %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
  %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
  %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
  %ld16 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
  %ld17 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 17), align 1
  %ld18 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 18), align 1
  %ld19 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 19), align 1
  %ld20 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 20), align 1
  %ld21 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 21), align 1
  %ld22 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 22), align 1
  %ld23 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 23), align 1
  %ld24 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 24), align 1
  %ld25 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 25), align 1
  %ld26 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 26), align 1
  %ld27 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 27), align 1
  %ld28 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 28), align 1
  %ld29 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 29), align 1
  %ld30 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 30), align 1
  %ld31 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 31), align 1
  %ctlz0 = call i8 @llvm.ctlz.i8(i8 %ld0, i1 0)
  %ctlz1 = call i8 @llvm.ctlz.i8(i8 %ld1, i1 0)
  %ctlz2 = call i8 @llvm.ctlz.i8(i8 %ld2, i1 0)
  %ctlz3 = call i8 @llvm.ctlz.i8(i8 %ld3, i1 0)
  %ctlz4 = call i8 @llvm.ctlz.i8(i8 %ld4, i1 0)
  %ctlz5 = call i8 @llvm.ctlz.i8(i8 %ld5, i1 0)
  %ctlz6 = call i8 @llvm.ctlz.i8(i8 %ld6, i1 0)
  %ctlz7 = call i8 @llvm.ctlz.i8(i8 %ld7, i1 0)
  %ctlz8 = call i8 @llvm.ctlz.i8(i8 %ld8, i1 0)
  %ctlz9 = call i8 @llvm.ctlz.i8(i8 %ld9, i1 0)
  %ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 0)
  %ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 0)
  %ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 0)
  %ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 0)
  %ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 0)
  %ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 0)
  %ctlz16 = call i8 @llvm.ctlz.i8(i8 %ld16, i1 0)
  %ctlz17 = call i8 @llvm.ctlz.i8(i8 %ld17, i1 0)
  %ctlz18 = call i8 @llvm.ctlz.i8(i8 %ld18, i1 0)
  %ctlz19 = call i8 @llvm.ctlz.i8(i8 %ld19, i1 0)
  %ctlz20 = call i8 @llvm.ctlz.i8(i8 %ld20, i1 0)
  %ctlz21 = call i8 @llvm.ctlz.i8(i8 %ld21, i1 0)
  %ctlz22 = call i8 @llvm.ctlz.i8(i8 %ld22, i1 0)
  %ctlz23 = call i8 @llvm.ctlz.i8(i8 %ld23, i1 0)
  %ctlz24 = call i8 @llvm.ctlz.i8(i8 %ld24, i1 0)
  %ctlz25 = call i8 @llvm.ctlz.i8(i8 %ld25, i1 0)
  %ctlz26 = call i8 @llvm.ctlz.i8(i8 %ld26, i1 0)
  %ctlz27 = call i8 @llvm.ctlz.i8(i8 %ld27, i1 0)
  %ctlz28 = call i8 @llvm.ctlz.i8(i8 %ld28, i1 0)
  %ctlz29 = call i8 @llvm.ctlz.i8(i8 %ld29, i1 0)
  %ctlz30 = call i8 @llvm.ctlz.i8(i8 %ld30, i1 0)
  %ctlz31 = call i8 @llvm.ctlz.i8(i8 %ld31, i1 0)
  store i8 %ctlz0 , ptr @dst8, align 1
  store i8 %ctlz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 1), align 1
  store i8 %ctlz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 2), align 1
  store i8 %ctlz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 3), align 1
  store i8 %ctlz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 4), align 1
  store i8 %ctlz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 5), align 1
  store i8 %ctlz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 6), align 1
  store i8 %ctlz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 7), align 1
  store i8 %ctlz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 8), align 1
  store i8 %ctlz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 9), align 1
  store i8 %ctlz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
  store i8 %ctlz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
  store i8 %ctlz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
  store i8 %ctlz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
  store i8 %ctlz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
  store i8 %ctlz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
  store i8 %ctlz16, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
  store i8 %ctlz17, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 17), align 1
  store i8 %ctlz18, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 18), align 1
  store i8 %ctlz19, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 19), align 1
  store i8 %ctlz20, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 20), align 1
  store i8 %ctlz21, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 21), align 1
  store i8 %ctlz22, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 22), align 1
  store i8 %ctlz23, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 23), align 1
  store i8 %ctlz24, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 24), align 1
  store i8 %ctlz25, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 25), align 1
  store i8 %ctlz26, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 26), align 1
  store i8 %ctlz27, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 27), align 1
  store i8 %ctlz28, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 28), align 1
  store i8 %ctlz29, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 29), align 1
  store i8 %ctlz30, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 30), align 1
  store i8 %ctlz31, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 31), align 1
  ret void
}

;
; CTLZ_ZERO_UNDEF
;
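
; The remaining tests pass true (i1 -1) as the second operand of @llvm.ctlz,
; the is-zero-undef (now "is zero poison") flag: the result is unspecified
; for a zero input, which permits cheaper lowerings and, as the SSE/AVX1
; blocks below show, can change the vectorizer's cost-model decision.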
define void @ctlz_undef_2i64() #0 {
; SSE-LABEL: @ctlz_undef_2i64(
; SSE-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 8
; SSE-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
; SSE-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
; SSE-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
; SSE-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 8
; SSE-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
; SSE-NEXT: ret void
;
; AVX1-LABEL: @ctlz_undef_2i64(
; AVX1-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 8
; AVX1-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX1-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
; AVX1-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
; AVX1-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 8
; AVX1-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @ctlz_undef_2i64(
; AVX2-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 8
; AVX2-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
; AVX2-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
; AVX2-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
; AVX2-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 8
; AVX2-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @ctlz_undef_2i64(
; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 8
; AVX512-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[TMP1]], i1 true)
; AVX512-NEXT: store <2 x i64> [[TMP2]], ptr @dst64, align 8
; AVX512-NEXT: ret void
;
  %ld0 = load i64, ptr @src64, align 8
  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
  %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 -1)
  %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 -1)
  store i64 %ctlz0, ptr @dst64, align 8
  store i64 %ctlz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
  ret void
}

define void @ctlz_undef_4i64() #0 {
; SSE-LABEL: @ctlz_undef_4i64(
; SSE-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 4
; SSE-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
; SSE-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
; SSE-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
; SSE-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
; SSE-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
; SSE-NEXT: [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 true)
; SSE-NEXT: [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 true)
; SSE-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 4
; SSE-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
; SSE-NEXT: store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
; SSE-NEXT: store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
; SSE-NEXT: ret void
;
; AVX1-LABEL: @ctlz_undef_4i64(
; AVX1-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 4
; AVX1-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
; AVX1-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
; AVX1-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
; AVX1-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
; AVX1-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
; AVX1-NEXT: [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 true)
; AVX1-NEXT: [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 true)
; AVX1-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 4
; AVX1-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
; AVX1-NEXT: store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
; AVX1-NEXT: store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @ctlz_undef_4i64(
; AVX2-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 4
; AVX2-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
; AVX2-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
; AVX2-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
; AVX2-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
; AVX2-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
; AVX2-NEXT: [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 true)
; AVX2-NEXT: [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 true)
; AVX2-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 4
; AVX2-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
; AVX2-NEXT: store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
; AVX2-NEXT: store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @ctlz_undef_4i64(
; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
; AVX512-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> [[TMP1]], i1 true)
; AVX512-NEXT: store <4 x i64> [[TMP2]], ptr @dst64, align 4
; AVX512-NEXT: ret void
;
  %ld0 = load i64, ptr @src64, align 4
  %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
  %ld2 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
  %ld3 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
  %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 -1)
  %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 -1)
  %ctlz2 = call i64 @llvm.ctlz.i64(i64 %ld2, i1 -1)
  %ctlz3 = call i64 @llvm.ctlz.i64(i64 %ld3, i1 -1)
  store i64 %ctlz0, ptr @dst64, align 4
  store i64 %ctlz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
  store i64 %ctlz2, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
  store i64 %ctlz3, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
  ret void
}
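
; Unlike the plain 4 x i32 case above, the zero-undef form stays scalar on
; SSE and AVX1; only the AVX2 and AVX512 runs vectorize it.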
define void @ctlz_undef_4i32() #0 {
; SSE-LABEL: @ctlz_undef_4i32(
; SSE-NEXT: [[LD0:%.*]] = load i32, ptr @src32, align 4
; SSE-NEXT: [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
; SSE-NEXT: [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
; SSE-NEXT: [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
; SSE-NEXT: [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
; SSE-NEXT: [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
; SSE-NEXT: [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
; SSE-NEXT: [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
; SSE-NEXT: store i32 [[CTLZ0]], ptr @dst32, align 4
; SSE-NEXT: store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
; SSE-NEXT: store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
; SSE-NEXT: store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
; SSE-NEXT: ret void
;
; AVX1-LABEL: @ctlz_undef_4i32(
; AVX1-NEXT: [[LD0:%.*]] = load i32, ptr @src32, align 4
; AVX1-NEXT: [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
; AVX1-NEXT: [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
; AVX1-NEXT: [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
; AVX1-NEXT: [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
; AVX1-NEXT: [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
; AVX1-NEXT: [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
; AVX1-NEXT: [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
; AVX1-NEXT: store i32 [[CTLZ0]], ptr @dst32, align 4
; AVX1-NEXT: store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
; AVX1-NEXT: store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
; AVX1-NEXT: store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @ctlz_undef_4i32(
; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
; AVX2-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 true)
; AVX2-NEXT: store <4 x i32> [[TMP2]], ptr @dst32, align 4
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @ctlz_undef_4i32(
; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
; AVX512-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 true)
; AVX512-NEXT: store <4 x i32> [[TMP2]], ptr @dst32, align 4
; AVX512-NEXT: ret void
;
  %ld0 = load i32, ptr @src32, align 4
  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
  %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 -1)
  %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 -1)
  %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 -1)
  %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 -1)
  store i32 %ctlz0, ptr @dst32, align 4
  store i32 %ctlz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
  store i32 %ctlz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
  store i32 %ctlz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
  ret void
}

define void @ctlz_undef_8i32() #0 {
; SSE-LABEL: @ctlz_undef_8i32(
; SSE-NEXT: [[LD0:%.*]] = load i32, ptr @src32, align 2
; SSE-NEXT: [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
; SSE-NEXT: [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
; SSE-NEXT: [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
; SSE-NEXT: [[LD4:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
; SSE-NEXT: [[LD5:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
; SSE-NEXT: [[LD6:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
; SSE-NEXT: [[LD7:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
; SSE-NEXT: [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
; SSE-NEXT: [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
; SSE-NEXT: [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
; SSE-NEXT: [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
; SSE-NEXT: [[CTLZ4:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD4]], i1 true)
; SSE-NEXT: [[CTLZ5:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD5]], i1 true)
; SSE-NEXT: [[CTLZ6:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD6]], i1 true)
; SSE-NEXT: [[CTLZ7:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD7]], i1 true)
; SSE-NEXT: store i32 [[CTLZ0]], ptr @dst32, align 2
; SSE-NEXT: store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
; SSE-NEXT: store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
; SSE-NEXT: store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
; SSE-NEXT: store i32 [[CTLZ4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
; SSE-NEXT: store i32 [[CTLZ5]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
; SSE-NEXT: store i32 [[CTLZ6]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
; SSE-NEXT: store i32 [[CTLZ7]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
; SSE-NEXT: ret void
;
; AVX1-LABEL: @ctlz_undef_8i32(
; AVX1-NEXT: [[LD0:%.*]] = load i32, ptr @src32, align 2
; AVX1-NEXT: [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
; AVX1-NEXT: [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
; AVX1-NEXT: [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
; AVX1-NEXT: [[LD4:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
; AVX1-NEXT: [[LD5:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
; AVX1-NEXT: [[LD6:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
; AVX1-NEXT: [[LD7:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
; AVX1-NEXT: [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
; AVX1-NEXT: [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
; AVX1-NEXT: [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
; AVX1-NEXT: [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
; AVX1-NEXT: [[CTLZ4:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD4]], i1 true)
; AVX1-NEXT: [[CTLZ5:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD5]], i1 true)
; AVX1-NEXT: [[CTLZ6:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD6]], i1 true)
; AVX1-NEXT: [[CTLZ7:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD7]], i1 true)
; AVX1-NEXT: store i32 [[CTLZ0]], ptr @dst32, align 2
; AVX1-NEXT: store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
; AVX1-NEXT: store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
; AVX1-NEXT: store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
; AVX1-NEXT: store i32 [[CTLZ4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
; AVX1-NEXT: store i32 [[CTLZ5]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
; AVX1-NEXT: store i32 [[CTLZ6]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
; AVX1-NEXT: store i32 [[CTLZ7]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
; AVX1-NEXT: ret void
;
; AVX2-LABEL: @ctlz_undef_8i32(
; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
; AVX2-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> [[TMP1]], i1 true)
; AVX2-NEXT: store <8 x i32> [[TMP2]], ptr @dst32, align 2
; AVX2-NEXT: ret void
;
; AVX512-LABEL: @ctlz_undef_8i32(
; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
; AVX512-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> [[TMP1]], i1 true)
; AVX512-NEXT: store <8 x i32> [[TMP2]], ptr @dst32, align 2
; AVX512-NEXT: ret void
;
  %ld0 = load i32, ptr @src32, align 2
  %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
  %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
  %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
  %ld4 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
  %ld5 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
  %ld6 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
  %ld7 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
  %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 -1)
  %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 -1)
  %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 -1)
  %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 -1)
  %ctlz4 = call i32 @llvm.ctlz.i32(i32 %ld4, i1 -1)
  %ctlz5 = call i32 @llvm.ctlz.i32(i32 %ld5, i1 -1)
  %ctlz6 = call i32 @llvm.ctlz.i32(i32 %ld6, i1 -1)
  %ctlz7 = call i32 @llvm.ctlz.i32(i32 %ld7, i1 -1)
  store i32 %ctlz0, ptr @dst32, align 2
  store i32 %ctlz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
  store i32 %ctlz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
  store i32 %ctlz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
  store i32 %ctlz4, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
  store i32 %ctlz5, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
  store i32 %ctlz6, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
  store i32 %ctlz7, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
  ret void
}
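
; The zero-undef i16 cases vectorize on every tested target, with the SSE
; runs again splitting the 16-element case into two <8 x i16> halves.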
define void @ctlz_undef_8i16() #0 {
; CHECK-LABEL: @ctlz_undef_8i16(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP1]], i1 true)
; CHECK-NEXT: store <8 x i16> [[TMP2]], ptr @dst16, align 2
; CHECK-NEXT: ret void
;
  %ld0 = load i16, ptr @src16, align 2
  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
  %ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 -1)
  %ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 -1)
  %ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 -1)
  %ctlz3 = call i16 @llvm.ctlz.i16(i16 %ld3, i1 -1)
  %ctlz4 = call i16 @llvm.ctlz.i16(i16 %ld4, i1 -1)
  %ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 -1)
  %ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 -1)
  %ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 -1)
  store i16 %ctlz0, ptr @dst16, align 2
  store i16 %ctlz1, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
  store i16 %ctlz2, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
  store i16 %ctlz3, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
  store i16 %ctlz4, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
  store i16 %ctlz5, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
  store i16 %ctlz6, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
  store i16 %ctlz7, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
  ret void
}

define void @ctlz_undef_16i16() #0 {
; SSE-LABEL: @ctlz_undef_16i16(
; SSE-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
; SSE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP1]], i1 true)
; SSE-NEXT: store <8 x i16> [[TMP2]], ptr @dst16, align 2
; SSE-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
; SSE-NEXT: [[TMP4:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP3]], i1 true)
; SSE-NEXT: store <8 x i16> [[TMP4]], ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
; SSE-NEXT: ret void
;
; AVX-LABEL: @ctlz_undef_16i16(
; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 2
; AVX-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> [[TMP1]], i1 true)
; AVX-NEXT: store <16 x i16> [[TMP2]], ptr @dst16, align 2
; AVX-NEXT: ret void
;
  %ld0 = load i16, ptr @src16, align 2
  %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
  %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
  %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
  %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
  %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
  %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
  %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
  %ld8 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
  %ld9 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 9), align 2
  %ld10 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 10), align 2
  %ld11 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 11), align 2
  %ld12 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 12), align 2
  %ld13 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 13), align 2
  %ld14 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 14), align 2
  %ld15 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 15), align 2
  %ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 -1)
  %ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 -1)
  %ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 -1)
  %ctlz3 = call i16 @llvm.ctlz.i16(i16 %ld3, i1 -1)
  %ctlz4 = call i16 @llvm.ctlz.i16(i16 %ld4, i1 -1)
  %ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 -1)
  %ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 -1)
  %ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 -1)
  %ctlz8 = call i16 @llvm.ctlz.i16(i16 %ld8, i1 -1)
  %ctlz9 = call i16 @llvm.ctlz.i16(i16 %ld9, i1 -1)
  %ctlz10 = call i16 @llvm.ctlz.i16(i16 %ld10, i1 -1)
  %ctlz11 = call i16 @llvm.ctlz.i16(i16 %ld11, i1 -1)
  %ctlz12 = call i16 @llvm.ctlz.i16(i16 %ld12, i1 -1)
  %ctlz13 = call i16 @llvm.ctlz.i16(i16 %ld13, i1 -1)
  %ctlz14 = call i16 @llvm.ctlz.i16(i16 %ld14, i1 -1)
  %ctlz15 = call i16 @llvm.ctlz.i16(i16 %ld15, i1 -1)
  store i16 %ctlz0 , ptr @dst16, align 2
  store i16 %ctlz1 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
  store i16 %ctlz2 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
  store i16 %ctlz3 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
  store i16 %ctlz4 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
  store i16 %ctlz5 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
  store i16 %ctlz6 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
  store i16 %ctlz7 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
  store i16 %ctlz8 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
  store i16 %ctlz9 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 9), align 2
  store i16 %ctlz10, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 10), align 2
  store i16 %ctlz11, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 11), align 2
  store i16 %ctlz12, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 12), align 2
  store i16 %ctlz13, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 13), align 2
  store i16 %ctlz14, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 14), align 2
  store i16 %ctlz15, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 15), align 2
  ret void
}
define void @ctlz_undef_16i8() #0 {
; CHECK-LABEL: @ctlz_undef_16i8(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP1]], i1 true)
; CHECK-NEXT: store <16 x i8> [[TMP2]], ptr @dst8, align 1
; CHECK-NEXT: ret void
;
%ld0 = load i8, ptr @src8, align 1
%ld1 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 1), align 1
%ld2 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 2), align 1
%ld3 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 3), align 1
%ld4 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 4), align 1
%ld5 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 5), align 1
%ld6 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 6), align 1
%ld7 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 7), align 1
%ld8 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 8), align 1
%ld9 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 9), align 1
%ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
%ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
%ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
%ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
%ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
%ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
%ctlz0 = call i8 @llvm.ctlz.i8(i8 %ld0, i1 -1)
%ctlz1 = call i8 @llvm.ctlz.i8(i8 %ld1, i1 -1)
%ctlz2 = call i8 @llvm.ctlz.i8(i8 %ld2, i1 -1)
%ctlz3 = call i8 @llvm.ctlz.i8(i8 %ld3, i1 -1)
%ctlz4 = call i8 @llvm.ctlz.i8(i8 %ld4, i1 -1)
%ctlz5 = call i8 @llvm.ctlz.i8(i8 %ld5, i1 -1)
%ctlz6 = call i8 @llvm.ctlz.i8(i8 %ld6, i1 -1)
%ctlz7 = call i8 @llvm.ctlz.i8(i8 %ld7, i1 -1)
%ctlz8 = call i8 @llvm.ctlz.i8(i8 %ld8, i1 -1)
%ctlz9 = call i8 @llvm.ctlz.i8(i8 %ld9, i1 -1)
%ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 -1)
%ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 -1)
%ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 -1)
%ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 -1)
%ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 -1)
%ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 -1)
store i8 %ctlz0 , ptr @dst8, align 1
store i8 %ctlz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 1), align 1
store i8 %ctlz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 2), align 1
store i8 %ctlz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 3), align 1
store i8 %ctlz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 4), align 1
store i8 %ctlz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 5), align 1
store i8 %ctlz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 6), align 1
store i8 %ctlz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 7), align 1
store i8 %ctlz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 8), align 1
store i8 %ctlz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 9), align 1
store i8 %ctlz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
store i8 %ctlz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
store i8 %ctlz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
store i8 %ctlz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
store i8 %ctlz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
store i8 %ctlz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
ret void
}
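;
; With 128-bit vectors (SSE) the 32 x i8 case is expected to split into two
; <16 x i8> ctlz calls, while the AVX targets vectorize it as a single
; <32 x i8> call, as the SSE/AVX check lines below show.
;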
define void @ctlz_undef_32i8() #0 {
; SSE-LABEL: @ctlz_undef_32i8(
; SSE-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
; SSE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP1]], i1 true)
; SSE-NEXT: store <16 x i8> [[TMP2]], ptr @dst8, align 1
; SSE-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
; SSE-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP3]], i1 true)
; SSE-NEXT: store <16 x i8> [[TMP4]], ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
; SSE-NEXT: ret void
;
; AVX-LABEL: @ctlz_undef_32i8(
; AVX-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @src8, align 1
; AVX-NEXT: [[TMP2:%.*]] = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> [[TMP1]], i1 true)
; AVX-NEXT: store <32 x i8> [[TMP2]], ptr @dst8, align 1
; AVX-NEXT: ret void
;
%ld0 = load i8, ptr @src8, align 1
%ld1 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 1), align 1
%ld2 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 2), align 1
%ld3 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 3), align 1
%ld4 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 4), align 1
%ld5 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 5), align 1
%ld6 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 6), align 1
%ld7 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 7), align 1
%ld8 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 8), align 1
%ld9 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 9), align 1
%ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
%ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
%ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
%ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
%ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
%ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
%ld16 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
%ld17 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 17), align 1
%ld18 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 18), align 1
%ld19 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 19), align 1
%ld20 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 20), align 1
%ld21 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 21), align 1
%ld22 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 22), align 1
%ld23 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 23), align 1
%ld24 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 24), align 1
%ld25 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 25), align 1
%ld26 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 26), align 1
%ld27 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 27), align 1
%ld28 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 28), align 1
%ld29 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 29), align 1
%ld30 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 30), align 1
%ld31 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 31), align 1
%ctlz0 = call i8 @llvm.ctlz.i8(i8 %ld0, i1 -1)
%ctlz1 = call i8 @llvm.ctlz.i8(i8 %ld1, i1 -1)
%ctlz2 = call i8 @llvm.ctlz.i8(i8 %ld2, i1 -1)
%ctlz3 = call i8 @llvm.ctlz.i8(i8 %ld3, i1 -1)
%ctlz4 = call i8 @llvm.ctlz.i8(i8 %ld4, i1 -1)
%ctlz5 = call i8 @llvm.ctlz.i8(i8 %ld5, i1 -1)
%ctlz6 = call i8 @llvm.ctlz.i8(i8 %ld6, i1 -1)
%ctlz7 = call i8 @llvm.ctlz.i8(i8 %ld7, i1 -1)
%ctlz8 = call i8 @llvm.ctlz.i8(i8 %ld8, i1 -1)
%ctlz9 = call i8 @llvm.ctlz.i8(i8 %ld9, i1 -1)
%ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 -1)
%ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 -1)
%ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 -1)
%ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 -1)
%ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 -1)
%ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 -1)
%ctlz16 = call i8 @llvm.ctlz.i8(i8 %ld16, i1 -1)
%ctlz17 = call i8 @llvm.ctlz.i8(i8 %ld17, i1 -1)
%ctlz18 = call i8 @llvm.ctlz.i8(i8 %ld18, i1 -1)
%ctlz19 = call i8 @llvm.ctlz.i8(i8 %ld19, i1 -1)
%ctlz20 = call i8 @llvm.ctlz.i8(i8 %ld20, i1 -1)
%ctlz21 = call i8 @llvm.ctlz.i8(i8 %ld21, i1 -1)
%ctlz22 = call i8 @llvm.ctlz.i8(i8 %ld22, i1 -1)
%ctlz23 = call i8 @llvm.ctlz.i8(i8 %ld23, i1 -1)
%ctlz24 = call i8 @llvm.ctlz.i8(i8 %ld24, i1 -1)
%ctlz25 = call i8 @llvm.ctlz.i8(i8 %ld25, i1 -1)
%ctlz26 = call i8 @llvm.ctlz.i8(i8 %ld26, i1 -1)
%ctlz27 = call i8 @llvm.ctlz.i8(i8 %ld27, i1 -1)
%ctlz28 = call i8 @llvm.ctlz.i8(i8 %ld28, i1 -1)
%ctlz29 = call i8 @llvm.ctlz.i8(i8 %ld29, i1 -1)
%ctlz30 = call i8 @llvm.ctlz.i8(i8 %ld30, i1 -1)
%ctlz31 = call i8 @llvm.ctlz.i8(i8 %ld31, i1 -1)
store i8 %ctlz0 , ptr @dst8, align 1
store i8 %ctlz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 1), align 1
store i8 %ctlz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 2), align 1
store i8 %ctlz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 3), align 1
store i8 %ctlz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 4), align 1
store i8 %ctlz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 5), align 1
store i8 %ctlz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 6), align 1
store i8 %ctlz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 7), align 1
store i8 %ctlz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 8), align 1
store i8 %ctlz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 9), align 1
store i8 %ctlz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
store i8 %ctlz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
store i8 %ctlz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
store i8 %ctlz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
store i8 %ctlz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
store i8 %ctlz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
store i8 %ctlz16, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
store i8 %ctlz17, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 17), align 1
store i8 %ctlz18, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 18), align 1
store i8 %ctlz19, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 19), align 1
store i8 %ctlz20, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 20), align 1
store i8 %ctlz21, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 21), align 1
store i8 %ctlz22, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 22), align 1
store i8 %ctlz23, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 23), align 1
store i8 %ctlz24, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 24), align 1
store i8 %ctlz25, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 25), align 1
store i8 %ctlz26, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 26), align 1
store i8 %ctlz27, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 27), align 1
store i8 %ctlz28, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 28), align 1
store i8 %ctlz29, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 29), align 1
store i8 %ctlz30, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 30), align 1
store i8 %ctlz31, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 31), align 1
ret void
}
attributes #0 = { nounwind }