1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
3 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE,SSE4
4 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
5 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
6 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=icelake-server -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
8 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
10 @src64 = common global [4 x i64] zeroinitializer, align 32
11 @dst64 = common global [4 x i64] zeroinitializer, align 32
12 @src32 = common global [8 x i32] zeroinitializer, align 32
13 @dst32 = common global [8 x i32] zeroinitializer, align 32
14 @src16 = common global [16 x i16] zeroinitializer, align 32
15 @dst16 = common global [16 x i16] zeroinitializer, align 32
16 @src8 = common global [32 x i8] zeroinitializer, align 32
17 @dst8 = common global [32 x i8] zeroinitializer, align 32
19 declare i64 @llvm.ctlz.i64(i64, i1)
20 declare i32 @llvm.ctlz.i32(i32, i1)
21 declare i16 @llvm.ctlz.i16(i16, i1)
22 declare i8 @llvm.ctlz.i8(i8, i1)
28 define void @ctlz_2i64() #0 {
29 ; SSE-LABEL: @ctlz_2i64(
30 ; SSE-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 8
31 ; SSE-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
32 ; SSE-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
33 ; SSE-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
34 ; SSE-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 8
35 ; SSE-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
38 ; AVX1-LABEL: @ctlz_2i64(
39 ; AVX1-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 8
40 ; AVX1-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
41 ; AVX1-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
42 ; AVX1-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
43 ; AVX1-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 8
44 ; AVX1-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
47 ; AVX2-LABEL: @ctlz_2i64(
48 ; AVX2-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 8
49 ; AVX2-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
50 ; AVX2-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
51 ; AVX2-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
52 ; AVX2-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 8
53 ; AVX2-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
56 ; AVX512-LABEL: @ctlz_2i64(
57 ; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 8
58 ; AVX512-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[TMP1]], i1 false)
59 ; AVX512-NEXT: store <2 x i64> [[TMP2]], ptr @dst64, align 8
60 ; AVX512-NEXT: ret void
62 %ld0 = load i64, ptr @src64, align 8
63 %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
64 %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 0)
65 %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 0)
66 store i64 %ctlz0, ptr @dst64, align 8
67 store i64 %ctlz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
71 define void @ctlz_4i64() #0 {
72 ; SSE-LABEL: @ctlz_4i64(
73 ; SSE-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 4
74 ; SSE-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
75 ; SSE-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
76 ; SSE-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
77 ; SSE-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
78 ; SSE-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
79 ; SSE-NEXT: [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 false)
80 ; SSE-NEXT: [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 false)
81 ; SSE-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 4
82 ; SSE-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
83 ; SSE-NEXT: store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
84 ; SSE-NEXT: store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
87 ; AVX1-LABEL: @ctlz_4i64(
88 ; AVX1-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 4
89 ; AVX1-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
90 ; AVX1-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
91 ; AVX1-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
92 ; AVX1-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
93 ; AVX1-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
94 ; AVX1-NEXT: [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 false)
95 ; AVX1-NEXT: [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 false)
96 ; AVX1-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 4
97 ; AVX1-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
98 ; AVX1-NEXT: store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
99 ; AVX1-NEXT: store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
100 ; AVX1-NEXT: ret void
102 ; AVX2-LABEL: @ctlz_4i64(
103 ; AVX2-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 4
104 ; AVX2-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
105 ; AVX2-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
106 ; AVX2-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
107 ; AVX2-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
108 ; AVX2-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
109 ; AVX2-NEXT: [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 false)
110 ; AVX2-NEXT: [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 false)
111 ; AVX2-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 4
112 ; AVX2-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
113 ; AVX2-NEXT: store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
114 ; AVX2-NEXT: store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
115 ; AVX2-NEXT: ret void
117 ; AVX512-LABEL: @ctlz_4i64(
118 ; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
119 ; AVX512-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> [[TMP1]], i1 false)
120 ; AVX512-NEXT: store <4 x i64> [[TMP2]], ptr @dst64, align 4
121 ; AVX512-NEXT: ret void
123 %ld0 = load i64, ptr @src64, align 4
124 %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
125 %ld2 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
126 %ld3 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
127 %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 0)
128 %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 0)
129 %ctlz2 = call i64 @llvm.ctlz.i64(i64 %ld2, i1 0)
130 %ctlz3 = call i64 @llvm.ctlz.i64(i64 %ld3, i1 0)
131 store i64 %ctlz0, ptr @dst64, align 4
132 store i64 %ctlz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
133 store i64 %ctlz2, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
134 store i64 %ctlz3, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
138 define void @ctlz_4i32() #0 {
139 ; SSE2-LABEL: @ctlz_4i32(
140 ; SSE2-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
141 ; SSE2-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 false)
142 ; SSE2-NEXT: store <4 x i32> [[TMP2]], ptr @dst32, align 4
143 ; SSE2-NEXT: ret void
145 ; SSE4-LABEL: @ctlz_4i32(
146 ; SSE4-NEXT: [[LD0:%.*]] = load i32, ptr @src32, align 4
147 ; SSE4-NEXT: [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
148 ; SSE4-NEXT: [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
149 ; SSE4-NEXT: [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
150 ; SSE4-NEXT: [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 false)
151 ; SSE4-NEXT: [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 false)
152 ; SSE4-NEXT: [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 false)
153 ; SSE4-NEXT: [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 false)
154 ; SSE4-NEXT: store i32 [[CTLZ0]], ptr @dst32, align 4
155 ; SSE4-NEXT: store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
156 ; SSE4-NEXT: store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
157 ; SSE4-NEXT: store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
158 ; SSE4-NEXT: ret void
160 ; AVX-LABEL: @ctlz_4i32(
161 ; AVX-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
162 ; AVX-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 false)
163 ; AVX-NEXT: store <4 x i32> [[TMP2]], ptr @dst32, align 4
166 %ld0 = load i32, ptr @src32, align 4
167 %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
168 %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
169 %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
170 %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 0)
171 %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 0)
172 %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 0)
173 %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 0)
174 store i32 %ctlz0, ptr @dst32, align 4
175 store i32 %ctlz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
176 store i32 %ctlz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
177 store i32 %ctlz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
181 define void @ctlz_8i32() #0 {
182 ; SSE2-LABEL: @ctlz_8i32(
183 ; SSE2-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 2
184 ; SSE2-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 false)
185 ; SSE2-NEXT: store <4 x i32> [[TMP2]], ptr @dst32, align 2
186 ; SSE2-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
187 ; SSE2-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP3]], i1 false)
188 ; SSE2-NEXT: store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
189 ; SSE2-NEXT: ret void
191 ; SSE4-LABEL: @ctlz_8i32(
192 ; SSE4-NEXT: [[LD0:%.*]] = load i32, ptr @src32, align 2
193 ; SSE4-NEXT: [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
194 ; SSE4-NEXT: [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
195 ; SSE4-NEXT: [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
196 ; SSE4-NEXT: [[LD4:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
197 ; SSE4-NEXT: [[LD5:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
198 ; SSE4-NEXT: [[LD6:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
199 ; SSE4-NEXT: [[LD7:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
200 ; SSE4-NEXT: [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 false)
201 ; SSE4-NEXT: [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 false)
202 ; SSE4-NEXT: [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 false)
203 ; SSE4-NEXT: [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 false)
204 ; SSE4-NEXT: [[CTLZ4:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD4]], i1 false)
205 ; SSE4-NEXT: [[CTLZ5:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD5]], i1 false)
206 ; SSE4-NEXT: [[CTLZ6:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD6]], i1 false)
207 ; SSE4-NEXT: [[CTLZ7:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD7]], i1 false)
208 ; SSE4-NEXT: store i32 [[CTLZ0]], ptr @dst32, align 2
209 ; SSE4-NEXT: store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
210 ; SSE4-NEXT: store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
211 ; SSE4-NEXT: store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
212 ; SSE4-NEXT: store i32 [[CTLZ4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
213 ; SSE4-NEXT: store i32 [[CTLZ5]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
214 ; SSE4-NEXT: store i32 [[CTLZ6]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
215 ; SSE4-NEXT: store i32 [[CTLZ7]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
216 ; SSE4-NEXT: ret void
218 ; AVX-LABEL: @ctlz_8i32(
219 ; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
220 ; AVX-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> [[TMP1]], i1 false)
221 ; AVX-NEXT: store <8 x i32> [[TMP2]], ptr @dst32, align 2
224 %ld0 = load i32, ptr @src32, align 2
225 %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
226 %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
227 %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
228 %ld4 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
229 %ld5 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
230 %ld6 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
231 %ld7 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
232 %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 0)
233 %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 0)
234 %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 0)
235 %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 0)
236 %ctlz4 = call i32 @llvm.ctlz.i32(i32 %ld4, i1 0)
237 %ctlz5 = call i32 @llvm.ctlz.i32(i32 %ld5, i1 0)
238 %ctlz6 = call i32 @llvm.ctlz.i32(i32 %ld6, i1 0)
239 %ctlz7 = call i32 @llvm.ctlz.i32(i32 %ld7, i1 0)
240 store i32 %ctlz0, ptr @dst32, align 2
241 store i32 %ctlz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
242 store i32 %ctlz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
243 store i32 %ctlz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
244 store i32 %ctlz4, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
245 store i32 %ctlz5, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
246 store i32 %ctlz6, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
247 store i32 %ctlz7, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
251 define void @ctlz_8i16() #0 {
252 ; CHECK-LABEL: @ctlz_8i16(
253 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
254 ; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP1]], i1 false)
255 ; CHECK-NEXT: store <8 x i16> [[TMP2]], ptr @dst16, align 2
256 ; CHECK-NEXT: ret void
258 %ld0 = load i16, ptr @src16, align 2
259 %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
260 %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
261 %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
262 %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
263 %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
264 %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
265 %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
266 %ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 0)
267 %ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 0)
268 %ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 0)
269 %ctlz3 = call i16 @llvm.ctlz.i16(i16 %ld3, i1 0)
270 %ctlz4 = call i16 @llvm.ctlz.i16(i16 %ld4, i1 0)
271 %ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 0)
272 %ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 0)
273 %ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 0)
274 store i16 %ctlz0, ptr @dst16, align 2
275 store i16 %ctlz1, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
276 store i16 %ctlz2, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
277 store i16 %ctlz3, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
278 store i16 %ctlz4, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
279 store i16 %ctlz5, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
280 store i16 %ctlz6, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
281 store i16 %ctlz7, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
285 define void @ctlz_16i16() #0 {
286 ; SSE-LABEL: @ctlz_16i16(
287 ; SSE-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
288 ; SSE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP1]], i1 false)
289 ; SSE-NEXT: store <8 x i16> [[TMP2]], ptr @dst16, align 2
290 ; SSE-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
291 ; SSE-NEXT: [[TMP4:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP3]], i1 false)
292 ; SSE-NEXT: store <8 x i16> [[TMP4]], ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
295 ; AVX-LABEL: @ctlz_16i16(
296 ; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 2
297 ; AVX-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> [[TMP1]], i1 false)
298 ; AVX-NEXT: store <16 x i16> [[TMP2]], ptr @dst16, align 2
301 %ld0 = load i16, ptr @src16, align 2
302 %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
303 %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
304 %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
305 %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
306 %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
307 %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
308 %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
309 %ld8 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
310 %ld9 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 9), align 2
311 %ld10 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 10), align 2
312 %ld11 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 11), align 2
313 %ld12 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 12), align 2
314 %ld13 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 13), align 2
315 %ld14 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 14), align 2
316 %ld15 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 15), align 2
317 %ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 0)
318 %ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 0)
319 %ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 0)
320 %ctlz3 = call i16 @llvm.ctlz.i16(i16 %ld3, i1 0)
321 %ctlz4 = call i16 @llvm.ctlz.i16(i16 %ld4, i1 0)
322 %ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 0)
323 %ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 0)
324 %ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 0)
325 %ctlz8 = call i16 @llvm.ctlz.i16(i16 %ld8, i1 0)
326 %ctlz9 = call i16 @llvm.ctlz.i16(i16 %ld9, i1 0)
327 %ctlz10 = call i16 @llvm.ctlz.i16(i16 %ld10, i1 0)
328 %ctlz11 = call i16 @llvm.ctlz.i16(i16 %ld11, i1 0)
329 %ctlz12 = call i16 @llvm.ctlz.i16(i16 %ld12, i1 0)
330 %ctlz13 = call i16 @llvm.ctlz.i16(i16 %ld13, i1 0)
331 %ctlz14 = call i16 @llvm.ctlz.i16(i16 %ld14, i1 0)
332 %ctlz15 = call i16 @llvm.ctlz.i16(i16 %ld15, i1 0)
333 store i16 %ctlz0 , ptr @dst16, align 2
334 store i16 %ctlz1 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
335 store i16 %ctlz2 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
336 store i16 %ctlz3 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
337 store i16 %ctlz4 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
338 store i16 %ctlz5 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
339 store i16 %ctlz6 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
340 store i16 %ctlz7 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
341 store i16 %ctlz8 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
342 store i16 %ctlz9 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 9), align 2
343 store i16 %ctlz10, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 10), align 2
344 store i16 %ctlz11, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 11), align 2
345 store i16 %ctlz12, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 12), align 2
346 store i16 %ctlz13, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 13), align 2
347 store i16 %ctlz14, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 14), align 2
348 store i16 %ctlz15, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 15), align 2
352 define void @ctlz_16i8() #0 {
353 ; CHECK-LABEL: @ctlz_16i8(
354 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
355 ; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP1]], i1 false)
356 ; CHECK-NEXT: store <16 x i8> [[TMP2]], ptr @dst8, align 1
357 ; CHECK-NEXT: ret void
359 %ld0 = load i8, ptr @src8, align 1
360 %ld1 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 1), align 1
361 %ld2 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 2), align 1
362 %ld3 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 3), align 1
363 %ld4 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 4), align 1
364 %ld5 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 5), align 1
365 %ld6 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 6), align 1
366 %ld7 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 7), align 1
367 %ld8 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 8), align 1
368 %ld9 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 9), align 1
369 %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
370 %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
371 %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
372 %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
373 %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
374 %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
375 %ctlz0 = call i8 @llvm.ctlz.i8(i8 %ld0, i1 0)
376 %ctlz1 = call i8 @llvm.ctlz.i8(i8 %ld1, i1 0)
377 %ctlz2 = call i8 @llvm.ctlz.i8(i8 %ld2, i1 0)
378 %ctlz3 = call i8 @llvm.ctlz.i8(i8 %ld3, i1 0)
379 %ctlz4 = call i8 @llvm.ctlz.i8(i8 %ld4, i1 0)
380 %ctlz5 = call i8 @llvm.ctlz.i8(i8 %ld5, i1 0)
381 %ctlz6 = call i8 @llvm.ctlz.i8(i8 %ld6, i1 0)
382 %ctlz7 = call i8 @llvm.ctlz.i8(i8 %ld7, i1 0)
383 %ctlz8 = call i8 @llvm.ctlz.i8(i8 %ld8, i1 0)
384 %ctlz9 = call i8 @llvm.ctlz.i8(i8 %ld9, i1 0)
385 %ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 0)
386 %ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 0)
387 %ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 0)
388 %ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 0)
389 %ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 0)
390 %ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 0)
391 store i8 %ctlz0 , ptr @dst8, align 1
392 store i8 %ctlz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 1), align 1
393 store i8 %ctlz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 2), align 1
394 store i8 %ctlz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 3), align 1
395 store i8 %ctlz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 4), align 1
396 store i8 %ctlz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 5), align 1
397 store i8 %ctlz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 6), align 1
398 store i8 %ctlz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 7), align 1
399 store i8 %ctlz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 8), align 1
400 store i8 %ctlz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 9), align 1
401 store i8 %ctlz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
402 store i8 %ctlz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
403 store i8 %ctlz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
404 store i8 %ctlz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
405 store i8 %ctlz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
406 store i8 %ctlz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
410 define void @ctlz_32i8() #0 {
411 ; SSE-LABEL: @ctlz_32i8(
412 ; SSE-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
413 ; SSE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP1]], i1 false)
414 ; SSE-NEXT: store <16 x i8> [[TMP2]], ptr @dst8, align 1
415 ; SSE-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
416 ; SSE-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP3]], i1 false)
417 ; SSE-NEXT: store <16 x i8> [[TMP4]], ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
420 ; AVX-LABEL: @ctlz_32i8(
421 ; AVX-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @src8, align 1
422 ; AVX-NEXT: [[TMP2:%.*]] = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> [[TMP1]], i1 false)
423 ; AVX-NEXT: store <32 x i8> [[TMP2]], ptr @dst8, align 1
426 %ld0 = load i8, ptr @src8, align 1
427 %ld1 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 1), align 1
428 %ld2 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 2), align 1
429 %ld3 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 3), align 1
430 %ld4 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 4), align 1
431 %ld5 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 5), align 1
432 %ld6 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 6), align 1
433 %ld7 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 7), align 1
434 %ld8 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 8), align 1
435 %ld9 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 9), align 1
436 %ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
437 %ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
438 %ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
439 %ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
440 %ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
441 %ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
442 %ld16 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
443 %ld17 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 17), align 1
444 %ld18 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 18), align 1
445 %ld19 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 19), align 1
446 %ld20 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 20), align 1
447 %ld21 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 21), align 1
448 %ld22 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 22), align 1
449 %ld23 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 23), align 1
450 %ld24 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 24), align 1
451 %ld25 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 25), align 1
452 %ld26 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 26), align 1
453 %ld27 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 27), align 1
454 %ld28 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 28), align 1
455 %ld29 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 29), align 1
456 %ld30 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 30), align 1
457 %ld31 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 31), align 1
458 %ctlz0 = call i8 @llvm.ctlz.i8(i8 %ld0, i1 0)
459 %ctlz1 = call i8 @llvm.ctlz.i8(i8 %ld1, i1 0)
460 %ctlz2 = call i8 @llvm.ctlz.i8(i8 %ld2, i1 0)
461 %ctlz3 = call i8 @llvm.ctlz.i8(i8 %ld3, i1 0)
462 %ctlz4 = call i8 @llvm.ctlz.i8(i8 %ld4, i1 0)
463 %ctlz5 = call i8 @llvm.ctlz.i8(i8 %ld5, i1 0)
464 %ctlz6 = call i8 @llvm.ctlz.i8(i8 %ld6, i1 0)
465 %ctlz7 = call i8 @llvm.ctlz.i8(i8 %ld7, i1 0)
466 %ctlz8 = call i8 @llvm.ctlz.i8(i8 %ld8, i1 0)
467 %ctlz9 = call i8 @llvm.ctlz.i8(i8 %ld9, i1 0)
468 %ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 0)
469 %ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 0)
470 %ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 0)
471 %ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 0)
472 %ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 0)
473 %ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 0)
474 %ctlz16 = call i8 @llvm.ctlz.i8(i8 %ld16, i1 0)
475 %ctlz17 = call i8 @llvm.ctlz.i8(i8 %ld17, i1 0)
476 %ctlz18 = call i8 @llvm.ctlz.i8(i8 %ld18, i1 0)
477 %ctlz19 = call i8 @llvm.ctlz.i8(i8 %ld19, i1 0)
478 %ctlz20 = call i8 @llvm.ctlz.i8(i8 %ld20, i1 0)
479 %ctlz21 = call i8 @llvm.ctlz.i8(i8 %ld21, i1 0)
480 %ctlz22 = call i8 @llvm.ctlz.i8(i8 %ld22, i1 0)
481 %ctlz23 = call i8 @llvm.ctlz.i8(i8 %ld23, i1 0)
482 %ctlz24 = call i8 @llvm.ctlz.i8(i8 %ld24, i1 0)
483 %ctlz25 = call i8 @llvm.ctlz.i8(i8 %ld25, i1 0)
484 %ctlz26 = call i8 @llvm.ctlz.i8(i8 %ld26, i1 0)
485 %ctlz27 = call i8 @llvm.ctlz.i8(i8 %ld27, i1 0)
486 %ctlz28 = call i8 @llvm.ctlz.i8(i8 %ld28, i1 0)
487 %ctlz29 = call i8 @llvm.ctlz.i8(i8 %ld29, i1 0)
488 %ctlz30 = call i8 @llvm.ctlz.i8(i8 %ld30, i1 0)
489 %ctlz31 = call i8 @llvm.ctlz.i8(i8 %ld31, i1 0)
490 store i8 %ctlz0 , ptr @dst8, align 1
491 store i8 %ctlz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 1), align 1
492 store i8 %ctlz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 2), align 1
493 store i8 %ctlz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 3), align 1
494 store i8 %ctlz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 4), align 1
495 store i8 %ctlz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 5), align 1
496 store i8 %ctlz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 6), align 1
497 store i8 %ctlz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 7), align 1
498 store i8 %ctlz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 8), align 1
499 store i8 %ctlz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 9), align 1
500 store i8 %ctlz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
501 store i8 %ctlz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
502 store i8 %ctlz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
503 store i8 %ctlz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
504 store i8 %ctlz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
505 store i8 %ctlz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
506 store i8 %ctlz16, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
507 store i8 %ctlz17, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 17), align 1
508 store i8 %ctlz18, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 18), align 1
509 store i8 %ctlz19, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 19), align 1
510 store i8 %ctlz20, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 20), align 1
511 store i8 %ctlz21, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 21), align 1
512 store i8 %ctlz22, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 22), align 1
513 store i8 %ctlz23, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 23), align 1
514 store i8 %ctlz24, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 24), align 1
515 store i8 %ctlz25, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 25), align 1
516 store i8 %ctlz26, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 26), align 1
517 store i8 %ctlz27, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 27), align 1
518 store i8 %ctlz28, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 28), align 1
519 store i8 %ctlz29, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 29), align 1
520 store i8 %ctlz30, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 30), align 1
521 store i8 %ctlz31, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 31), align 1
529 define void @ctlz_undef_2i64() #0 {
530 ; SSE-LABEL: @ctlz_undef_2i64(
531 ; SSE-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 8
532 ; SSE-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
533 ; SSE-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
534 ; SSE-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
535 ; SSE-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 8
536 ; SSE-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
539 ; AVX1-LABEL: @ctlz_undef_2i64(
540 ; AVX1-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 8
541 ; AVX1-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
542 ; AVX1-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
543 ; AVX1-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
544 ; AVX1-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 8
545 ; AVX1-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
546 ; AVX1-NEXT: ret void
548 ; AVX2-LABEL: @ctlz_undef_2i64(
549 ; AVX2-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 8
550 ; AVX2-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
551 ; AVX2-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
552 ; AVX2-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
553 ; AVX2-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 8
554 ; AVX2-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
555 ; AVX2-NEXT: ret void
557 ; AVX512-LABEL: @ctlz_undef_2i64(
558 ; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @src64, align 8
559 ; AVX512-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> [[TMP1]], i1 true)
560 ; AVX512-NEXT: store <2 x i64> [[TMP2]], ptr @dst64, align 8
561 ; AVX512-NEXT: ret void
563 %ld0 = load i64, ptr @src64, align 8
564 %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i32 0, i64 1), align 8
565 %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 -1)
566 %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 -1)
567 store i64 %ctlz0, ptr @dst64, align 8
568 store i64 %ctlz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i32 0, i64 1), align 8
572 define void @ctlz_undef_4i64() #0 {
573 ; SSE-LABEL: @ctlz_undef_4i64(
574 ; SSE-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 4
575 ; SSE-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
576 ; SSE-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
577 ; SSE-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
578 ; SSE-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
579 ; SSE-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
580 ; SSE-NEXT: [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 true)
581 ; SSE-NEXT: [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 true)
582 ; SSE-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 4
583 ; SSE-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
584 ; SSE-NEXT: store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
585 ; SSE-NEXT: store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
588 ; AVX1-LABEL: @ctlz_undef_4i64(
589 ; AVX1-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 4
590 ; AVX1-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
591 ; AVX1-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
592 ; AVX1-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
593 ; AVX1-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
594 ; AVX1-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
595 ; AVX1-NEXT: [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 true)
596 ; AVX1-NEXT: [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 true)
597 ; AVX1-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 4
598 ; AVX1-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
599 ; AVX1-NEXT: store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
600 ; AVX1-NEXT: store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
601 ; AVX1-NEXT: ret void
603 ; AVX2-LABEL: @ctlz_undef_4i64(
604 ; AVX2-NEXT: [[LD0:%.*]] = load i64, ptr @src64, align 4
605 ; AVX2-NEXT: [[LD1:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
606 ; AVX2-NEXT: [[LD2:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
607 ; AVX2-NEXT: [[LD3:%.*]] = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
608 ; AVX2-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
609 ; AVX2-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
610 ; AVX2-NEXT: [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 true)
611 ; AVX2-NEXT: [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 true)
612 ; AVX2-NEXT: store i64 [[CTLZ0]], ptr @dst64, align 4
613 ; AVX2-NEXT: store i64 [[CTLZ1]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
614 ; AVX2-NEXT: store i64 [[CTLZ2]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
615 ; AVX2-NEXT: store i64 [[CTLZ3]], ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
616 ; AVX2-NEXT: ret void
618 ; AVX512-LABEL: @ctlz_undef_4i64(
619 ; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @src64, align 4
620 ; AVX512-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> [[TMP1]], i1 true)
621 ; AVX512-NEXT: store <4 x i64> [[TMP2]], ptr @dst64, align 4
622 ; AVX512-NEXT: ret void
624 %ld0 = load i64, ptr @src64, align 4
625 %ld1 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 1), align 4
626 %ld2 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 2), align 4
627 %ld3 = load i64, ptr getelementptr inbounds ([4 x i64], ptr @src64, i64 0, i64 3), align 4
628 %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 -1)
629 %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 -1)
630 %ctlz2 = call i64 @llvm.ctlz.i64(i64 %ld2, i1 -1)
631 %ctlz3 = call i64 @llvm.ctlz.i64(i64 %ld3, i1 -1)
632 store i64 %ctlz0, ptr @dst64, align 4
633 store i64 %ctlz1, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 1), align 4
634 store i64 %ctlz2, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 2), align 4
635 store i64 %ctlz3, ptr getelementptr inbounds ([4 x i64], ptr @dst64, i64 0, i64 3), align 4
639 define void @ctlz_undef_4i32() #0 {
640 ; SSE-LABEL: @ctlz_undef_4i32(
641 ; SSE-NEXT: [[LD0:%.*]] = load i32, ptr @src32, align 4
642 ; SSE-NEXT: [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
643 ; SSE-NEXT: [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
644 ; SSE-NEXT: [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
645 ; SSE-NEXT: [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
646 ; SSE-NEXT: [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
647 ; SSE-NEXT: [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
648 ; SSE-NEXT: [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
649 ; SSE-NEXT: store i32 [[CTLZ0]], ptr @dst32, align 4
650 ; SSE-NEXT: store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
651 ; SSE-NEXT: store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
652 ; SSE-NEXT: store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
655 ; AVX1-LABEL: @ctlz_undef_4i32(
656 ; AVX1-NEXT: [[LD0:%.*]] = load i32, ptr @src32, align 4
657 ; AVX1-NEXT: [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
658 ; AVX1-NEXT: [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
659 ; AVX1-NEXT: [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
660 ; AVX1-NEXT: [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
661 ; AVX1-NEXT: [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
662 ; AVX1-NEXT: [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
663 ; AVX1-NEXT: [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
664 ; AVX1-NEXT: store i32 [[CTLZ0]], ptr @dst32, align 4
665 ; AVX1-NEXT: store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
666 ; AVX1-NEXT: store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
667 ; AVX1-NEXT: store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
668 ; AVX1-NEXT: ret void
670 ; AVX2-LABEL: @ctlz_undef_4i32(
671 ; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
672 ; AVX2-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 true)
673 ; AVX2-NEXT: store <4 x i32> [[TMP2]], ptr @dst32, align 4
674 ; AVX2-NEXT: ret void
676 ; AVX512-LABEL: @ctlz_undef_4i32(
677 ; AVX512-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @src32, align 4
678 ; AVX512-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[TMP1]], i1 true)
679 ; AVX512-NEXT: store <4 x i32> [[TMP2]], ptr @dst32, align 4
680 ; AVX512-NEXT: ret void
682 %ld0 = load i32, ptr @src32, align 4
683 %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 4
684 %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 4
685 %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 4
686 %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 -1)
687 %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 -1)
688 %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 -1)
689 %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 -1)
690 store i32 %ctlz0, ptr @dst32, align 4
691 store i32 %ctlz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 4
692 store i32 %ctlz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 4
693 store i32 %ctlz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 4
697 define void @ctlz_undef_8i32() #0 {
698 ; SSE-LABEL: @ctlz_undef_8i32(
699 ; SSE-NEXT: [[LD0:%.*]] = load i32, ptr @src32, align 2
700 ; SSE-NEXT: [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
701 ; SSE-NEXT: [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
702 ; SSE-NEXT: [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
703 ; SSE-NEXT: [[LD4:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
704 ; SSE-NEXT: [[LD5:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
705 ; SSE-NEXT: [[LD6:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
706 ; SSE-NEXT: [[LD7:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
707 ; SSE-NEXT: [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
708 ; SSE-NEXT: [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
709 ; SSE-NEXT: [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
710 ; SSE-NEXT: [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
711 ; SSE-NEXT: [[CTLZ4:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD4]], i1 true)
712 ; SSE-NEXT: [[CTLZ5:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD5]], i1 true)
713 ; SSE-NEXT: [[CTLZ6:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD6]], i1 true)
714 ; SSE-NEXT: [[CTLZ7:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD7]], i1 true)
715 ; SSE-NEXT: store i32 [[CTLZ0]], ptr @dst32, align 2
716 ; SSE-NEXT: store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
717 ; SSE-NEXT: store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
718 ; SSE-NEXT: store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
719 ; SSE-NEXT: store i32 [[CTLZ4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
720 ; SSE-NEXT: store i32 [[CTLZ5]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
721 ; SSE-NEXT: store i32 [[CTLZ6]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
722 ; SSE-NEXT: store i32 [[CTLZ7]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
725 ; AVX1-LABEL: @ctlz_undef_8i32(
726 ; AVX1-NEXT: [[LD0:%.*]] = load i32, ptr @src32, align 2
727 ; AVX1-NEXT: [[LD1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
728 ; AVX1-NEXT: [[LD2:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
729 ; AVX1-NEXT: [[LD3:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
730 ; AVX1-NEXT: [[LD4:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
731 ; AVX1-NEXT: [[LD5:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
732 ; AVX1-NEXT: [[LD6:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
733 ; AVX1-NEXT: [[LD7:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
734 ; AVX1-NEXT: [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
735 ; AVX1-NEXT: [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
736 ; AVX1-NEXT: [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
737 ; AVX1-NEXT: [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
738 ; AVX1-NEXT: [[CTLZ4:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD4]], i1 true)
739 ; AVX1-NEXT: [[CTLZ5:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD5]], i1 true)
740 ; AVX1-NEXT: [[CTLZ6:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD6]], i1 true)
741 ; AVX1-NEXT: [[CTLZ7:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD7]], i1 true)
742 ; AVX1-NEXT: store i32 [[CTLZ0]], ptr @dst32, align 2
743 ; AVX1-NEXT: store i32 [[CTLZ1]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
744 ; AVX1-NEXT: store i32 [[CTLZ2]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
745 ; AVX1-NEXT: store i32 [[CTLZ3]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
746 ; AVX1-NEXT: store i32 [[CTLZ4]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
747 ; AVX1-NEXT: store i32 [[CTLZ5]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
748 ; AVX1-NEXT: store i32 [[CTLZ6]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
749 ; AVX1-NEXT: store i32 [[CTLZ7]], ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
750 ; AVX1-NEXT: ret void
752 ; AVX2-LABEL: @ctlz_undef_8i32(
753 ; AVX2-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
754 ; AVX2-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> [[TMP1]], i1 true)
755 ; AVX2-NEXT: store <8 x i32> [[TMP2]], ptr @dst32, align 2
756 ; AVX2-NEXT: ret void
758 ; AVX512-LABEL: @ctlz_undef_8i32(
759 ; AVX512-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @src32, align 2
760 ; AVX512-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> [[TMP1]], i1 true)
761 ; AVX512-NEXT: store <8 x i32> [[TMP2]], ptr @dst32, align 2
762 ; AVX512-NEXT: ret void
764 %ld0 = load i32, ptr @src32, align 2
765 %ld1 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 1), align 2
766 %ld2 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 2), align 2
767 %ld3 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 3), align 2
768 %ld4 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 4), align 2
769 %ld5 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 5), align 2
770 %ld6 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 6), align 2
771 %ld7 = load i32, ptr getelementptr inbounds ([8 x i32], ptr @src32, i32 0, i64 7), align 2
772 %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 -1)
773 %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 -1)
774 %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 -1)
775 %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 -1)
776 %ctlz4 = call i32 @llvm.ctlz.i32(i32 %ld4, i1 -1)
777 %ctlz5 = call i32 @llvm.ctlz.i32(i32 %ld5, i1 -1)
778 %ctlz6 = call i32 @llvm.ctlz.i32(i32 %ld6, i1 -1)
779 %ctlz7 = call i32 @llvm.ctlz.i32(i32 %ld7, i1 -1)
780 store i32 %ctlz0, ptr @dst32, align 2
781 store i32 %ctlz1, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 1), align 2
782 store i32 %ctlz2, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 2), align 2
783 store i32 %ctlz3, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 3), align 2
784 store i32 %ctlz4, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 4), align 2
785 store i32 %ctlz5, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 5), align 2
786 store i32 %ctlz6, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 6), align 2
787 store i32 %ctlz7, ptr getelementptr inbounds ([8 x i32], ptr @dst32, i32 0, i64 7), align 2
791 define void @ctlz_undef_8i16() #0 {
792 ; CHECK-LABEL: @ctlz_undef_8i16(
793 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
794 ; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP1]], i1 true)
795 ; CHECK-NEXT: store <8 x i16> [[TMP2]], ptr @dst16, align 2
796 ; CHECK-NEXT: ret void
798 %ld0 = load i16, ptr @src16, align 2
799 %ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
800 %ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
801 %ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
802 %ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
803 %ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
804 %ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
805 %ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
806 %ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 -1)
807 %ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 -1)
808 %ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 -1)
809 %ctlz3 = call i16 @llvm.ctlz.i16(i16 %ld3, i1 -1)
810 %ctlz4 = call i16 @llvm.ctlz.i16(i16 %ld4, i1 -1)
811 %ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 -1)
812 %ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 -1)
813 %ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 -1)
814 store i16 %ctlz0, ptr @dst16, align 2
815 store i16 %ctlz1, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
816 store i16 %ctlz2, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
817 store i16 %ctlz3, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
818 store i16 %ctlz4, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
819 store i16 %ctlz5, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
820 store i16 %ctlz6, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
821 store i16 %ctlz7, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
825 define void @ctlz_undef_16i16() #0 {
826 ; SSE-LABEL: @ctlz_undef_16i16(
827 ; SSE-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @src16, align 2
828 ; SSE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP1]], i1 true)
829 ; SSE-NEXT: store <8 x i16> [[TMP2]], ptr @dst16, align 2
830 ; SSE-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
831 ; SSE-NEXT: [[TMP4:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[TMP3]], i1 true)
832 ; SSE-NEXT: store <8 x i16> [[TMP4]], ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
835 ; AVX-LABEL: @ctlz_undef_16i16(
836 ; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @src16, align 2
837 ; AVX-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> [[TMP1]], i1 true)
838 ; AVX-NEXT: store <16 x i16> [[TMP2]], ptr @dst16, align 2
%ld0 = load i16, ptr @src16, align 2
%ld1 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 1), align 2
%ld2 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 2), align 2
%ld3 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 3), align 2
%ld4 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 4), align 2
%ld5 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 5), align 2
%ld6 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 6), align 2
%ld7 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 7), align 2
%ld8 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 8), align 2
%ld9 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 9), align 2
%ld10 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 10), align 2
%ld11 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 11), align 2
%ld12 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 12), align 2
%ld13 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 13), align 2
%ld14 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 14), align 2
%ld15 = load i16, ptr getelementptr inbounds ([16 x i16], ptr @src16, i16 0, i64 15), align 2
%ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 -1)
%ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 -1)
%ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 -1)
%ctlz3 = call i16 @llvm.ctlz.i16(i16 %ld3, i1 -1)
%ctlz4 = call i16 @llvm.ctlz.i16(i16 %ld4, i1 -1)
%ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 -1)
%ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 -1)
%ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 -1)
%ctlz8 = call i16 @llvm.ctlz.i16(i16 %ld8, i1 -1)
%ctlz9 = call i16 @llvm.ctlz.i16(i16 %ld9, i1 -1)
%ctlz10 = call i16 @llvm.ctlz.i16(i16 %ld10, i1 -1)
%ctlz11 = call i16 @llvm.ctlz.i16(i16 %ld11, i1 -1)
%ctlz12 = call i16 @llvm.ctlz.i16(i16 %ld12, i1 -1)
%ctlz13 = call i16 @llvm.ctlz.i16(i16 %ld13, i1 -1)
%ctlz14 = call i16 @llvm.ctlz.i16(i16 %ld14, i1 -1)
%ctlz15 = call i16 @llvm.ctlz.i16(i16 %ld15, i1 -1)
store i16 %ctlz0 , ptr @dst16, align 2
store i16 %ctlz1 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 1), align 2
store i16 %ctlz2 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 2), align 2
store i16 %ctlz3 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 3), align 2
store i16 %ctlz4 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 4), align 2
store i16 %ctlz5 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 5), align 2
store i16 %ctlz6 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 6), align 2
store i16 %ctlz7 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 7), align 2
store i16 %ctlz8 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 8), align 2
store i16 %ctlz9 , ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 9), align 2
store i16 %ctlz10, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 10), align 2
store i16 %ctlz11, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 11), align 2
store i16 %ctlz12, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 12), align 2
store i16 %ctlz13, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 13), align 2
store i16 %ctlz14, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 14), align 2
store i16 %ctlz15, ptr getelementptr inbounds ([16 x i16], ptr @dst16, i16 0, i64 15), align 2
ret void
}

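; ctlz_undef_16i8: all targets are expected to vectorize the sixteen scalar @llvm.ctlz.i8 calls into a single <16 x i8> @llvm.ctlz.v16i8.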
define void @ctlz_undef_16i8() #0 {
; CHECK-LABEL: @ctlz_undef_16i8(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP1]], i1 true)
; CHECK-NEXT: store <16 x i8> [[TMP2]], ptr @dst8, align 1
; CHECK-NEXT: ret void
;
%ld0 = load i8, ptr @src8, align 1
%ld1 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 1), align 1
%ld2 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 2), align 1
%ld3 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 3), align 1
%ld4 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 4), align 1
%ld5 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 5), align 1
%ld6 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 6), align 1
%ld7 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 7), align 1
%ld8 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 8), align 1
%ld9 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 9), align 1
%ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
%ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
%ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
%ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
%ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
%ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
%ctlz0 = call i8 @llvm.ctlz.i8(i8 %ld0, i1 -1)
%ctlz1 = call i8 @llvm.ctlz.i8(i8 %ld1, i1 -1)
%ctlz2 = call i8 @llvm.ctlz.i8(i8 %ld2, i1 -1)
%ctlz3 = call i8 @llvm.ctlz.i8(i8 %ld3, i1 -1)
%ctlz4 = call i8 @llvm.ctlz.i8(i8 %ld4, i1 -1)
%ctlz5 = call i8 @llvm.ctlz.i8(i8 %ld5, i1 -1)
%ctlz6 = call i8 @llvm.ctlz.i8(i8 %ld6, i1 -1)
%ctlz7 = call i8 @llvm.ctlz.i8(i8 %ld7, i1 -1)
%ctlz8 = call i8 @llvm.ctlz.i8(i8 %ld8, i1 -1)
%ctlz9 = call i8 @llvm.ctlz.i8(i8 %ld9, i1 -1)
%ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 -1)
%ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 -1)
%ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 -1)
%ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 -1)
%ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 -1)
%ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 -1)
store i8 %ctlz0 , ptr @dst8, align 1
store i8 %ctlz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 1), align 1
store i8 %ctlz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 2), align 1
store i8 %ctlz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 3), align 1
store i8 %ctlz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 4), align 1
store i8 %ctlz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 5), align 1
store i8 %ctlz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 6), align 1
store i8 %ctlz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 7), align 1
store i8 %ctlz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 8), align 1
store i8 %ctlz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 9), align 1
store i8 %ctlz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
store i8 %ctlz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
store i8 %ctlz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
store i8 %ctlz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
store i8 %ctlz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
store i8 %ctlz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
ret void
}

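; ctlz_undef_32i8: SSE targets are expected to split the work into two <16 x i8> @llvm.ctlz.v16i8 calls, while AVX targets use a single <32 x i8> @llvm.ctlz.v32i8.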
define void @ctlz_undef_32i8() #0 {
; SSE-LABEL: @ctlz_undef_32i8(
; SSE-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @src8, align 1
; SSE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP1]], i1 true)
; SSE-NEXT: store <16 x i8> [[TMP2]], ptr @dst8, align 1
; SSE-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
; SSE-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[TMP3]], i1 true)
; SSE-NEXT: store <16 x i8> [[TMP4]], ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
; SSE-NEXT: ret void
;
; AVX-LABEL: @ctlz_undef_32i8(
; AVX-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @src8, align 1
; AVX-NEXT: [[TMP2:%.*]] = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> [[TMP1]], i1 true)
; AVX-NEXT: store <32 x i8> [[TMP2]], ptr @dst8, align 1
; AVX-NEXT: ret void
;
%ld0 = load i8, ptr @src8, align 1
%ld1 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 1), align 1
%ld2 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 2), align 1
%ld3 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 3), align 1
%ld4 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 4), align 1
%ld5 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 5), align 1
%ld6 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 6), align 1
%ld7 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 7), align 1
%ld8 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 8), align 1
%ld9 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 9), align 1
%ld10 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 10), align 1
%ld11 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 11), align 1
%ld12 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 12), align 1
%ld13 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 13), align 1
%ld14 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 14), align 1
%ld15 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 15), align 1
%ld16 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 16), align 1
%ld17 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 17), align 1
%ld18 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 18), align 1
%ld19 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 19), align 1
%ld20 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 20), align 1
%ld21 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 21), align 1
%ld22 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 22), align 1
%ld23 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 23), align 1
%ld24 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 24), align 1
%ld25 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 25), align 1
%ld26 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 26), align 1
%ld27 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 27), align 1
%ld28 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 28), align 1
%ld29 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 29), align 1
%ld30 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 30), align 1
%ld31 = load i8, ptr getelementptr inbounds ([32 x i8], ptr @src8, i8 0, i64 31), align 1
%ctlz0 = call i8 @llvm.ctlz.i8(i8 %ld0, i1 -1)
%ctlz1 = call i8 @llvm.ctlz.i8(i8 %ld1, i1 -1)
%ctlz2 = call i8 @llvm.ctlz.i8(i8 %ld2, i1 -1)
%ctlz3 = call i8 @llvm.ctlz.i8(i8 %ld3, i1 -1)
%ctlz4 = call i8 @llvm.ctlz.i8(i8 %ld4, i1 -1)
%ctlz5 = call i8 @llvm.ctlz.i8(i8 %ld5, i1 -1)
%ctlz6 = call i8 @llvm.ctlz.i8(i8 %ld6, i1 -1)
%ctlz7 = call i8 @llvm.ctlz.i8(i8 %ld7, i1 -1)
%ctlz8 = call i8 @llvm.ctlz.i8(i8 %ld8, i1 -1)
%ctlz9 = call i8 @llvm.ctlz.i8(i8 %ld9, i1 -1)
%ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 -1)
%ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 -1)
%ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 -1)
%ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 -1)
%ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 -1)
%ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 -1)
%ctlz16 = call i8 @llvm.ctlz.i8(i8 %ld16, i1 -1)
%ctlz17 = call i8 @llvm.ctlz.i8(i8 %ld17, i1 -1)
%ctlz18 = call i8 @llvm.ctlz.i8(i8 %ld18, i1 -1)
%ctlz19 = call i8 @llvm.ctlz.i8(i8 %ld19, i1 -1)
%ctlz20 = call i8 @llvm.ctlz.i8(i8 %ld20, i1 -1)
%ctlz21 = call i8 @llvm.ctlz.i8(i8 %ld21, i1 -1)
%ctlz22 = call i8 @llvm.ctlz.i8(i8 %ld22, i1 -1)
%ctlz23 = call i8 @llvm.ctlz.i8(i8 %ld23, i1 -1)
%ctlz24 = call i8 @llvm.ctlz.i8(i8 %ld24, i1 -1)
%ctlz25 = call i8 @llvm.ctlz.i8(i8 %ld25, i1 -1)
%ctlz26 = call i8 @llvm.ctlz.i8(i8 %ld26, i1 -1)
%ctlz27 = call i8 @llvm.ctlz.i8(i8 %ld27, i1 -1)
%ctlz28 = call i8 @llvm.ctlz.i8(i8 %ld28, i1 -1)
%ctlz29 = call i8 @llvm.ctlz.i8(i8 %ld29, i1 -1)
%ctlz30 = call i8 @llvm.ctlz.i8(i8 %ld30, i1 -1)
%ctlz31 = call i8 @llvm.ctlz.i8(i8 %ld31, i1 -1)
store i8 %ctlz0 , ptr @dst8, align 1
store i8 %ctlz1 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 1), align 1
store i8 %ctlz2 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 2), align 1
store i8 %ctlz3 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 3), align 1
store i8 %ctlz4 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 4), align 1
store i8 %ctlz5 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 5), align 1
store i8 %ctlz6 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 6), align 1
store i8 %ctlz7 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 7), align 1
store i8 %ctlz8 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 8), align 1
store i8 %ctlz9 , ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 9), align 1
store i8 %ctlz10, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 10), align 1
store i8 %ctlz11, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 11), align 1
store i8 %ctlz12, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 12), align 1
store i8 %ctlz13, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 13), align 1
store i8 %ctlz14, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 14), align 1
store i8 %ctlz15, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 15), align 1
store i8 %ctlz16, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 16), align 1
store i8 %ctlz17, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 17), align 1
store i8 %ctlz18, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 18), align 1
store i8 %ctlz19, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 19), align 1
store i8 %ctlz20, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 20), align 1
store i8 %ctlz21, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 21), align 1
store i8 %ctlz22, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 22), align 1
store i8 %ctlz23, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 23), align 1
store i8 %ctlz24, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 24), align 1
store i8 %ctlz25, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 25), align 1
store i8 %ctlz26, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 26), align 1
store i8 %ctlz27, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 27), align 1
store i8 %ctlz28, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 28), align 1
store i8 %ctlz29, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 29), align 1
store i8 %ctlz30, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 30), align 1
store i8 %ctlz31, ptr getelementptr inbounds ([32 x i8], ptr @dst8, i8 0, i64 31), align 1
ret void
}

attributes #0 = { nounwind }