; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s

target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
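
; NOTE: Each test below checks the MemorySanitizer instrumentation of one
; x86 AVX2 intrinsic: the argument shadows are loaded from __msan_param_tls,
; propagated through (or checked against) the intrinsic, and the result
; shadow is stored to __msan_retval_tls.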

define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_packssdw(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32>
; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP4]], <8 x i32> [[TMP6]])
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
; CHECK-NEXT:    store <16 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
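
; NOTE: For the saturating pack intrinsics the shadow is itself packed: each
; input element is collapsed to an all-ones/all-zeroes poison mask
; (icmp ne + sext) and the masks are packed with the signed-saturating
; intrinsic ([[_MSPROP_VECTOR_PACK]] above).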

define <16 x i16> @test_x86_avx2_packssdw_fold() #0 {
; CHECK-LABEL: @test_x86_avx2_packssdw_fold(
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer)
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
; CHECK-NEXT:    store <16 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
  ret <16 x i16> %res
}

define <32 x i8> @test_x86_avx2_packsswb(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_packsswb(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <16 x i16> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i16>
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16>
; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP4]], <16 x i16> [[TMP6]])
; CHECK-NEXT:    [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT:    store <32 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <32 x i8> [[RES]]
;
  %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone

define <32 x i8> @test_x86_avx2_packsswb_fold() #0 {
; CHECK-LABEL: @test_x86_avx2_packsswb_fold(
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> zeroinitializer, <16 x i16> zeroinitializer)
; CHECK-NEXT:    [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> <i16 0, i16 255, i16 256, i16 -1, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 -1, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
; CHECK-NEXT:    store <32 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <32 x i8> [[RES]]
;
  %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
  ret <32 x i8> %res
}

define <32 x i8> @test_x86_avx2_packuswb(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_packuswb(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <16 x i16> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i16>
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16>
; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP4]], <16 x i16> [[TMP6]])
; CHECK-NEXT:    [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT:    store <32 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <32 x i8> [[RES]]
;
  %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone

define <32 x i8> @test_x86_avx2_packuswb_fold() #0 {
; CHECK-LABEL: @test_x86_avx2_packuswb_fold(
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> zeroinitializer, <16 x i16> zeroinitializer)
; CHECK-NEXT:    [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> <i16 0, i16 255, i16 256, i16 -1, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 -1, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
; CHECK-NEXT:    store <32 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <32 x i8> [[RES]]
;
  %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
  ret <32 x i8> %res
}

define <32 x i8> @test_x86_avx2_pavg_b(<32 x i8> %a0, <32 x i8> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_pavg_b(
; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
; CHECK-NEXT:    store <32 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <32 x i8> [[RES]]
;
  %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @test_x86_avx2_pavg_w(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_pavg_w(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT:    store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_pmadd_wd(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i16> [[TMP3]] to <8 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32>
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT:    store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[RES]]
;
  %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone

define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) #0 {
; CHECK-LABEL: @test_x86_avx2_pmovmskb(
; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <32 x i8> [[TMP1]] to i256
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6:[0-9]+]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> [[A0:%.*]])
; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone
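
; NOTE: pmovmskb has no precise shadow propagation: if any input bit is
; poisoned, the instrumentation reports an error (__msan_warning_noreturn);
; otherwise the result shadow is a clean zero.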

define <16 x i16> @test_x86_avx2_pmulh_w(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_pmulh_w(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT:    store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone

define <16 x i16> @test_x86_avx2_pmulhu_w(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_pmulhu_w(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT:    store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone

define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_psad_bw(
; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <4 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <4 x i64> [[TMP4]], zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i64>
; CHECK-NEXT:    [[TMP7:%.*]] = lshr <4 x i64> [[TMP6]], <i64 48, i64 48, i64 48, i64 48>
; CHECK-NEXT:    [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
; CHECK-NEXT:    store <4 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i64> [[RES]]
;
  %res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone
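
; NOTE: For psad.bw the per-lane poison mask is shifted right by 48 bits:
; each 64-bit lane of the result holds only a 16-bit sum of absolute
; differences, so the upper bits of every lane are always initialized.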

define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_psll_d(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i256
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <8 x i32>
; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[TMP7]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> [[A0:%.*]], <4 x i32> [[A1]])
; CHECK-NEXT:    store <8 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[RES]]
;
  %res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone

define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_psll_q(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i256
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <4 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i64> [[TMP8]], [[TMP7]]
; CHECK-NEXT:    [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> [[A0:%.*]], <2 x i64> [[A1]])
; CHECK-NEXT:    store <4 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i64> [[RES]]
;
  %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone

define <16 x i16> @test_x86_avx2_psll_w(<16 x i16> %a0, <8 x i16> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_psll_w(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i256
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <16 x i16>
; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> [[TMP1]], <8 x i16> [[A1:%.*]])
; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i16> [[TMP8]], [[TMP7]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> [[A0:%.*]], <8 x i16> [[A1]])
; CHECK-NEXT:    store <16 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone
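
; NOTE: For the shift-by-vector intrinsics the value shadow is shifted by the
; same amount, and if the low 64 bits of the shift-amount shadow are non-zero
; the whole result shadow is additionally set to all-ones (sext i1 ... + or).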

define <8 x i32> @test_x86_avx2_pslli_d(<8 x i32> %a0) #0 {
; CHECK-LABEL: @test_x86_avx2_pslli_d(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP3:%.*]] = or <8 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> [[A0:%.*]], i32 7)
; CHECK-NEXT:    store <8 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[RES]]
;
  %res = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) nounwind readnone

define <4 x i64> @test_x86_avx2_pslli_q(<4 x i64> %a0) #0 {
; CHECK-LABEL: @test_x86_avx2_pslli_q(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> [[A0:%.*]], i32 7)
; CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i64> [[RES]]
;
  %res = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) nounwind readnone

define <16 x i16> @test_x86_avx2_pslli_w(<16 x i16> %a0) #0 {
; CHECK-LABEL: @test_x86_avx2_pslli_w(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP3:%.*]] = or <16 x i16> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> [[A0:%.*]], i32 7)
; CHECK-NEXT:    store <16 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) nounwind readnone

define <8 x i32> @test_x86_avx2_psra_d(<8 x i32> %a0, <4 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_psra_d(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i256
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <8 x i32>
; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[TMP7]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> [[A0:%.*]], <4 x i32> [[A1]])
; CHECK-NEXT:    store <8 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[RES]]
;
  %res = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone

define <16 x i16> @test_x86_avx2_psra_w(<16 x i16> %a0, <8 x i16> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_psra_w(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i256
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <16 x i16>
; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> [[TMP1]], <8 x i16> [[A1:%.*]])
; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i16> [[TMP8]], [[TMP7]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> [[A0:%.*]], <8 x i16> [[A1]])
; CHECK-NEXT:    store <16 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone

define <8 x i32> @test_x86_avx2_psrai_d(<8 x i32> %a0) #0 {
; CHECK-LABEL: @test_x86_avx2_psrai_d(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP3:%.*]] = or <8 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> [[A0:%.*]], i32 7)
; CHECK-NEXT:    store <8 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[RES]]
;
  %res = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) nounwind readnone

define <16 x i16> @test_x86_avx2_psrai_w(<16 x i16> %a0) #0 {
; CHECK-LABEL: @test_x86_avx2_psrai_w(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP3:%.*]] = or <16 x i16> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> [[A0:%.*]], i32 7)
; CHECK-NEXT:    store <16 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) nounwind readnone

define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_psrl_d(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i256
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <8 x i32>
; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[TMP7]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[A0:%.*]], <4 x i32> [[A1]])
; CHECK-NEXT:    store <8 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[RES]]
;
  %res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone

define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_psrl_q(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i256
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <4 x i64>
; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i64> [[TMP8]], [[TMP7]]
; CHECK-NEXT:    [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> [[A0:%.*]], <2 x i64> [[A1]])
; CHECK-NEXT:    store <4 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i64> [[RES]]
;
  %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone

define <16 x i16> @test_x86_avx2_psrl_w(<16 x i16> %a0, <8 x i16> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_psrl_w(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = sext i1 [[TMP5]] to i256
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <16 x i16>
; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[TMP1]], <8 x i16> [[A1:%.*]])
; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i16> [[TMP8]], [[TMP7]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[A0:%.*]], <8 x i16> [[A1]])
; CHECK-NEXT:    store <16 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone

define <16 x i16> @test_x86_avx2_psrl_w_load(<16 x i16> %a0, ptr %p) #0 {
; CHECK-LABEL: @test_x86_avx2_psrl_w_load(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[A1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128
; CHECK-NEXT:    [[TMP9:%.*]] = trunc i128 [[TMP8]] to i64
; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
; CHECK-NEXT:    [[TMP11:%.*]] = sext i1 [[TMP10]] to i256
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i256 [[TMP11]] to <16 x i16>
; CHECK-NEXT:    [[TMP13:%.*]] = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[TMP2]], <8 x i16> [[A1]])
; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i16> [[TMP13]], [[TMP12]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[A0:%.*]], <8 x i16> [[A1]])
; CHECK-NEXT:    store <16 x i16> [[TMP14]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %a1 = load <8 x i16>, ptr %p
  %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}

define <8 x i32> @test_x86_avx2_psrli_d(<8 x i32> %a0) #0 {
; CHECK-LABEL: @test_x86_avx2_psrli_d(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP3:%.*]] = or <8 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> [[A0:%.*]], i32 7)
; CHECK-NEXT:    store <8 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[RES]]
;
  %res = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) nounwind readnone

define <4 x i64> @test_x86_avx2_psrli_q(<4 x i64> %a0) #0 {
; CHECK-LABEL: @test_x86_avx2_psrli_q(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> [[A0:%.*]], i32 7)
; CHECK-NEXT:    store <4 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i64> [[RES]]
;
  %res = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) nounwind readnone

define <16 x i16> @test_x86_avx2_psrli_w(<16 x i16> %a0) #0 {
; CHECK-LABEL: @test_x86_avx2_psrli_w(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> [[TMP1]], i32 7)
; CHECK-NEXT:    [[TMP3:%.*]] = or <16 x i16> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> [[A0:%.*]], i32 7)
; CHECK-NEXT:    store <16 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone

define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_phadd_d(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
; CHECK-NEXT:    store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[RES]]
;
  %res = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone

define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_phadd_sw(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT:    store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone

define <16 x i16> @test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_phadd_w(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT:    store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_phsub_d(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
; CHECK-NEXT:    store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[RES]]
;
  %res = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone

define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_phsub_sw(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT:    store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone

define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_phsub_w(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT:    store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone

define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_pmadd_ub_sw(
; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <16 x i16>
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP4]], zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16>
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
; CHECK-NEXT:    store <16 x i16> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_pmadd_ub_sw_load_op0(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[A0:%.*]] = load <32 x i8>, ptr [[PTR:%.*]], align 32
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[PTR]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32
; CHECK-NEXT:    [[TMP8:%.*]] = or <32 x i8> [[_MSLD]], [[TMP2]]
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <32 x i8> [[TMP8]] to <16 x i16>
; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne <16 x i16> [[TMP9]], zeroinitializer
; CHECK-NEXT:    [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i16>
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]])
; CHECK-NEXT:    store <16 x i16> [[TMP11]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %a0 = load <32 x i8>, ptr %ptr
  %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}

define <16 x i16> @test_x86_avx2_pmul_hr_sw(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_pmul_hr_sw(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT:    store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone

define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_pshuf_b(
; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
; CHECK-NEXT:    store <32 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <32 x i8> [[RES]]
;
  %res = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone

define <32 x i8> @test_x86_avx2_psign_b(<32 x i8> %a0, <32 x i8> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_psign_b(
; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
; CHECK-NEXT:    store <32 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <32 x i8> [[RES]]
;
  %res = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone

define <8 x i32> @test_x86_avx2_psign_d(<8 x i32> %a0, <8 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_psign_d(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
; CHECK-NEXT:    store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[RES]]
;
  %res = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone

define <16 x i16> @test_x86_avx2_psign_w(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_psign_w(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT:    store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone

define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_mpsadbw(
; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <32 x i8> [[TMP1]] to i256
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <32 x i8> [[TMP2]] to i256
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]], i8 7)
; CHECK-NEXT:    store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
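
; NOTE: mpsadbw has no elementwise shadow propagation; the instrumentation
; falls back to checking both vector operands in full, reporting an error if
; either is poisoned, and otherwise produces a clean zero result shadow.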

define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(ptr %ptr, <32 x i8> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_mpsadbw_load_op0(
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
; CHECK:       3:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT:    unreachable
; CHECK:       4:
; CHECK-NEXT:    [[A0:%.*]] = load <32 x i8>, ptr [[PTR:%.*]], align 32
; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[PTR]] to i64
; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT:    [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <32 x i8> [[_MSLD]] to i256
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i256 [[TMP8]], 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <32 x i8> [[TMP2]] to i256
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP9]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
; CHECK:       10:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT:    unreachable
; CHECK:       11:
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]], i8 7)
; CHECK-NEXT:    store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %a0 = load <32 x i8>, ptr %ptr
  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}

define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_packusdw(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT:    [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32>
; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP4]], <8 x i32> [[TMP6]])
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
; CHECK-NEXT:    store <16 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone

define <16 x i16> @test_x86_avx2_packusdw_fold() #0 {
; CHECK-LABEL: @test_x86_avx2_packusdw_fold(
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer)
; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
; CHECK-NEXT:    store <16 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[RES]]
;
  %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
  ret <16 x i16> %res
}

define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) #0 {
; CHECK-LABEL: @test_x86_avx2_pblendvb(
; CHECK-NEXT:    [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    [[TMP3:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <32 x i8> [[_MSPROP]], [[TMP3]]
; CHECK-NEXT:    [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]], <32 x i8> [[A2:%.*]])
; CHECK-NEXT:    store <32 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <32 x i8> [[RES]]
;
  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone

define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_pblendw(
; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> <i32 16, i32 17, i32 18, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]], <16 x i32> <i32 16, i32 17, i32 18, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
;
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i8 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone
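
; NOTE: Constant-mask blends are lowered to shufflevector, so the shadow is
; blended with the same shuffle mask as the data ([[_MSPROP]] above).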

define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_pblendd_128(
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 4, i32 5, i32 6, i32 3>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], <4 x i32> <i32 4, i32 5, i32 6, i32 3>
; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
;
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i8 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i8) nounwind readnone

define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_pblendd_256(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]], <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[TMP3]]
;
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone

define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_permd(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
; CHECK-NEXT:    store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x i32> [[RES]]
;
  %res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly

define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx2_permps(
; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT:    call void @llvm.donothing()
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
; CHECK:       5:
; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT:    unreachable
; CHECK:       6:
; CHECK-NEXT:    [[RES:%.*]] = call <8 x float> @llvm.x86.avx2.permps(<8 x float> [[A0:%.*]], <8 x i32> [[A1:%.*]])
; CHECK-NEXT:    store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT:    ret <8 x float> [[RES]]
;
  %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly
989 define <2 x i64> @test_x86_avx2_maskload_q(ptr %a0, <2 x i64> %a1) #0 {
990 ; CHECK-LABEL: @test_x86_avx2_maskload_q(
991 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
992 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
993 ; CHECK-NEXT: call void @llvm.donothing()
994 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
995 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
996 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
997 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
998 ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]]
999 ; CHECK: 4:
1000 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1001 ; CHECK-NEXT: unreachable
1002 ; CHECK: 5:
1003 ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.maskload.q(ptr [[A0:%.*]], <2 x i64> [[A1:%.*]])
1004 ; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1005 ; CHECK-NEXT: ret <2 x i64> [[RES]]
1006 ;
1007 %res = call <2 x i64> @llvm.x86.avx2.maskload.q(ptr %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
1008 ret <2 x i64> %res
1009 }
1010 declare <2 x i64> @llvm.x86.avx2.maskload.q(ptr, <2 x i64>) nounwind readonly
1013 define <4 x i64> @test_x86_avx2_maskload_q_256(ptr %a0, <4 x i64> %a1) #0 {
1014 ; CHECK-LABEL: @test_x86_avx2_maskload_q_256(
1015 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1016 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1017 ; CHECK-NEXT: call void @llvm.donothing()
1018 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1019 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
1020 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
1021 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1022 ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]]
1023 ; CHECK: 4:
1024 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1025 ; CHECK-NEXT: unreachable
1026 ; CHECK: 5:
1027 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr [[A0:%.*]], <4 x i64> [[A1:%.*]])
1028 ; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1029 ; CHECK-NEXT: ret <4 x i64> [[RES]]
1030 ;
1031 %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
1032 ret <4 x i64> %res
1033 }
1034 declare <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr, <4 x i64>) nounwind readonly
1037 define <4 x i32> @test_x86_avx2_maskload_d(ptr %a0, <4 x i32> %a1) #0 {
1038 ; CHECK-LABEL: @test_x86_avx2_maskload_d(
1039 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1040 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1041 ; CHECK-NEXT: call void @llvm.donothing()
1042 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1043 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1044 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
1045 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1046 ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]]
1047 ; CHECK: 4:
1048 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1049 ; CHECK-NEXT: unreachable
1050 ; CHECK: 5:
1051 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.maskload.d(ptr [[A0:%.*]], <4 x i32> [[A1:%.*]])
1052 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
1053 ; CHECK-NEXT: ret <4 x i32> [[RES]]
1054 ;
1055 %res = call <4 x i32> @llvm.x86.avx2.maskload.d(ptr %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1056 ret <4 x i32> %res
1057 }
1058 declare <4 x i32> @llvm.x86.avx2.maskload.d(ptr, <4 x i32>) nounwind readonly
1061 define <8 x i32> @test_x86_avx2_maskload_d_256(ptr %a0, <8 x i32> %a1) #0 {
1062 ; CHECK-LABEL: @test_x86_avx2_maskload_d_256(
1063 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1064 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1065 ; CHECK-NEXT: call void @llvm.donothing()
1066 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1067 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
1068 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
1069 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1070 ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]]
1071 ; CHECK: 4:
1072 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1073 ; CHECK-NEXT: unreachable
1074 ; CHECK: 5:
1075 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr [[A0:%.*]], <8 x i32> [[A1:%.*]])
1076 ; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
1077 ; CHECK-NEXT: ret <8 x i32> [[RES]]
1078 ;
1079 %res = call <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
1080 ret <8 x i32> %res
1081 }
1082 declare <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr, <8 x i32>) nounwind readonly
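1083 ; The maskstore tests likewise expect strict checks: the pointer, mask, and value shadows
1084 ; are each compared against zero and OR-ed into a single flag that gates the __msan_warning_noreturn branch.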
1085 define void @test_x86_avx2_maskstore_q(ptr %a0, <2 x i64> %a1, <2 x i64> %a2) #0 {
1086 ; CHECK-LABEL: @test_x86_avx2_maskstore_q(
1087 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1088 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1089 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
1090 ; CHECK-NEXT: call void @llvm.donothing()
1091 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1092 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1093 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
1094 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1095 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
1096 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0
1097 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1098 ; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
1099 ; CHECK: 6:
1100 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1101 ; CHECK-NEXT: unreachable
1102 ; CHECK: 7:
1103 ; CHECK-NEXT: call void @llvm.x86.avx2.maskstore.q(ptr [[A0:%.*]], <2 x i64> [[A1:%.*]], <2 x i64> [[A2:%.*]])
1104 ; CHECK-NEXT: ret void
1105 ;
1106 call void @llvm.x86.avx2.maskstore.q(ptr %a0, <2 x i64> %a1, <2 x i64> %a2)
1107 ret void
1108 }
1109 declare void @llvm.x86.avx2.maskstore.q(ptr, <2 x i64>, <2 x i64>) nounwind
1112 define void @test_x86_avx2_maskstore_q_256(ptr %a0, <4 x i64> %a1, <4 x i64> %a2) #0 {
1113 ; CHECK-LABEL: @test_x86_avx2_maskstore_q_256(
1114 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1115 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1116 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1117 ; CHECK-NEXT: call void @llvm.donothing()
1118 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1119 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
1120 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
1121 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1122 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP3]] to i256
1123 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0
1124 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1125 ; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
1126 ; CHECK: 6:
1127 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1128 ; CHECK-NEXT: unreachable
1129 ; CHECK: 7:
1130 ; CHECK-NEXT: call void @llvm.x86.avx2.maskstore.q.256(ptr [[A0:%.*]], <4 x i64> [[A1:%.*]], <4 x i64> [[A2:%.*]])
1131 ; CHECK-NEXT: ret void
1132 ;
1133 call void @llvm.x86.avx2.maskstore.q.256(ptr %a0, <4 x i64> %a1, <4 x i64> %a2)
1134 ret void
1135 }
1136 declare void @llvm.x86.avx2.maskstore.q.256(ptr, <4 x i64>, <4 x i64>) nounwind
1139 define void @test_x86_avx2_maskstore_d(ptr %a0, <4 x i32> %a1, <4 x i32> %a2) #0 {
1140 ; CHECK-LABEL: @test_x86_avx2_maskstore_d(
1141 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1142 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1143 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
1144 ; CHECK-NEXT: call void @llvm.donothing()
1145 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1146 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1147 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
1148 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1149 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
1150 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0
1151 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1152 ; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
1153 ; CHECK: 6:
1154 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1155 ; CHECK-NEXT: unreachable
1156 ; CHECK: 7:
1157 ; CHECK-NEXT: call void @llvm.x86.avx2.maskstore.d(ptr [[A0:%.*]], <4 x i32> [[A1:%.*]], <4 x i32> [[A2:%.*]])
1158 ; CHECK-NEXT: ret void
1159 ;
1160 call void @llvm.x86.avx2.maskstore.d(ptr %a0, <4 x i32> %a1, <4 x i32> %a2)
1161 ret void
1162 }
1163 declare void @llvm.x86.avx2.maskstore.d(ptr, <4 x i32>, <4 x i32>) nounwind
1166 define void @test_x86_avx2_maskstore_d_256(ptr %a0, <8 x i32> %a1, <8 x i32> %a2) #0 {
1167 ; CHECK-LABEL: @test_x86_avx2_maskstore_d_256(
1168 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1169 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1170 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1171 ; CHECK-NEXT: call void @llvm.donothing()
1172 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1173 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
1174 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
1175 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1176 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP3]] to i256
1177 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0
1178 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1179 ; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
1180 ; CHECK: 6:
1181 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1182 ; CHECK-NEXT: unreachable
1183 ; CHECK: 7:
1184 ; CHECK-NEXT: call void @llvm.x86.avx2.maskstore.d.256(ptr [[A0:%.*]], <8 x i32> [[A1:%.*]], <8 x i32> [[A2:%.*]])
1185 ; CHECK-NEXT: ret void
1186 ;
1187 call void @llvm.x86.avx2.maskstore.d.256(ptr %a0, <8 x i32> %a1, <8 x i32> %a2)
1188 ret void
1189 }
1190 declare void @llvm.x86.avx2.maskstore.d.256(ptr, <8 x i32>, <8 x i32>) nounwind
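1191 ; For the variable shifts below, the expected instrumentation shifts the value's shadow by the
1192 ; same per-lane amounts and ORs in sext(icmp ne shift-shadow, 0), poisoning every lane whose shift amount is not fully initialized.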
1193 define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) #0 {
1194 ; CHECK-LABEL: @test_x86_avx2_psllv_d(
1195 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1196 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1197 ; CHECK-NEXT: call void @llvm.donothing()
1198 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
1199 ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
1200 ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
1201 ; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP5]], [[TMP4]]
1202 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]])
1203 ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
1204 ; CHECK-NEXT: ret <4 x i32> [[RES]]
1205 ;
1206 %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1207 ret <4 x i32> %res
1208 }
1210 define <4 x i32> @test_x86_avx2_psllv_d_const() #0 {
1211 ; CHECK-LABEL: @test_x86_avx2_psllv_d_const(
1212 ; CHECK-NEXT: call void @llvm.donothing()
1213 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
1214 ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
1215 ; CHECK-NEXT: [[RES0:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> <i32 2, i32 9, i32 0, i32 -1>, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
1216 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 1, i32 1, i32 -1>)
1217 ; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP3]], zeroinitializer
1218 ; CHECK-NEXT: [[RES1:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> <i32 1, i32 1, i32 1, i32 -1>, <4 x i32> <i32 1, i32 1, i32 1, i32 -1>)
1219 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP2]], [[TMP4]]
1220 ; CHECK-NEXT: [[RES2:%.*]] = add <4 x i32> [[RES0]], [[RES1]]
1221 ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1222 ; CHECK-NEXT: ret <4 x i32> [[RES2]]
1223 ;
1224 %res0 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> <i32 2, i32 9, i32 0, i32 -1>, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
1225 %res1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> <i32 1, i32 1, i32 1, i32 -1>, <4 x i32> <i32 1, i32 1, i32 1, i32 -1>)
1226 %res2 = add <4 x i32> %res0, %res1
1227 ret <4 x i32> %res2
1228 }
1229 declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
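1230 ; The *_const variants check behavior for fully-initialized constant operands: the same shadow
1231 ; computation is emitted with all-zero input shadows (even for out-of-range counts such as 33 or -1), so the stored result shadow is zero at run time.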
1232 define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) #0 {
1233 ; CHECK-LABEL: @test_x86_avx2_psllv_d_256(
1234 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
1235 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1236 ; CHECK-NEXT: call void @llvm.donothing()
1237 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer
1238 ; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32>
1239 ; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> [[TMP1]], <8 x i32> [[A1:%.*]])
1240 ; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i32> [[TMP5]], [[TMP4]]
1241 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[A1]])
1242 ; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
1243 ; CHECK-NEXT: ret <8 x i32> [[RES]]
1244 ;
1245 %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
1246 ret <8 x i32> %res
1247 }
1249 define <8 x i32> @test_x86_avx2_psllv_d_256_const() #0 {
1250 ; CHECK-LABEL: @test_x86_avx2_psllv_d_256_const(
1251 ; CHECK-NEXT: call void @llvm.donothing()
1252 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2>)
1253 ; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> [[TMP1]], zeroinitializer
1254 ; CHECK-NEXT: [[RES0:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0>, <8 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2>)
1255 ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
1256 ; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i32> [[TMP3]], zeroinitializer
1257 ; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
1258 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP2]], [[TMP4]]
1259 ; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES0]], [[RES1]]
1260 ; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1261 ; CHECK-NEXT: ret <8 x i32> [[RES2]]
1262 ;
1263 %res0 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0>, <8 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2>)
1264 %res1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
1265 %res2 = add <8 x i32> %res0, %res1
1266 ret <8 x i32> %res2
1267 }
1268 declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
1271 define <2 x i64> @test_x86_avx2_psllv_q(<2 x i64> %a0, <2 x i64> %a1) #0 {
1272 ; CHECK-LABEL: @test_x86_avx2_psllv_q(
1273 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1274 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1275 ; CHECK-NEXT: call void @llvm.donothing()
1276 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
1277 ; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
1278 ; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
1279 ; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i64> [[TMP5]], [[TMP4]]
1280 ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1]])
1281 ; CHECK-NEXT: store <2 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
1282 ; CHECK-NEXT: ret <2 x i64> [[RES]]
1283 ;
1284 %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
1285 ret <2 x i64> %res
1286 }
1287 define <2 x i64> @test_x86_avx2_psllv_q_const() #0 {
1288 ; CHECK-LABEL: @test_x86_avx2_psllv_q_const(
1289 ; CHECK-NEXT: call void @llvm.donothing()
1290 ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> zeroinitializer, <2 x i64> <i64 1, i64 -1>)
1291 ; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer
1292 ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> <i64 4, i64 -1>, <2 x i64> <i64 1, i64 -1>)
1293 ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8
1294 ; CHECK-NEXT: ret <2 x i64> [[RES]]
1295 ;
1296 %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> <i64 4, i64 -1>, <2 x i64> <i64 1, i64 -1>)
1297 ret <2 x i64> %res
1298 }
1299 declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone
1302 define <4 x i64> @test_x86_avx2_psllv_q_256(<4 x i64> %a0, <4 x i64> %a1) #0 {
1303 ; CHECK-LABEL: @test_x86_avx2_psllv_q_256(
1304 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
1305 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1306 ; CHECK-NEXT: call void @llvm.donothing()
1307 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i64> [[TMP2]], zeroinitializer
1308 ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i64>
1309 ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> [[TMP1]], <4 x i64> [[A1:%.*]])
1310 ; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i64> [[TMP5]], [[TMP4]]
1311 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1]])
1312 ; CHECK-NEXT: store <4 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
1313 ; CHECK-NEXT: ret <4 x i64> [[RES]]
1314 ;
1315 %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
1316 ret <4 x i64> %res
1317 }
1319 define <4 x i64> @test_x86_avx2_psllv_q_256_const() #0 {
1320 ; CHECK-LABEL: @test_x86_avx2_psllv_q_256_const(
1321 ; CHECK-NEXT: call void @llvm.donothing()
1322 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> zeroinitializer, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
1323 ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer
1324 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> <i64 4, i64 4, i64 4, i64 -1>, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
1325 ; CHECK-NEXT: store <4 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8
1326 ; CHECK-NEXT: ret <4 x i64> [[RES]]
1327 ;
1328 %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> <i64 4, i64 4, i64 4, i64 -1>, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
1329 ret <4 x i64> %res
1330 }
1331 declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
1334 define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) #0 {
1335 ; CHECK-LABEL: @test_x86_avx2_psrlv_d(
1336 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1337 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1338 ; CHECK-NEXT: call void @llvm.donothing()
1339 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
1340 ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
1341 ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
1342 ; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP5]], [[TMP4]]
1343 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]])
1344 ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
1345 ; CHECK-NEXT: ret <4 x i32> [[RES]]
1346 ;
1347 %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1348 ret <4 x i32> %res
1349 }
1351 define <4 x i32> @test_x86_avx2_psrlv_d_const() #0 {
1352 ; CHECK-LABEL: @test_x86_avx2_psrlv_d_const(
1353 ; CHECK-NEXT: call void @llvm.donothing()
1354 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
1355 ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
1356 ; CHECK-NEXT: [[RES0:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> <i32 2, i32 9, i32 0, i32 -1>, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
1357 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 1, i32 1, i32 -1>)
1358 ; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP3]], zeroinitializer
1359 ; CHECK-NEXT: [[RES1:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> <i32 4, i32 4, i32 4, i32 -1>, <4 x i32> <i32 1, i32 1, i32 1, i32 -1>)
1360 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP2]], [[TMP4]]
1361 ; CHECK-NEXT: [[RES2:%.*]] = add <4 x i32> [[RES0]], [[RES1]]
1362 ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1363 ; CHECK-NEXT: ret <4 x i32> [[RES2]]
1364 ;
1365 %res0 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> <i32 2, i32 9, i32 0, i32 -1>, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
1366 %res1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> <i32 4, i32 4, i32 4, i32 -1>, <4 x i32> <i32 1, i32 1, i32 1, i32 -1>)
1367 %res2 = add <4 x i32> %res0, %res1
1368 ret <4 x i32> %res2
1369 }
1370 declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone
1373 define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) #0 {
1374 ; CHECK-LABEL: @test_x86_avx2_psrlv_d_256(
1375 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
1376 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1377 ; CHECK-NEXT: call void @llvm.donothing()
1378 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer
1379 ; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32>
1380 ; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> [[TMP1]], <8 x i32> [[A1:%.*]])
1381 ; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i32> [[TMP5]], [[TMP4]]
1382 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[A1]])
1383 ; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
1384 ; CHECK-NEXT: ret <8 x i32> [[RES]]
1385 ;
1386 %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
1387 ret <8 x i32> %res
1388 }
1390 define <8 x i32> @test_x86_avx2_psrlv_d_256_const() #0 {
1391 ; CHECK-LABEL: @test_x86_avx2_psrlv_d_256_const(
1392 ; CHECK-NEXT: call void @llvm.donothing()
1393 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2>)
1394 ; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> [[TMP1]], zeroinitializer
1395 ; CHECK-NEXT: [[RES0:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0>, <8 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2>)
1396 ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
1397 ; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i32> [[TMP3]], zeroinitializer
1398 ; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
1399 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP2]], [[TMP4]]
1400 ; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES0]], [[RES1]]
1401 ; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1402 ; CHECK-NEXT: ret <8 x i32> [[RES2]]
1403 ;
1404 %res0 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0>, <8 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2>)
1405 %res1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
1406 %res2 = add <8 x i32> %res0, %res1
1407 ret <8 x i32> %res2
1408 }
1409 declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
1412 define <2 x i64> @test_x86_avx2_psrlv_q(<2 x i64> %a0, <2 x i64> %a1) #0 {
1413 ; CHECK-LABEL: @test_x86_avx2_psrlv_q(
1414 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1415 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1416 ; CHECK-NEXT: call void @llvm.donothing()
1417 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
1418 ; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
1419 ; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
1420 ; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i64> [[TMP5]], [[TMP4]]
1421 ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1]])
1422 ; CHECK-NEXT: store <2 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
1423 ; CHECK-NEXT: ret <2 x i64> [[RES]]
1424 ;
1425 %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
1426 ret <2 x i64> %res
1427 }
1429 define <2 x i64> @test_x86_avx2_psrlv_q_const() #0 {
1430 ; CHECK-LABEL: @test_x86_avx2_psrlv_q_const(
1431 ; CHECK-NEXT: call void @llvm.donothing()
1432 ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> zeroinitializer, <2 x i64> <i64 1, i64 -1>)
1433 ; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer
1434 ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> <i64 4, i64 4>, <2 x i64> <i64 1, i64 -1>)
1435 ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8
1436 ; CHECK-NEXT: ret <2 x i64> [[RES]]
1437 ;
1438 %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> <i64 4, i64 4>, <2 x i64> <i64 1, i64 -1>)
1439 ret <2 x i64> %res
1440 }
1441 declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
1444 define <4 x i64> @test_x86_avx2_psrlv_q_256(<4 x i64> %a0, <4 x i64> %a1) #0 {
1445 ; CHECK-LABEL: @test_x86_avx2_psrlv_q_256(
1446 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
1447 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1448 ; CHECK-NEXT: call void @llvm.donothing()
1449 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i64> [[TMP2]], zeroinitializer
1450 ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i64>
1451 ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> [[TMP1]], <4 x i64> [[A1:%.*]])
1452 ; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i64> [[TMP5]], [[TMP4]]
1453 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1]])
1454 ; CHECK-NEXT: store <4 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
1455 ; CHECK-NEXT: ret <4 x i64> [[RES]]
1456 ;
1457 %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
1458 ret <4 x i64> %res
1459 }
1462 define <4 x i64> @test_x86_avx2_psrlv_q_256_const() #0 {
1463 ; CHECK-LABEL: @test_x86_avx2_psrlv_q_256_const(
1464 ; CHECK-NEXT: call void @llvm.donothing()
1465 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> zeroinitializer, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
1466 ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer
1467 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> <i64 4, i64 4, i64 4, i64 4>, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
1468 ; CHECK-NEXT: store <4 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8
1469 ; CHECK-NEXT: ret <4 x i64> [[RES]]
1470 ;
1471 %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> <i64 4, i64 4, i64 4, i64 4>, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
1472 ret <4 x i64> %res
1473 }
1474 declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
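1475 ; The arithmetic variable shifts (psrav) below are expected to follow the same propagation
1476 ; scheme as psllv/psrlv: the operand shadow is shifted by the same amounts, then combined with sext(icmp ne) of the shift-amount shadow.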
1477 define <4 x i32> @test_x86_avx2_psrav_d(<4 x i32> %a0, <4 x i32> %a1) #0 {
1478 ; CHECK-LABEL: @test_x86_avx2_psrav_d(
1479 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1480 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1481 ; CHECK-NEXT: call void @llvm.donothing()
1482 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
1483 ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
1484 ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
1485 ; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP5]], [[TMP4]]
1486 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]])
1487 ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
1488 ; CHECK-NEXT: ret <4 x i32> [[RES]]
1489 ;
1490 %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1491 ret <4 x i32> %res
1492 }
1494 define <4 x i32> @test_x86_avx2_psrav_d_const() #0 {
1495 ; CHECK-LABEL: @test_x86_avx2_psrav_d_const(
1496 ; CHECK-NEXT: call void @llvm.donothing()
1497 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 18, i32 35, i32 52>)
1498 ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
1499 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> <i32 2, i32 9, i32 -12, i32 23>, <4 x i32> <i32 1, i32 18, i32 35, i32 52>)
1500 ; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr @__msan_retval_tls, align 8
1501 ; CHECK-NEXT: ret <4 x i32> [[RES]]
1502 ;
1503 %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> <i32 2, i32 9, i32 -12, i32 23>, <4 x i32> <i32 1, i32 18, i32 35, i32 52>)
1504 ret <4 x i32> %res
1505 }
1506 declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
1508 define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) #0 {
1509 ; CHECK-LABEL: @test_x86_avx2_psrav_d_256(
1510 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
1511 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1512 ; CHECK-NEXT: call void @llvm.donothing()
1513 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer
1514 ; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32>
1515 ; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> [[TMP1]], <8 x i32> [[A1:%.*]])
1516 ; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i32> [[TMP5]], [[TMP4]]
1517 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[A1]])
1518 ; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
1519 ; CHECK-NEXT: ret <8 x i32> [[RES]]
1520 ;
1521 %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
1522 ret <8 x i32> %res
1523 }
1525 define <8 x i32> @test_x86_avx2_psrav_d_256_const() #0 {
1526 ; CHECK-LABEL: @test_x86_avx2_psrav_d_256_const(
1527 ; CHECK-NEXT: call void @llvm.donothing()
1528 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>)
1529 ; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> [[TMP1]], zeroinitializer
1530 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>)
1531 ; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr @__msan_retval_tls, align 8
1532 ; CHECK-NEXT: ret <8 x i32> [[RES]]
1533 ;
1534 %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>)
1535 ret <8 x i32> %res
1536 }
1537 declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
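1538 ; The gather tests expect strict checks of all four shadowed operands (passthrough, pointer, index, mask) and a zero return shadow.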
1539 define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, ptr %a1, <4 x i32> %idx, <2 x double> %mask) #0 {
1540 ; CHECK-LABEL: @test_x86_avx2_gather_d_pd(
1541 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1542 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1543 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
1544 ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1545 ; CHECK-NEXT: call void @llvm.donothing()
1546 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1547 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
1548 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1549 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1550 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
1551 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
1552 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1553 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
1554 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
1555 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1556 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
1557 ; CHECK: 8:
1558 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1559 ; CHECK-NEXT: unreachable
1560 ; CHECK: 9:
1561 ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> [[A0:%.*]], ptr [[A1:%.*]], <4 x i32> [[IDX:%.*]], <2 x double> [[MASK:%.*]], i8 2)
1562 ; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1563 ; CHECK-NEXT: ret <2 x double> [[RES]]
1564 ;
1565 %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0,
1566 ptr %a1, <4 x i32> %idx, <2 x double> %mask, i8 2) ;
1567 ret <2 x double> %res
1568 }
1569 declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, ptr,
1570 <4 x i32>, <2 x double>, i8) nounwind readonly
1572 define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, ptr %a1, <4 x i32> %idx, <4 x double> %mask) #0 {
1573 ; CHECK-LABEL: @test_x86_avx2_gather_d_pd_256(
1574 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
1575 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1576 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1577 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
1578 ; CHECK-NEXT: call void @llvm.donothing()
1579 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
1580 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0
1581 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1582 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1583 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
1584 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
1585 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1586 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256
1587 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
1588 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1589 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
1590 ; CHECK: 8:
1591 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1592 ; CHECK-NEXT: unreachable
1593 ; CHECK: 9:
1594 ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> [[A0:%.*]], ptr [[A1:%.*]], <4 x i32> [[IDX:%.*]], <4 x double> [[MASK:%.*]], i8 2)
1595 ; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1596 ; CHECK-NEXT: ret <4 x double> [[RES]]
1597 ;
1598 %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0,
1599 ptr %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ;
1600 ret <4 x double> %res
1601 }
1602 declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, ptr,
1603 <4 x i32>, <4 x double>, i8) nounwind readonly
1605 define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, ptr %a1, <2 x i64> %idx, <2 x double> %mask) #0 {
1606 ; CHECK-LABEL: @test_x86_avx2_gather_q_pd(
1607 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1608 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1609 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
1610 ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1611 ; CHECK-NEXT: call void @llvm.donothing()
1612 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1613 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
1614 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1615 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1616 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
1617 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
1618 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1619 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
1620 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
1621 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1622 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
1623 ; CHECK: 8:
1624 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1625 ; CHECK-NEXT: unreachable
1626 ; CHECK: 9:
1627 ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> [[A0:%.*]], ptr [[A1:%.*]], <2 x i64> [[IDX:%.*]], <2 x double> [[MASK:%.*]], i8 2)
1628 ; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1629 ; CHECK-NEXT: ret <2 x double> [[RES]]
1630 ;
1631 %res = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0,
1632 ptr %a1, <2 x i64> %idx, <2 x double> %mask, i8 2) ;
1633 ret <2 x double> %res
1634 }
1635 declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, ptr,
1636 <2 x i64>, <2 x double>, i8) nounwind readonly
1638 define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, ptr %a1, <4 x i64> %idx, <4 x double> %mask) #0 {
1639 ; CHECK-LABEL: @test_x86_avx2_gather_q_pd_256(
1640 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
1641 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1642 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1643 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
1644 ; CHECK-NEXT: call void @llvm.donothing()
1645 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
1646 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0
1647 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1648 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1649 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP3]] to i256
1650 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0
1651 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1652 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256
1653 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
1654 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1655 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
1656 ; CHECK: 8:
1657 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1658 ; CHECK-NEXT: unreachable
1659 ; CHECK: 9:
1660 ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> [[A0:%.*]], ptr [[A1:%.*]], <4 x i64> [[IDX:%.*]], <4 x double> [[MASK:%.*]], i8 2)
1661 ; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1662 ; CHECK-NEXT: ret <4 x double> [[RES]]
1663 ;
1664 %res = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0,
1665 ptr %a1, <4 x i64> %idx, <4 x double> %mask, i8 2) ;
1666 ret <4 x double> %res
1667 }
1668 declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, ptr,
1669 <4 x i64>, <4 x double>, i8) nounwind readonly
1671 define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, ptr %a1, <4 x i32> %idx, <4 x float> %mask) #0 {
1672 ; CHECK-LABEL: @test_x86_avx2_gather_d_ps(
1673 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1674 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1675 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
1676 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1677 ; CHECK-NEXT: call void @llvm.donothing()
1678 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1679 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
1680 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1681 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1682 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
1683 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
1684 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1685 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
1686 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
1687 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1688 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
1689 ; CHECK: 8:
1690 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1691 ; CHECK-NEXT: unreachable
1692 ; CHECK: 9:
1693 ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> [[A0:%.*]], ptr [[A1:%.*]], <4 x i32> [[IDX:%.*]], <4 x float> [[MASK:%.*]], i8 2)
1694 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
1695 ; CHECK-NEXT: ret <4 x float> [[RES]]
1696 ;
1697 %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0,
1698 ptr %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
1699 ret <4 x float> %res
1700 }
1701 declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, ptr,
1702 <4 x i32>, <4 x float>, i8) nounwind readonly
1704 define <8 x float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, ptr %a1, <8 x i32> %idx, <8 x float> %mask) #0 {
1705 ; CHECK-LABEL: @test_x86_avx2_gather_d_ps_256(
1706 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
1707 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1708 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1709 ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
1710 ; CHECK-NEXT: call void @llvm.donothing()
1711 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
1712 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0
1713 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1714 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1715 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP3]] to i256
1716 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0
1717 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1718 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP4]] to i256
1719 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
1720 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1721 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
1722 ; CHECK: 8:
1723 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1724 ; CHECK-NEXT: unreachable
1725 ; CHECK: 9:
1726 ; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> [[A0:%.*]], ptr [[A1:%.*]], <8 x i32> [[IDX:%.*]], <8 x float> [[MASK:%.*]], i8 2)
1727 ; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
1728 ; CHECK-NEXT: ret <8 x float> [[RES]]
1729 ;
1730 %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
1731 ptr %a1, <8 x i32> %idx, <8 x float> %mask, i8 2) ;
1732 ret <8 x float> %res
1733 }
1734 declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, ptr,
1735 <8 x i32>, <8 x float>, i8) nounwind readonly
1737 define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, ptr %a1, <2 x i64> %idx, <4 x float> %mask) #0 {
1738 ; CHECK-LABEL: @test_x86_avx2_gather_q_ps(
1739 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1740 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1741 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
1742 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1743 ; CHECK-NEXT: call void @llvm.donothing()
1744 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1745 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
1746 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1747 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1748 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
1749 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
1750 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1751 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
1752 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
1753 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1754 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
1755 ; CHECK: 8:
1756 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1757 ; CHECK-NEXT: unreachable
1758 ; CHECK: 9:
1759 ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> [[A0:%.*]], ptr [[A1:%.*]], <2 x i64> [[IDX:%.*]], <4 x float> [[MASK:%.*]], i8 2)
1760 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
1761 ; CHECK-NEXT: ret <4 x float> [[RES]]
1762 ;
1763 %res = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0,
1764 ptr %a1, <2 x i64> %idx, <4 x float> %mask, i8 2) ;
1765 ret <4 x float> %res
1766 }
1767 declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, ptr,
1768 <2 x i64>, <4 x float>, i8) nounwind readonly
1770 define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, ptr %a1, <4 x i64> %idx, <4 x float> %mask) #0 {
1771 ; CHECK-LABEL: @test_x86_avx2_gather_q_ps_256(
1772 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1773 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1774 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
1775 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
1776 ; CHECK-NEXT: call void @llvm.donothing()
1777 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1778 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
1779 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1780 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1781 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP3]] to i256
1782 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0
1783 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1784 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
1785 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
1786 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1787 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
1788 ; CHECK: 8:
1789 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1790 ; CHECK-NEXT: unreachable
1791 ; CHECK: 9:
1792 ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> [[A0:%.*]], ptr [[A1:%.*]], <4 x i64> [[IDX:%.*]], <4 x float> [[MASK:%.*]], i8 2)
1793 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
1794 ; CHECK-NEXT: ret <4 x float> [[RES]]
1795 ;
1796 %res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0,
1797 ptr %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ;
1798 ret <4 x float> %res
1799 }
1800 declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, ptr,
1801 <4 x i64>, <4 x float>, i8) nounwind readonly
1803 define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, ptr %a1, <4 x i32> %idx, <2 x i64> %mask) #0 {
1804 ; CHECK-LABEL: @test_x86_avx2_gather_d_q(
1805 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1806 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1807 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
1808 ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1809 ; CHECK-NEXT: call void @llvm.donothing()
1810 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1811 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
1812 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1813 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1814 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
1815 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
1816 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1817 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
1818 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
1819 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1820 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
1821 ; CHECK: 8:
1822 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1823 ; CHECK-NEXT: unreachable
1824 ; CHECK: 9:
1825 ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> [[A0:%.*]], ptr [[A1:%.*]], <4 x i32> [[IDX:%.*]], <2 x i64> [[MASK:%.*]], i8 2)
1826 ; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1827 ; CHECK-NEXT: ret <2 x i64> [[RES]]
1828 ;
1829 %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0,
1830 ptr %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ;
1831 ret <2 x i64> %res
1832 }
1833 declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, ptr,
1834 <4 x i32>, <2 x i64>, i8) nounwind readonly
1836 define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, ptr %a1, <4 x i32> %idx, <4 x i64> %mask) #0 {
1837 ; CHECK-LABEL: @test_x86_avx2_gather_d_q_256(
1838 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
1839 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1840 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1841 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
1842 ; CHECK-NEXT: call void @llvm.donothing()
1843 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
1844 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0
1845 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1846 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1847 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
1848 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
1849 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1850 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256
1851 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
1852 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1853 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
1854 ; CHECK: 8:
1855 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1856 ; CHECK-NEXT: unreachable
1857 ; CHECK: 9:
1858 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> [[A0:%.*]], ptr [[A1:%.*]], <4 x i32> [[IDX:%.*]], <4 x i64> [[MASK:%.*]], i8 2)
1859 ; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1860 ; CHECK-NEXT: ret <4 x i64> [[RES]]
1861 ;
1862 %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0,
1863 ptr %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ;
1864 ret <4 x i64> %res
1865 }
1866 declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, ptr,
1867 <4 x i32>, <4 x i64>, i8) nounwind readonly
1869 define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, ptr %a1, <2 x i64> %idx, <2 x i64> %mask) #0 {
1870 ; CHECK-LABEL: @test_x86_avx2_gather_q_q(
1871 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1872 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1873 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
1874 ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1875 ; CHECK-NEXT: call void @llvm.donothing()
1876 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1877 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
1878 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1879 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1880 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
1881 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
1882 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1883 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
1884 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
1885 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1886 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
1887 ; CHECK: 8:
1888 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1889 ; CHECK-NEXT: unreachable
1890 ; CHECK: 9:
1891 ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> [[A0:%.*]], ptr [[A1:%.*]], <2 x i64> [[IDX:%.*]], <2 x i64> [[MASK:%.*]], i8 2)
1892 ; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1893 ; CHECK-NEXT: ret <2 x i64> [[RES]]
1894 ;
1895 %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
1896 ptr %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ;
1897 ret <2 x i64> %res
1898 }
1899 declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, ptr,
1900 <2 x i64>, <2 x i64>, i8) nounwind readonly
define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, ptr %a1, <4 x i64> %idx, <4 x i64> %mask) #0 {
; CHECK-LABEL: @test_x86_avx2_gather_q_q_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP3]] to i256
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0
; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256
; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
; CHECK: 9:
; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> [[A0:%.*]], ptr [[A1:%.*]], <4 x i64> [[IDX:%.*]], <4 x i64> [[MASK:%.*]], i8 2)
; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i64> [[RES]]
%res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0,
ptr %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ;
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, ptr,
<4 x i64>, <4 x i64>, i8) nounwind readonly
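; Note how the shadow loads mirror the argument layout in @__msan_param_tls:
; each parameter's shadow sits at the accumulated byte offset of the shadows
; before it (e.g. in the <4 x i32> variants below: 0 for %a0, 16 for %a1,
; 24 for %idx, 40 for %mask).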
define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, ptr %a1, <4 x i32> %idx, <4 x i32> %mask) #0 {
; CHECK-LABEL: @test_x86_avx2_gather_d_d(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
; CHECK: 9:
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], <4 x i32> [[IDX:%.*]], <4 x i32> [[MASK:%.*]], i8 2)
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[RES]]
%res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0,
ptr %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, ptr,
<4 x i32>, <4 x i32>, i8) nounwind readonly
define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, ptr %a1, <8 x i32> %idx, <8 x i32> %mask) #0 {
; CHECK-LABEL: @test_x86_avx2_gather_d_d_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP3]] to i256
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0
; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP4]] to i256
; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
; CHECK: 9:
; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> [[A0:%.*]], ptr [[A1:%.*]], <8 x i32> [[IDX:%.*]], <8 x i32> [[MASK:%.*]], i8 2)
; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i32> [[RES]]
%res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0,
ptr %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ;
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, ptr,
<8 x i32>, <8 x i32>, i8) nounwind readonly
define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, ptr %a1, <2 x i64> %idx, <4 x i32> %mask) #0 {
; CHECK-LABEL: @test_x86_avx2_gather_q_d(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
; CHECK: 9:
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], <2 x i64> [[IDX:%.*]], <4 x i32> [[MASK:%.*]], i8 2)
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[RES]]
%res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0,
ptr %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, ptr,
<2 x i64>, <4 x i32>, i8) nounwind readonly
define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, ptr %a1, <4 x i64> %idx, <4 x i32> %mask) #0 {
; CHECK-LABEL: @test_x86_avx2_gather_q_d_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP3]] to i256
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0
; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
; CHECK: 9:
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], <4 x i64> [[IDX:%.*]], <4 x i32> [[MASK:%.*]], i8 2)
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[RES]]
%res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0,
ptr %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ;
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, ptr,
<4 x i64>, <4 x i32>, i8) nounwind readonly
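; test_gather_mask additionally checks shadow propagation through the store
; to %out: the shadow of %mask (TMP4) is copied to the destination's shadow,
; whose address is the application address XOR'd with 87960930222080
; (0x500000000000, the MSan shadow mapping on Linux x86_64), ahead of the
; application-level store of %mask itself.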
define <8 x float> @test_gather_mask(<8 x float> %a0, ptr %a, <8 x i32> %idx, <8 x float> %mask, ptr nocapture %out) #0 {
; CHECK-LABEL: @test_gather_mask(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 104) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP6]], 0
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP3]] to i256
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP7]], 0
; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i32> [[TMP4]] to i256
; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP8]], 0
; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
; CHECK: 9:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
; CHECK: 10:
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> [[A0:%.*]], ptr [[A:%.*]], <8 x i32> [[IDX:%.*]], <8 x float> [[MASK:%.*]], i8 4)
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP5]], 0
; CHECK-NEXT: br i1 [[_MSCMP6]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]]
; CHECK: 11:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
; CHECK: 12:
; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[OUT:%.*]] to i64
; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 87960930222080
; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr
; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr [[TMP15]], align 4
; CHECK-NEXT: store <8 x float> [[MASK]], ptr [[OUT]], align 4
; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x float> [[RES]]
%res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
ptr %a, <8 x i32> %idx, <8 x float> %mask, i8 4) ;
store <8 x float> %mask, ptr %out, align 4
ret <8 x float> %res
}
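; test_mask_demanded_bits checks that the shadow follows the mask through the
; widening sext: the <2 x i1> mask shadow is sign-extended to <2 x i64>
; (_MSPROP) alongside the value, and that widened shadow participates in the
; usual operand check before the gather.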
define <2 x i64> @test_mask_demanded_bits(<2 x i64> %a0, ptr %a1, <2 x i64> %idx, <2 x i1> %mask) #0 {
; CHECK-LABEL: @test_mask_demanded_bits(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i1>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT: [[MASK1:%.*]] = sext <2 x i1> [[MASK:%.*]] to <2 x i64>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP3]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSPROP]] to i128
; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
; CHECK: 9:
; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> [[A0:%.*]], ptr [[A1:%.*]], <2 x i64> [[IDX:%.*]], <2 x i64> [[MASK1]], i8 2)
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[RES]]
%mask1 = sext <2 x i1> %mask to <2 x i64>
%res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
ptr %a1, <2 x i64> %idx, <2 x i64> %mask1, i8 2) ;
ret <2 x i64> %res
}
attributes #0 = { sanitize_memory }