1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s
4 target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
5 target triple = "x86_64-unknown-linux-gnu"
7 define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) #0 {
8 ; CHECK-LABEL: @test_x86_avx2_packssdw(
9 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
10 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
11 ; CHECK-NEXT: call void @llvm.donothing()
12 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
13 ; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32>
14 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer
15 ; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32>
16 ; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP4]], <8 x i32> [[TMP6]])
17 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
18 ; CHECK-NEXT: store <16 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
19 ; CHECK-NEXT: ret <16 x i16> [[RES]]
21 %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
24 declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
27 define <16 x i16> @test_x86_avx2_packssdw_fold() #0 {
28 ; CHECK-LABEL: @test_x86_avx2_packssdw_fold(
29 ; CHECK-NEXT: call void @llvm.donothing()
30 ; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer)
31 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
32 ; CHECK-NEXT: store <16 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
33 ; CHECK-NEXT: ret <16 x i16> [[RES]]
35 %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
40 define <32 x i8> @test_x86_avx2_packsswb(<16 x i16> %a0, <16 x i16> %a1) #0 {
41 ; CHECK-LABEL: @test_x86_avx2_packsswb(
42 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
43 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
44 ; CHECK-NEXT: call void @llvm.donothing()
45 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i16> [[TMP1]], zeroinitializer
46 ; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i16>
47 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP2]], zeroinitializer
48 ; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16>
49 ; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP4]], <16 x i16> [[TMP6]])
50 ; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
51 ; CHECK-NEXT: store <32 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
52 ; CHECK-NEXT: ret <32 x i8> [[RES]]
54 %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
57 declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
60 define <32 x i8> @test_x86_avx2_packsswb_fold() #0 {
61 ; CHECK-LABEL: @test_x86_avx2_packsswb_fold(
62 ; CHECK-NEXT: call void @llvm.donothing()
63 ; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> zeroinitializer, <16 x i16> zeroinitializer)
64 ; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> <i16 0, i16 255, i16 256, i16 -1, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 -1, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
65 ; CHECK-NEXT: store <32 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
66 ; CHECK-NEXT: ret <32 x i8> [[RES]]
68 %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
73 define <32 x i8> @test_x86_avx2_packuswb(<16 x i16> %a0, <16 x i16> %a1) #0 {
74 ; CHECK-LABEL: @test_x86_avx2_packuswb(
75 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
76 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
77 ; CHECK-NEXT: call void @llvm.donothing()
78 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i16> [[TMP1]], zeroinitializer
79 ; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i16>
80 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP2]], zeroinitializer
81 ; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16>
82 ; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP4]], <16 x i16> [[TMP6]])
83 ; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
84 ; CHECK-NEXT: store <32 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
85 ; CHECK-NEXT: ret <32 x i8> [[RES]]
87 %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
90 declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
93 define <32 x i8> @test_x86_avx2_packuswb_fold() #0 {
94 ; CHECK-LABEL: @test_x86_avx2_packuswb_fold(
95 ; CHECK-NEXT: call void @llvm.donothing()
96 ; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> zeroinitializer, <16 x i16> zeroinitializer)
97 ; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> <i16 0, i16 255, i16 256, i16 -1, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 -1, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
98 ; CHECK-NEXT: store <32 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
99 ; CHECK-NEXT: ret <32 x i8> [[RES]]
101 %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
106 define <32 x i8> @test_x86_avx2_pavg_b(<32 x i8> %a0, <32 x i8> %a1) #0 {
107 ; CHECK-LABEL: @test_x86_avx2_pavg_b(
108 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
109 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
110 ; CHECK-NEXT: call void @llvm.donothing()
111 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
112 ; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
113 ; CHECK-NEXT: store <32 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
114 ; CHECK-NEXT: ret <32 x i8> [[RES]]
116 %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
119 declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone
122 define <16 x i16> @test_x86_avx2_pavg_w(<16 x i16> %a0, <16 x i16> %a1) #0 {
123 ; CHECK-LABEL: @test_x86_avx2_pavg_w(
124 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
125 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
126 ; CHECK-NEXT: call void @llvm.donothing()
127 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
128 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
129 ; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
130 ; CHECK-NEXT: ret <16 x i16> [[RES]]
132 %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
135 declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone
138 define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 {
139 ; CHECK-LABEL: @test_x86_avx2_pmadd_wd(
140 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
141 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
142 ; CHECK-NEXT: call void @llvm.donothing()
143 ; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
144 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP3]] to <8 x i32>
145 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer
146 ; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32>
147 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
148 ; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
149 ; CHECK-NEXT: ret <8 x i32> [[RES]]
151 %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) ; <<8 x i32>> [#uses=1]
154 declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone
157 define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) #0 {
158 ; CHECK-LABEL: @test_x86_avx2_pmovmskb(
159 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
160 ; CHECK-NEXT: call void @llvm.donothing()
161 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <32 x i8> [[TMP1]] to i256
162 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
163 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
165 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6:[0-9]+]]
166 ; CHECK-NEXT: unreachable
168 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> [[A0:%.*]])
169 ; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
170 ; CHECK-NEXT: ret i32 [[RES]]
172 %res = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0) ; <i32> [#uses=1]
175 declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone
178 define <16 x i16> @test_x86_avx2_pmulh_w(<16 x i16> %a0, <16 x i16> %a1) #0 {
179 ; CHECK-LABEL: @test_x86_avx2_pmulh_w(
180 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
181 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
182 ; CHECK-NEXT: call void @llvm.donothing()
183 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
184 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
185 ; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
186 ; CHECK-NEXT: ret <16 x i16> [[RES]]
188 %res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
191 declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone
194 define <16 x i16> @test_x86_avx2_pmulhu_w(<16 x i16> %a0, <16 x i16> %a1) #0 {
195 ; CHECK-LABEL: @test_x86_avx2_pmulhu_w(
196 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
197 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
198 ; CHECK-NEXT: call void @llvm.donothing()
199 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
200 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
201 ; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
202 ; CHECK-NEXT: ret <16 x i16> [[RES]]
204 %res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
207 declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone
210 define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) #0 {
211 ; CHECK-LABEL: @test_x86_avx2_psad_bw(
212 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
213 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
214 ; CHECK-NEXT: call void @llvm.donothing()
215 ; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
216 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <4 x i64>
217 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i64> [[TMP4]], zeroinitializer
218 ; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i64>
219 ; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i64> [[TMP6]], splat (i64 48)
220 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
221 ; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8
222 ; CHECK-NEXT: ret <4 x i64> [[RES]]
224 %res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) ; <<4 x i64>> [#uses=1]
227 declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone
230 define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) #0 {
231 ; CHECK-LABEL: @test_x86_avx2_psll_d(
232 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
233 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
234 ; CHECK-NEXT: call void @llvm.donothing()
235 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
236 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
237 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
238 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256
239 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <8 x i32>
240 ; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
241 ; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[TMP7]]
242 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> [[A0:%.*]], <4 x i32> [[A1]])
243 ; CHECK-NEXT: store <8 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
244 ; CHECK-NEXT: ret <8 x i32> [[RES]]
246 %res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
249 declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
252 define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) #0 {
253 ; CHECK-LABEL: @test_x86_avx2_psll_q(
254 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
255 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
256 ; CHECK-NEXT: call void @llvm.donothing()
257 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
258 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
259 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
260 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256
261 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <4 x i64>
262 ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
263 ; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i64> [[TMP8]], [[TMP7]]
264 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> [[A0:%.*]], <2 x i64> [[A1]])
265 ; CHECK-NEXT: store <4 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8
266 ; CHECK-NEXT: ret <4 x i64> [[RES]]
268 %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
271 declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone
274 define <16 x i16> @test_x86_avx2_psll_w(<16 x i16> %a0, <8 x i16> %a1) #0 {
275 ; CHECK-LABEL: @test_x86_avx2_psll_w(
276 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
277 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
278 ; CHECK-NEXT: call void @llvm.donothing()
279 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
280 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
281 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
282 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256
283 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <16 x i16>
284 ; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> [[TMP1]], <8 x i16> [[A1:%.*]])
285 ; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP8]], [[TMP7]]
286 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> [[A0:%.*]], <8 x i16> [[A1]])
287 ; CHECK-NEXT: store <16 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
288 ; CHECK-NEXT: ret <16 x i16> [[RES]]
290 %res = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
293 declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone
296 define <8 x i32> @test_x86_avx2_pslli_d(<8 x i32> %a0) #0 {
297 ; CHECK-LABEL: @test_x86_avx2_pslli_d(
298 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
299 ; CHECK-NEXT: call void @llvm.donothing()
300 ; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> [[TMP1]], i32 7)
301 ; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP2]], zeroinitializer
302 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> [[A0:%.*]], i32 7)
303 ; CHECK-NEXT: store <8 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
304 ; CHECK-NEXT: ret <8 x i32> [[RES]]
306 %res = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
309 declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) nounwind readnone
312 define <4 x i64> @test_x86_avx2_pslli_q(<4 x i64> %a0) #0 {
313 ; CHECK-LABEL: @test_x86_avx2_pslli_q(
314 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
315 ; CHECK-NEXT: call void @llvm.donothing()
316 ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> [[TMP1]], i32 7)
317 ; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP2]], zeroinitializer
318 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> [[A0:%.*]], i32 7)
319 ; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8
320 ; CHECK-NEXT: ret <4 x i64> [[RES]]
322 %res = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
325 declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) nounwind readnone
328 define <16 x i16> @test_x86_avx2_pslli_w(<16 x i16> %a0) #0 {
329 ; CHECK-LABEL: @test_x86_avx2_pslli_w(
330 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
331 ; CHECK-NEXT: call void @llvm.donothing()
332 ; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> [[TMP1]], i32 7)
333 ; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP2]], zeroinitializer
334 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> [[A0:%.*]], i32 7)
335 ; CHECK-NEXT: store <16 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8
336 ; CHECK-NEXT: ret <16 x i16> [[RES]]
338 %res = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
341 declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) nounwind readnone
344 define <8 x i32> @test_x86_avx2_psra_d(<8 x i32> %a0, <4 x i32> %a1) #0 {
345 ; CHECK-LABEL: @test_x86_avx2_psra_d(
346 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
347 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
348 ; CHECK-NEXT: call void @llvm.donothing()
349 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
350 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
351 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
352 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256
353 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <8 x i32>
354 ; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
355 ; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[TMP7]]
356 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> [[A0:%.*]], <4 x i32> [[A1]])
357 ; CHECK-NEXT: store <8 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
358 ; CHECK-NEXT: ret <8 x i32> [[RES]]
360 %res = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
363 declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone
366 define <16 x i16> @test_x86_avx2_psra_w(<16 x i16> %a0, <8 x i16> %a1) #0 {
367 ; CHECK-LABEL: @test_x86_avx2_psra_w(
368 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
369 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
370 ; CHECK-NEXT: call void @llvm.donothing()
371 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
372 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
373 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
374 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256
375 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <16 x i16>
376 ; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> [[TMP1]], <8 x i16> [[A1:%.*]])
377 ; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP8]], [[TMP7]]
378 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> [[A0:%.*]], <8 x i16> [[A1]])
379 ; CHECK-NEXT: store <16 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
380 ; CHECK-NEXT: ret <16 x i16> [[RES]]
382 %res = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
385 declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone
388 define <8 x i32> @test_x86_avx2_psrai_d(<8 x i32> %a0) #0 {
389 ; CHECK-LABEL: @test_x86_avx2_psrai_d(
390 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
391 ; CHECK-NEXT: call void @llvm.donothing()
392 ; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> [[TMP1]], i32 7)
393 ; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP2]], zeroinitializer
394 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> [[A0:%.*]], i32 7)
395 ; CHECK-NEXT: store <8 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
396 ; CHECK-NEXT: ret <8 x i32> [[RES]]
398 %res = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
401 declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) nounwind readnone
404 define <16 x i16> @test_x86_avx2_psrai_w(<16 x i16> %a0) #0 {
405 ; CHECK-LABEL: @test_x86_avx2_psrai_w(
406 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
407 ; CHECK-NEXT: call void @llvm.donothing()
408 ; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> [[TMP1]], i32 7)
409 ; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP2]], zeroinitializer
410 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> [[A0:%.*]], i32 7)
411 ; CHECK-NEXT: store <16 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8
412 ; CHECK-NEXT: ret <16 x i16> [[RES]]
414 %res = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
417 declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) nounwind readnone
420 define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) #0 {
421 ; CHECK-LABEL: @test_x86_avx2_psrl_d(
422 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
423 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
424 ; CHECK-NEXT: call void @llvm.donothing()
425 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
426 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
427 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
428 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256
429 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <8 x i32>
430 ; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
431 ; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[TMP7]]
432 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> [[A0:%.*]], <4 x i32> [[A1]])
433 ; CHECK-NEXT: store <8 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
434 ; CHECK-NEXT: ret <8 x i32> [[RES]]
436 %res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
439 declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
442 define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) #0 {
443 ; CHECK-LABEL: @test_x86_avx2_psrl_q(
444 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
445 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
446 ; CHECK-NEXT: call void @llvm.donothing()
447 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
448 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
449 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
450 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256
451 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <4 x i64>
452 ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
453 ; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i64> [[TMP8]], [[TMP7]]
454 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> [[A0:%.*]], <2 x i64> [[A1]])
455 ; CHECK-NEXT: store <4 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8
456 ; CHECK-NEXT: ret <4 x i64> [[RES]]
458 %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
461 declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone
464 define <16 x i16> @test_x86_avx2_psrl_w(<16 x i16> %a0, <8 x i16> %a1) #0 {
465 ; CHECK-LABEL: @test_x86_avx2_psrl_w(
466 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
467 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
468 ; CHECK-NEXT: call void @llvm.donothing()
469 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
470 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
471 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
472 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i256
473 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP6]] to <16 x i16>
474 ; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[TMP1]], <8 x i16> [[A1:%.*]])
475 ; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP8]], [[TMP7]]
476 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[A0:%.*]], <8 x i16> [[A1]])
477 ; CHECK-NEXT: store <16 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
478 ; CHECK-NEXT: ret <16 x i16> [[RES]]
480 %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
483 declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone
486 define <16 x i16> @test_x86_avx2_psrl_w_load(<16 x i16> %a0, ptr %p) #0 {
487 ; CHECK-LABEL: @test_x86_avx2_psrl_w_load(
488 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
489 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
490 ; CHECK-NEXT: call void @llvm.donothing()
491 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
492 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
494 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
495 ; CHECK-NEXT: unreachable
497 ; CHECK-NEXT: [[A1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16
498 ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64
499 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
500 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
501 ; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
502 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128
503 ; CHECK-NEXT: [[TMP9:%.*]] = trunc i128 [[TMP8]] to i64
504 ; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
505 ; CHECK-NEXT: [[TMP11:%.*]] = sext i1 [[TMP10]] to i256
506 ; CHECK-NEXT: [[TMP12:%.*]] = bitcast i256 [[TMP11]] to <16 x i16>
507 ; CHECK-NEXT: [[TMP13:%.*]] = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[TMP2]], <8 x i16> [[A1]])
508 ; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i16> [[TMP13]], [[TMP12]]
509 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> [[A0:%.*]], <8 x i16> [[A1]])
510 ; CHECK-NEXT: store <16 x i16> [[TMP14]], ptr @__msan_retval_tls, align 8
511 ; CHECK-NEXT: ret <16 x i16> [[RES]]
513 %a1 = load <8 x i16>, ptr %p
514 %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
519 define <8 x i32> @test_x86_avx2_psrli_d(<8 x i32> %a0) #0 {
520 ; CHECK-LABEL: @test_x86_avx2_psrli_d(
521 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
522 ; CHECK-NEXT: call void @llvm.donothing()
523 ; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> [[TMP1]], i32 7)
524 ; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP2]], zeroinitializer
525 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> [[A0:%.*]], i32 7)
526 ; CHECK-NEXT: store <8 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
527 ; CHECK-NEXT: ret <8 x i32> [[RES]]
529 %res = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
532 declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) nounwind readnone
535 define <4 x i64> @test_x86_avx2_psrli_q(<4 x i64> %a0) #0 {
536 ; CHECK-LABEL: @test_x86_avx2_psrli_q(
537 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
538 ; CHECK-NEXT: call void @llvm.donothing()
539 ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> [[TMP1]], i32 7)
540 ; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP2]], zeroinitializer
541 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> [[A0:%.*]], i32 7)
542 ; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8
543 ; CHECK-NEXT: ret <4 x i64> [[RES]]
545 %res = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
548 declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) nounwind readnone
551 define <16 x i16> @test_x86_avx2_psrli_w(<16 x i16> %a0) #0 {
552 ; CHECK-LABEL: @test_x86_avx2_psrli_w(
553 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
554 ; CHECK-NEXT: call void @llvm.donothing()
555 ; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> [[TMP1]], i32 7)
556 ; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP2]], zeroinitializer
557 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> [[A0:%.*]], i32 7)
558 ; CHECK-NEXT: store <16 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8
559 ; CHECK-NEXT: ret <16 x i16> [[RES]]
561 %res = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
564 declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone
567 define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) #0 {
568 ; CHECK-LABEL: @test_x86_avx2_phadd_d(
569 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
570 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
571 ; CHECK-NEXT: call void @llvm.donothing()
572 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
573 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
574 ; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
575 ; CHECK-NEXT: ret <8 x i32> [[RES]]
577 %res = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
580 declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone
583 define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) #0 {
584 ; CHECK-LABEL: @test_x86_avx2_phadd_sw(
585 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
586 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
587 ; CHECK-NEXT: call void @llvm.donothing()
588 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
589 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
590 ; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
591 ; CHECK-NEXT: ret <16 x i16> [[RES]]
593 %res = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
596 declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone
599 define <16 x i16> @test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) #0 {
600 ; CHECK-LABEL: @test_x86_avx2_phadd_w(
601 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
602 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
603 ; CHECK-NEXT: call void @llvm.donothing()
604 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
605 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
606 ; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
607 ; CHECK-NEXT: ret <16 x i16> [[RES]]
609 %res = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
612 declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone
615 define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) #0 {
616 ; CHECK-LABEL: @test_x86_avx2_phsub_d(
617 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
618 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
619 ; CHECK-NEXT: call void @llvm.donothing()
620 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
621 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
622 ; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
623 ; CHECK-NEXT: ret <8 x i32> [[RES]]
625 %res = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
628 declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone
631 define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) #0 {
632 ; CHECK-LABEL: @test_x86_avx2_phsub_sw(
633 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
634 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
635 ; CHECK-NEXT: call void @llvm.donothing()
636 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
637 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
638 ; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
639 ; CHECK-NEXT: ret <16 x i16> [[RES]]
641 %res = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
644 declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone
647 define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) #0 {
648 ; CHECK-LABEL: @test_x86_avx2_phsub_w(
649 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
650 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
651 ; CHECK-NEXT: call void @llvm.donothing()
652 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
653 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
654 ; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
655 ; CHECK-NEXT: ret <16 x i16> [[RES]]
657 %res = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
660 declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone
663 define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) #0 {
664 ; CHECK-LABEL: @test_x86_avx2_pmadd_ub_sw(
665 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
666 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
667 ; CHECK-NEXT: call void @llvm.donothing()
668 ; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
669 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <16 x i16>
670 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP4]], zeroinitializer
671 ; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16>
672 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
673 ; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr @__msan_retval_tls, align 8
674 ; CHECK-NEXT: ret <16 x i16> [[RES]]
676 %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
679 declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone
681 define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) #0 {
682 ; CHECK-LABEL: @test_x86_avx2_pmadd_ub_sw_load_op0(
683 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
684 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
685 ; CHECK-NEXT: call void @llvm.donothing()
686 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
687 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
689 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
690 ; CHECK-NEXT: unreachable
692 ; CHECK-NEXT: [[A0:%.*]] = load <32 x i8>, ptr [[PTR:%.*]], align 32
693 ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR]] to i64
694 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
695 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
696 ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32
697 ; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i8> [[_MSLD]], [[TMP2]]
698 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP8]] to <16 x i16>
699 ; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i16> [[TMP9]], zeroinitializer
700 ; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i16>
701 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]])
702 ; CHECK-NEXT: store <16 x i16> [[TMP11]], ptr @__msan_retval_tls, align 8
703 ; CHECK-NEXT: ret <16 x i16> [[RES]]
705 %a0 = load <32 x i8>, ptr %ptr
706 %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
710 define <16 x i16> @test_x86_avx2_pmul_hr_sw(<16 x i16> %a0, <16 x i16> %a1) #0 {
711 ; CHECK-LABEL: @test_x86_avx2_pmul_hr_sw(
712 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
713 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
714 ; CHECK-NEXT: call void @llvm.donothing()
715 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
716 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
717 ; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
718 ; CHECK-NEXT: ret <16 x i16> [[RES]]
720 %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
723 declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone
726 define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) #0 {
727 ; CHECK-LABEL: @test_x86_avx2_pshuf_b(
728 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
729 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
730 ; CHECK-NEXT: call void @llvm.donothing()
731 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
732 ; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
733 ; CHECK-NEXT: store <32 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
734 ; CHECK-NEXT: ret <32 x i8> [[RES]]
736 %res = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i8>> [#uses=1]
739 declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone
742 define <32 x i8> @test_x86_avx2_psign_b(<32 x i8> %a0, <32 x i8> %a1) #0 {
743 ; CHECK-LABEL: @test_x86_avx2_psign_b(
744 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
745 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
746 ; CHECK-NEXT: call void @llvm.donothing()
747 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
748 ; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
749 ; CHECK-NEXT: store <32 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
750 ; CHECK-NEXT: ret <32 x i8> [[RES]]
752 %res = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
755 declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone
758 define <8 x i32> @test_x86_avx2_psign_d(<8 x i32> %a0, <8 x i32> %a1) #0 {
759 ; CHECK-LABEL: @test_x86_avx2_psign_d(
760 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
761 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
762 ; CHECK-NEXT: call void @llvm.donothing()
763 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
764 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
765 ; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
766 ; CHECK-NEXT: ret <8 x i32> [[RES]]
768 %res = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i32>> [#uses=1]
771 declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone
774 define <16 x i16> @test_x86_avx2_psign_w(<16 x i16> %a0, <16 x i16> %a1) #0 {
775 ; CHECK-LABEL: @test_x86_avx2_psign_w(
776 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
777 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
778 ; CHECK-NEXT: call void @llvm.donothing()
779 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
780 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
781 ; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
782 ; CHECK-NEXT: ret <16 x i16> [[RES]]
784 %res = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
787 declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone
790 define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) #0 {
791 ; CHECK-LABEL: @test_x86_avx2_mpsadbw(
792 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
793 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
794 ; CHECK-NEXT: call void @llvm.donothing()
795 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i8> [[TMP1]] to i256
796 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0
797 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP2]] to i256
798 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
799 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
800 ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
802 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
803 ; CHECK-NEXT: unreachable
805 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]], i8 7)
806 ; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
807 ; CHECK-NEXT: ret <16 x i16> [[RES]]
809 %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
812 declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
814 define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(ptr %ptr, <32 x i8> %a1) #0 {
815 ; CHECK-LABEL: @test_x86_avx2_mpsadbw_load_op0(
816 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
817 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
818 ; CHECK-NEXT: call void @llvm.donothing()
819 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
820 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
822 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
823 ; CHECK-NEXT: unreachable
825 ; CHECK-NEXT: [[A0:%.*]] = load <32 x i8>, ptr [[PTR:%.*]], align 32
826 ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR]] to i64
827 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
828 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
829 ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32
830 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i8> [[_MSLD]] to i256
831 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP8]], 0
832 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP2]] to i256
833 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP9]], 0
834 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
835 ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
837 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
838 ; CHECK-NEXT: unreachable
840 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]], i8 7)
841 ; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
842 ; CHECK-NEXT: ret <16 x i16> [[RES]]
844 %a0 = load <32 x i8>, ptr %ptr
845 %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
849 define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) #0 {
850 ; CHECK-LABEL: @test_x86_avx2_packusdw(
851 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
852 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
853 ; CHECK-NEXT: call void @llvm.donothing()
854 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
855 ; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32>
856 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer
857 ; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32>
858 ; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP4]], <8 x i32> [[TMP6]])
859 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
860 ; CHECK-NEXT: store <16 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
861 ; CHECK-NEXT: ret <16 x i16> [[RES]]
863 %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
866 declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
869 define <16 x i16> @test_x86_avx2_packusdw_fold() #0 {
870 ; CHECK-LABEL: @test_x86_avx2_packusdw_fold(
871 ; CHECK-NEXT: call void @llvm.donothing()
872 ; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer)
873 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
874 ; CHECK-NEXT: store <16 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
875 ; CHECK-NEXT: ret <16 x i16> [[RES]]
877 %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
882 define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) #0 {
883 ; CHECK-LABEL: @test_x86_avx2_pblendvb(
884 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
885 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
886 ; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
887 ; CHECK-NEXT: call void @llvm.donothing()
888 ; CHECK-NEXT: [[TMP4:%.*]] = ashr <32 x i8> [[A2:%.*]], splat (i8 7)
889 ; CHECK-NEXT: [[TMP5:%.*]] = trunc <32 x i8> [[TMP4]] to <32 x i1>
890 ; CHECK-NEXT: [[TMP6:%.*]] = ashr <32 x i8> [[TMP1]], splat (i8 7)
891 ; CHECK-NEXT: [[TMP7:%.*]] = trunc <32 x i8> [[TMP6]] to <32 x i1>
892 ; CHECK-NEXT: [[TMP8:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP2]], <32 x i8> [[TMP3]]
893 ; CHECK-NEXT: [[TMP9:%.*]] = xor <32 x i8> [[A1:%.*]], [[A0:%.*]]
894 ; CHECK-NEXT: [[TMP10:%.*]] = or <32 x i8> [[TMP9]], [[TMP2]]
895 ; CHECK-NEXT: [[TMP11:%.*]] = or <32 x i8> [[TMP10]], [[TMP3]]
896 ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP7]], <32 x i8> [[TMP11]], <32 x i8> [[TMP8]]
897 ; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> [[A0]], <32 x i8> [[A1]], <32 x i8> [[A2]])
898 ; CHECK-NEXT: store <32 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
899 ; CHECK-NEXT: ret <32 x i8> [[RES]]
901 %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) ; <<32 x i8>> [#uses=1]
904 declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone
907 define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) #0 {
908 ; CHECK-LABEL: @test_x86_avx2_pblendw(
909 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
910 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
911 ; CHECK-NEXT: call void @llvm.donothing()
912 ; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> [[TMP2]], <16 x i32> <i32 16, i32 17, i32 18, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15>
913 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]], <16 x i32> <i32 16, i32 17, i32 18, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15>
914 ; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
915 ; CHECK-NEXT: ret <16 x i16> [[TMP3]]
917 %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i8 7) ; <<16 x i16>> [#uses=1]
920 declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone
923 define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) #0 {
924 ; CHECK-LABEL: @test_x86_avx2_pblendd_128(
925 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
926 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
927 ; CHECK-NEXT: call void @llvm.donothing()
928 ; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 4, i32 5, i32 6, i32 3>
929 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], <4 x i32> <i32 4, i32 5, i32 6, i32 3>
930 ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
931 ; CHECK-NEXT: ret <4 x i32> [[TMP3]]
933 %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i8 7) ; <<4 x i32>> [#uses=1]
936 declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i8) nounwind readnone
939 define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) #0 {
940 ; CHECK-LABEL: @test_x86_avx2_pblendd_256(
941 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
942 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
943 ; CHECK-NEXT: call void @llvm.donothing()
944 ; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
945 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]], <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
946 ; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
947 ; CHECK-NEXT: ret <8 x i32> [[TMP3]]
949 %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
952 declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
955 define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) #0 {
956 ; CHECK-LABEL: @test_x86_avx2_permd(
957 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
958 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
959 ; CHECK-NEXT: call void @llvm.donothing()
960 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
961 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> [[A0:%.*]], <8 x i32> [[A1:%.*]])
962 ; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
963 ; CHECK-NEXT: ret <8 x i32> [[RES]]
965 %res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
968 declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
971 define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x i32> %a1) #0 {
972 ; CHECK-LABEL: @test_x86_avx2_permps(
973 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
974 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
975 ; CHECK-NEXT: call void @llvm.donothing()
976 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
977 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0
978 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
979 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
980 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
981 ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
983 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
984 ; CHECK-NEXT: unreachable
986 ; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx2.permps(<8 x float> [[A0:%.*]], <8 x i32> [[A1:%.*]])
987 ; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
988 ; CHECK-NEXT: ret <8 x float> [[RES]]
990 %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
993 declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly
996 define <2 x i64> @test_x86_avx2_maskload_q(ptr %a0, <2 x i64> %a1) #0 {
997 ; CHECK-LABEL: @test_x86_avx2_maskload_q(
998 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
999 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1000 ; CHECK-NEXT: call void @llvm.donothing()
1001 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1002 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1003 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
1004 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1005 ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
1007 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1008 ; CHECK-NEXT: unreachable
1010 ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.maskload.q(ptr [[A0:%.*]], <2 x i64> [[A1:%.*]])
1011 ; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1012 ; CHECK-NEXT: ret <2 x i64> [[RES]]
1014 %res = call <2 x i64> @llvm.x86.avx2.maskload.q(ptr %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
1017 declare <2 x i64> @llvm.x86.avx2.maskload.q(ptr, <2 x i64>) nounwind readonly
1020 define <4 x i64> @test_x86_avx2_maskload_q_256(ptr %a0, <4 x i64> %a1) #0 {
1021 ; CHECK-LABEL: @test_x86_avx2_maskload_q_256(
1022 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1023 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1024 ; CHECK-NEXT: call void @llvm.donothing()
1025 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1026 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
1027 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
1028 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1029 ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
1031 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1032 ; CHECK-NEXT: unreachable
1034 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr [[A0:%.*]], <4 x i64> [[A1:%.*]])
1035 ; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1036 ; CHECK-NEXT: ret <4 x i64> [[RES]]
1038 %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
1041 declare <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr, <4 x i64>) nounwind readonly
1044 define <4 x i32> @test_x86_avx2_maskload_d(ptr %a0, <4 x i32> %a1) #0 {
1045 ; CHECK-LABEL: @test_x86_avx2_maskload_d(
1046 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1047 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1048 ; CHECK-NEXT: call void @llvm.donothing()
1049 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1050 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1051 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
1052 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1053 ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
1055 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1056 ; CHECK-NEXT: unreachable
1058 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.maskload.d(ptr [[A0:%.*]], <4 x i32> [[A1:%.*]])
1059 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
1060 ; CHECK-NEXT: ret <4 x i32> [[RES]]
1062 %res = call <4 x i32> @llvm.x86.avx2.maskload.d(ptr %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1065 declare <4 x i32> @llvm.x86.avx2.maskload.d(ptr, <4 x i32>) nounwind readonly
1068 define <8 x i32> @test_x86_avx2_maskload_d_256(ptr %a0, <8 x i32> %a1) #0 {
1069 ; CHECK-LABEL: @test_x86_avx2_maskload_d_256(
1070 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1071 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1072 ; CHECK-NEXT: call void @llvm.donothing()
1073 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1074 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
1075 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
1076 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1077 ; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
1079 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1080 ; CHECK-NEXT: unreachable
1082 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr [[A0:%.*]], <8 x i32> [[A1:%.*]])
1083 ; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
1084 ; CHECK-NEXT: ret <8 x i32> [[RES]]
1086 %res = call <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
1089 declare <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr, <8 x i32>) nounwind readonly
1092 define void @test_x86_avx2_maskstore_q(ptr %a0, <2 x i64> %a1, <2 x i64> %a2) #0 {
1093 ; CHECK-LABEL: @test_x86_avx2_maskstore_q(
1094 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1095 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1096 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
1097 ; CHECK-NEXT: call void @llvm.donothing()
1098 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1099 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
1100 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
1101 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1102 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
1103 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0
1104 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1105 ; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
1107 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1108 ; CHECK-NEXT: unreachable
1110 ; CHECK-NEXT: call void @llvm.x86.avx2.maskstore.q(ptr [[A0:%.*]], <2 x i64> [[A1:%.*]], <2 x i64> [[A2:%.*]])
1111 ; CHECK-NEXT: ret void
1113 call void @llvm.x86.avx2.maskstore.q(ptr %a0, <2 x i64> %a1, <2 x i64> %a2)
1116 declare void @llvm.x86.avx2.maskstore.q(ptr, <2 x i64>, <2 x i64>) nounwind
1119 define void @test_x86_avx2_maskstore_q_256(ptr %a0, <4 x i64> %a1, <4 x i64> %a2) #0 {
1120 ; CHECK-LABEL: @test_x86_avx2_maskstore_q_256(
1121 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1122 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1123 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1124 ; CHECK-NEXT: call void @llvm.donothing()
1125 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1126 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
1127 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
1128 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1129 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP3]] to i256
1130 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0
1131 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1132 ; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
1134 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1135 ; CHECK-NEXT: unreachable
1137 ; CHECK-NEXT: call void @llvm.x86.avx2.maskstore.q.256(ptr [[A0:%.*]], <4 x i64> [[A1:%.*]], <4 x i64> [[A2:%.*]])
1138 ; CHECK-NEXT: ret void
1140 call void @llvm.x86.avx2.maskstore.q.256(ptr %a0, <4 x i64> %a1, <4 x i64> %a2)
1143 declare void @llvm.x86.avx2.maskstore.q.256(ptr, <4 x i64>, <4 x i64>) nounwind
1146 define void @test_x86_avx2_maskstore_d(ptr %a0, <4 x i32> %a1, <4 x i32> %a2) #0 {
1147 ; CHECK-LABEL: @test_x86_avx2_maskstore_d(
1148 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1149 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1150 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
1151 ; CHECK-NEXT: call void @llvm.donothing()
1152 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1153 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1154 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
1155 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1156 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
1157 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0
1158 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1159 ; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
1161 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1162 ; CHECK-NEXT: unreachable
1164 ; CHECK-NEXT: call void @llvm.x86.avx2.maskstore.d(ptr [[A0:%.*]], <4 x i32> [[A1:%.*]], <4 x i32> [[A2:%.*]])
1165 ; CHECK-NEXT: ret void
1167 call void @llvm.x86.avx2.maskstore.d(ptr %a0, <4 x i32> %a1, <4 x i32> %a2)
1170 declare void @llvm.x86.avx2.maskstore.d(ptr, <4 x i32>, <4 x i32>) nounwind
1173 define void @test_x86_avx2_maskstore_d_256(ptr %a0, <8 x i32> %a1, <8 x i32> %a2) #0 {
1174 ; CHECK-LABEL: @test_x86_avx2_maskstore_d_256(
1175 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
1176 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
1177 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1178 ; CHECK-NEXT: call void @llvm.donothing()
1179 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
1180 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
1181 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
1182 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1183 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP3]] to i256
1184 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0
1185 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1186 ; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
1188 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1189 ; CHECK-NEXT: unreachable
1191 ; CHECK-NEXT: call void @llvm.x86.avx2.maskstore.d.256(ptr [[A0:%.*]], <8 x i32> [[A1:%.*]], <8 x i32> [[A2:%.*]])
1192 ; CHECK-NEXT: ret void
1194 call void @llvm.x86.avx2.maskstore.d.256(ptr %a0, <8 x i32> %a1, <8 x i32> %a2)
1197 declare void @llvm.x86.avx2.maskstore.d.256(ptr, <8 x i32>, <8 x i32>) nounwind
1200 define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) #0 {
1201 ; CHECK-LABEL: @test_x86_avx2_psllv_d(
1202 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1203 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1204 ; CHECK-NEXT: call void @llvm.donothing()
1205 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
1206 ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
1207 ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
1208 ; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP5]], [[TMP4]]
1209 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]])
1210 ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
1211 ; CHECK-NEXT: ret <4 x i32> [[RES]]
1213 %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1217 define <4 x i32> @test_x86_avx2_psllv_d_const() #0 {
1218 ; CHECK-LABEL: @test_x86_avx2_psllv_d_const(
1219 ; CHECK-NEXT: call void @llvm.donothing()
1220 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
1221 ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
1222 ; CHECK-NEXT: [[RES0:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> <i32 2, i32 9, i32 0, i32 -1>, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
1223 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 1, i32 1, i32 -1>)
1224 ; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP3]], zeroinitializer
1225 ; CHECK-NEXT: [[RES1:%.*]] = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> <i32 1, i32 1, i32 1, i32 -1>, <4 x i32> <i32 1, i32 1, i32 1, i32 -1>)
1226 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP2]], [[TMP4]]
1227 ; CHECK-NEXT: [[RES2:%.*]] = add <4 x i32> [[RES0]], [[RES1]]
1228 ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1229 ; CHECK-NEXT: ret <4 x i32> [[RES2]]
1231 %res0 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> <i32 2, i32 9, i32 0, i32 -1>, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
1232 %res1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> <i32 1, i32 1, i32 1, i32 -1>, <4 x i32> <i32 1, i32 1, i32 1, i32 -1>)
1233 %res2 = add <4 x i32> %res0, %res1
1236 declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
1239 define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) #0 {
1240 ; CHECK-LABEL: @test_x86_avx2_psllv_d_256(
1241 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
1242 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1243 ; CHECK-NEXT: call void @llvm.donothing()
1244 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer
1245 ; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32>
1246 ; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> [[TMP1]], <8 x i32> [[A1:%.*]])
1247 ; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i32> [[TMP5]], [[TMP4]]
1248 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[A1]])
1249 ; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
1250 ; CHECK-NEXT: ret <8 x i32> [[RES]]
1252 %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
1256 define <8 x i32> @test_x86_avx2_psllv_d_256_const() #0 {
1257 ; CHECK-LABEL: @test_x86_avx2_psllv_d_256_const(
1258 ; CHECK-NEXT: call void @llvm.donothing()
1259 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2>)
1260 ; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> [[TMP1]], zeroinitializer
1261 ; CHECK-NEXT: [[RES0:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0>, <8 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2>)
1262 ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
1263 ; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i32> [[TMP3]], zeroinitializer
1264 ; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
1265 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP2]], [[TMP4]]
1266 ; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES0]], [[RES1]]
1267 ; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1268 ; CHECK-NEXT: ret <8 x i32> [[RES2]]
1270 %res0 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0>, <8 x i32> <i32 1, i32 0, i32 33, i32 -1,i32 2, i32 0, i32 34, i32 -2>)
1271 %res1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
1272 %res2 = add <8 x i32> %res0, %res1
1275 declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
1278 define <2 x i64> @test_x86_avx2_psllv_q(<2 x i64> %a0, <2 x i64> %a1) #0 {
1279 ; CHECK-LABEL: @test_x86_avx2_psllv_q(
1280 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1281 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1282 ; CHECK-NEXT: call void @llvm.donothing()
1283 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
1284 ; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
1285 ; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
1286 ; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i64> [[TMP5]], [[TMP4]]
1287 ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1]])
1288 ; CHECK-NEXT: store <2 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
1289 ; CHECK-NEXT: ret <2 x i64> [[RES]]
1291 %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
1294 define <2 x i64> @test_x86_avx2_psllv_q_const() #0 {
1295 ; CHECK-LABEL: @test_x86_avx2_psllv_q_const(
1296 ; CHECK-NEXT: call void @llvm.donothing()
1297 ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> zeroinitializer, <2 x i64> <i64 1, i64 -1>)
1298 ; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer
1299 ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> <i64 4, i64 -1>, <2 x i64> <i64 1, i64 -1>)
1300 ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8
1301 ; CHECK-NEXT: ret <2 x i64> [[RES]]
1303 %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> <i64 4, i64 -1>, <2 x i64> <i64 1, i64 -1>)
1306 declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone
1309 define <4 x i64> @test_x86_avx2_psllv_q_256(<4 x i64> %a0, <4 x i64> %a1) #0 {
1310 ; CHECK-LABEL: @test_x86_avx2_psllv_q_256(
1311 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
1312 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1313 ; CHECK-NEXT: call void @llvm.donothing()
1314 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i64> [[TMP2]], zeroinitializer
1315 ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i64>
1316 ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> [[TMP1]], <4 x i64> [[A1:%.*]])
1317 ; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i64> [[TMP5]], [[TMP4]]
1318 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1]])
1319 ; CHECK-NEXT: store <4 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
1320 ; CHECK-NEXT: ret <4 x i64> [[RES]]
1322 %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
1326 define <4 x i64> @test_x86_avx2_psllv_q_256_const() #0 {
1327 ; CHECK-LABEL: @test_x86_avx2_psllv_q_256_const(
1328 ; CHECK-NEXT: call void @llvm.donothing()
1329 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> zeroinitializer, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
1330 ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer
1331 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> <i64 4, i64 4, i64 4, i64 -1>, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
1332 ; CHECK-NEXT: store <4 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8
1333 ; CHECK-NEXT: ret <4 x i64> [[RES]]
1335 %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> <i64 4, i64 4, i64 4, i64 -1>, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
1338 declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
1341 define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) #0 {
1342 ; CHECK-LABEL: @test_x86_avx2_psrlv_d(
1343 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1344 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1345 ; CHECK-NEXT: call void @llvm.donothing()
1346 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
1347 ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
1348 ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
1349 ; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP5]], [[TMP4]]
1350 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]])
1351 ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
1352 ; CHECK-NEXT: ret <4 x i32> [[RES]]
1354 %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1358 define <4 x i32> @test_x86_avx2_psrlv_d_const() #0 {
1359 ; CHECK-LABEL: @test_x86_avx2_psrlv_d_const(
1360 ; CHECK-NEXT: call void @llvm.donothing()
1361 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
1362 ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
1363 ; CHECK-NEXT: [[RES0:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> <i32 2, i32 9, i32 0, i32 -1>, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
1364 ; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 1, i32 1, i32 -1>)
1365 ; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP3]], zeroinitializer
1366 ; CHECK-NEXT: [[RES1:%.*]] = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> <i32 4, i32 4, i32 4, i32 -1>, <4 x i32> <i32 1, i32 1, i32 1, i32 -1>)
1367 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP2]], [[TMP4]]
1368 ; CHECK-NEXT: [[RES2:%.*]] = add <4 x i32> [[RES0]], [[RES1]]
1369 ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1370 ; CHECK-NEXT: ret <4 x i32> [[RES2]]
1372 %res0 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> <i32 2, i32 9, i32 0, i32 -1>, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
1373 %res1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> <i32 4, i32 4, i32 4, i32 -1>, <4 x i32> <i32 1, i32 1, i32 1, i32 -1>)
1374 %res2 = add <4 x i32> %res0, %res1
1377 declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone
1380 define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) #0 {
1381 ; CHECK-LABEL: @test_x86_avx2_psrlv_d_256(
1382 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
1383 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1384 ; CHECK-NEXT: call void @llvm.donothing()
1385 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer
1386 ; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32>
1387 ; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> [[TMP1]], <8 x i32> [[A1:%.*]])
1388 ; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i32> [[TMP5]], [[TMP4]]
1389 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[A1]])
1390 ; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
1391 ; CHECK-NEXT: ret <8 x i32> [[RES]]
1393 %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
1397 define <8 x i32> @test_x86_avx2_psrlv_d_256_const() #0 {
1398 ; CHECK-LABEL: @test_x86_avx2_psrlv_d_256_const(
1399 ; CHECK-NEXT: call void @llvm.donothing()
1400 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2>)
1401 ; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> [[TMP1]], zeroinitializer
1402 ; CHECK-NEXT: [[RES0:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0>, <8 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2>)
1403 ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
1404 ; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i32> [[TMP3]], zeroinitializer
1405 ; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
1406 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP2]], [[TMP4]]
1407 ; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES0]], [[RES1]]
1408 ; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
1409 ; CHECK-NEXT: ret <8 x i32> [[RES2]]
1411 %res0 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0>, <8 x i32> <i32 1, i32 0, i32 33, i32 -1,i32 2, i32 0, i32 34, i32 -2>)
1412 %res1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
1413 %res2 = add <8 x i32> %res0, %res1
1416 declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
1419 define <2 x i64> @test_x86_avx2_psrlv_q(<2 x i64> %a0, <2 x i64> %a1) #0 {
1420 ; CHECK-LABEL: @test_x86_avx2_psrlv_q(
1421 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1422 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1423 ; CHECK-NEXT: call void @llvm.donothing()
1424 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
1425 ; CHECK-NEXT: [[TMP4:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64>
1426 ; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
1427 ; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i64> [[TMP5]], [[TMP4]]
1428 ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1]])
1429 ; CHECK-NEXT: store <2 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
1430 ; CHECK-NEXT: ret <2 x i64> [[RES]]
1432 %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
1436 define <2 x i64> @test_x86_avx2_psrlv_q_const() #0 {
1437 ; CHECK-LABEL: @test_x86_avx2_psrlv_q_const(
1438 ; CHECK-NEXT: call void @llvm.donothing()
1439 ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> zeroinitializer, <2 x i64> <i64 1, i64 -1>)
1440 ; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer
1441 ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> splat (i64 4), <2 x i64> <i64 1, i64 -1>)
1442 ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8
1443 ; CHECK-NEXT: ret <2 x i64> [[RES]]
1445 %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> <i64 4, i64 4>, <2 x i64> <i64 1, i64 -1>)
1448 declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
1451 define <4 x i64> @test_x86_avx2_psrlv_q_256(<4 x i64> %a0, <4 x i64> %a1) #0 {
1452 ; CHECK-LABEL: @test_x86_avx2_psrlv_q_256(
1453 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
1454 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1455 ; CHECK-NEXT: call void @llvm.donothing()
1456 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i64> [[TMP2]], zeroinitializer
1457 ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i64>
1458 ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> [[TMP1]], <4 x i64> [[A1:%.*]])
1459 ; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i64> [[TMP5]], [[TMP4]]
1460 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1]])
1461 ; CHECK-NEXT: store <4 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
1462 ; CHECK-NEXT: ret <4 x i64> [[RES]]
1464 %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
1469 define <4 x i64> @test_x86_avx2_psrlv_q_256_const() #0 {
1470 ; CHECK-LABEL: @test_x86_avx2_psrlv_q_256_const(
1471 ; CHECK-NEXT: call void @llvm.donothing()
1472 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> zeroinitializer, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
1473 ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer
1474 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> splat (i64 4), <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
1475 ; CHECK-NEXT: store <4 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8
1476 ; CHECK-NEXT: ret <4 x i64> [[RES]]
1478 %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> <i64 4, i64 4, i64 4, i64 4>, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
1481 declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
1484 define <4 x i32> @test_x86_avx2_psrav_d(<4 x i32> %a0, <4 x i32> %a1) #0 {
1485 ; CHECK-LABEL: @test_x86_avx2_psrav_d(
1486 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1487 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1488 ; CHECK-NEXT: call void @llvm.donothing()
1489 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
1490 ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
1491 ; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
1492 ; CHECK-NEXT: [[TMP6:%.*]] = or <4 x i32> [[TMP5]], [[TMP4]]
1493 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]])
1494 ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
1495 ; CHECK-NEXT: ret <4 x i32> [[RES]]
1497 %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
1501 define <4 x i32> @test_x86_avx2_psrav_d_const() #0 {
1502 ; CHECK-LABEL: @test_x86_avx2_psrav_d_const(
1503 ; CHECK-NEXT: call void @llvm.donothing()
1504 ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 18, i32 35, i32 52>)
1505 ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
1506 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> <i32 2, i32 9, i32 -12, i32 23>, <4 x i32> <i32 1, i32 18, i32 35, i32 52>)
1507 ; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr @__msan_retval_tls, align 8
1508 ; CHECK-NEXT: ret <4 x i32> [[RES]]
1510 %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> <i32 2, i32 9, i32 -12, i32 23>, <4 x i32> <i32 1, i32 18, i32 35, i32 52>)
1513 declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
1515 define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) #0 {
1516 ; CHECK-LABEL: @test_x86_avx2_psrav_d_256(
1517 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
1518 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1519 ; CHECK-NEXT: call void @llvm.donothing()
1520 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i32> [[TMP2]], zeroinitializer
1521 ; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i32>
1522 ; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> [[TMP1]], <8 x i32> [[A1:%.*]])
1523 ; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i32> [[TMP5]], [[TMP4]]
1524 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> [[A0:%.*]], <8 x i32> [[A1]])
1525 ; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
1526 ; CHECK-NEXT: ret <8 x i32> [[RES]]
1528 %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
1532 define <8 x i32> @test_x86_avx2_psrav_d_256_const() #0 {
1533 ; CHECK-LABEL: @test_x86_avx2_psrav_d_256_const(
1534 ; CHECK-NEXT: call void @llvm.donothing()
1535 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>)
1536 ; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> [[TMP1]], zeroinitializer
1537 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>)
1538 ; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr @__msan_retval_tls, align 8
1539 ; CHECK-NEXT: ret <8 x i32> [[RES]]
1541 %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>)
1544 declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
1546 define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, ptr %a1, <4 x i32> %idx, <2 x double> %mask) #0 {
1547 ; CHECK-LABEL: @test_x86_avx2_gather_d_pd(
1548 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1549 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1550 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
1551 ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1552 ; CHECK-NEXT: call void @llvm.donothing()
1553 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1554 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
1555 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1556 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1557 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
1558 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
1559 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1560 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
1561 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
1562 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1563 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1565 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1566 ; CHECK-NEXT: unreachable
1568 ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> [[A0:%.*]], ptr [[A1:%.*]], <4 x i32> [[IDX:%.*]], <2 x double> [[MASK:%.*]], i8 2)
1569 ; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1570 ; CHECK-NEXT: ret <2 x double> [[RES]]
1572 %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0,
1573 ptr %a1, <4 x i32> %idx, <2 x double> %mask, i8 2) ;
1574 ret <2 x double> %res
1576 declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, ptr,
1577 <4 x i32>, <2 x double>, i8) nounwind readonly
1579 define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, ptr %a1, <4 x i32> %idx, <4 x double> %mask) #0 {
1580 ; CHECK-LABEL: @test_x86_avx2_gather_d_pd_256(
1581 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
1582 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1583 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1584 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
1585 ; CHECK-NEXT: call void @llvm.donothing()
1586 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
1587 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0
1588 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1589 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1590 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
1591 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
1592 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1593 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256
1594 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
1595 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1596 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1598 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1599 ; CHECK-NEXT: unreachable
1601 ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> [[A0:%.*]], ptr [[A1:%.*]], <4 x i32> [[IDX:%.*]], <4 x double> [[MASK:%.*]], i8 2)
1602 ; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1603 ; CHECK-NEXT: ret <4 x double> [[RES]]
1605 %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0,
1606 ptr %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ;
1607 ret <4 x double> %res
1609 declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, ptr,
1610 <4 x i32>, <4 x double>, i8) nounwind readonly
1612 define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, ptr %a1, <2 x i64> %idx, <2 x double> %mask) #0 {
1613 ; CHECK-LABEL: @test_x86_avx2_gather_q_pd(
1614 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1615 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1616 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
1617 ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1618 ; CHECK-NEXT: call void @llvm.donothing()
1619 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1620 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
1621 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1622 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1623 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
1624 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
1625 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1626 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
1627 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
1628 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1629 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1631 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1632 ; CHECK-NEXT: unreachable
1634 ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> [[A0:%.*]], ptr [[A1:%.*]], <2 x i64> [[IDX:%.*]], <2 x double> [[MASK:%.*]], i8 2)
1635 ; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1636 ; CHECK-NEXT: ret <2 x double> [[RES]]
1638 %res = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0,
1639 ptr %a1, <2 x i64> %idx, <2 x double> %mask, i8 2) ;
1640 ret <2 x double> %res
1642 declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, ptr,
1643 <2 x i64>, <2 x double>, i8) nounwind readonly
1645 define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, ptr %a1, <4 x i64> %idx, <4 x double> %mask) #0 {
1646 ; CHECK-LABEL: @test_x86_avx2_gather_q_pd_256(
1647 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
1648 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1649 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1650 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
1651 ; CHECK-NEXT: call void @llvm.donothing()
1652 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
1653 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0
1654 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1655 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1656 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP3]] to i256
1657 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0
1658 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1659 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256
1660 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
1661 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1662 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1664 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1665 ; CHECK-NEXT: unreachable
1667 ; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> [[A0:%.*]], ptr [[A1:%.*]], <4 x i64> [[IDX:%.*]], <4 x double> [[MASK:%.*]], i8 2)
1668 ; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1669 ; CHECK-NEXT: ret <4 x double> [[RES]]
1671 %res = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0,
1672 ptr %a1, <4 x i64> %idx, <4 x double> %mask, i8 2) ;
1673 ret <4 x double> %res
1675 declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, ptr,
1676 <4 x i64>, <4 x double>, i8) nounwind readonly
1678 define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, ptr %a1, <4 x i32> %idx, <4 x float> %mask) #0 {
1679 ; CHECK-LABEL: @test_x86_avx2_gather_d_ps(
1680 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1681 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1682 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
1683 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1684 ; CHECK-NEXT: call void @llvm.donothing()
1685 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1686 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
1687 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1688 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1689 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
1690 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
1691 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1692 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
1693 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
1694 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1695 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1697 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1698 ; CHECK-NEXT: unreachable
1700 ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> [[A0:%.*]], ptr [[A1:%.*]], <4 x i32> [[IDX:%.*]], <4 x float> [[MASK:%.*]], i8 2)
1701 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
1702 ; CHECK-NEXT: ret <4 x float> [[RES]]
1704 %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0,
1705 ptr %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
1706 ret <4 x float> %res
1708 declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, ptr,
1709 <4 x i32>, <4 x float>, i8) nounwind readonly
1711 define <8 x float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, ptr %a1, <8 x i32> %idx, <8 x float> %mask) #0 {
1712 ; CHECK-LABEL: @test_x86_avx2_gather_d_ps_256(
1713 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
1714 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1715 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1716 ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
1717 ; CHECK-NEXT: call void @llvm.donothing()
1718 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
1719 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0
1720 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1721 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1722 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP3]] to i256
1723 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0
1724 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1725 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP4]] to i256
1726 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
1727 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1728 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1730 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1731 ; CHECK-NEXT: unreachable
1733 ; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> [[A0:%.*]], ptr [[A1:%.*]], <8 x i32> [[IDX:%.*]], <8 x float> [[MASK:%.*]], i8 2)
1734 ; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
1735 ; CHECK-NEXT: ret <8 x float> [[RES]]
1737 %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
1738 ptr %a1, <8 x i32> %idx, <8 x float> %mask, i8 2) ;
1739 ret <8 x float> %res
1741 declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, ptr,
1742 <8 x i32>, <8 x float>, i8) nounwind readonly
1744 define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, ptr %a1, <2 x i64> %idx, <4 x float> %mask) #0 {
1745 ; CHECK-LABEL: @test_x86_avx2_gather_q_ps(
1746 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1747 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1748 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
1749 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1750 ; CHECK-NEXT: call void @llvm.donothing()
1751 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1752 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
1753 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1754 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1755 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
1756 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
1757 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1758 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
1759 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
1760 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1761 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1763 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1764 ; CHECK-NEXT: unreachable
1766 ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> [[A0:%.*]], ptr [[A1:%.*]], <2 x i64> [[IDX:%.*]], <4 x float> [[MASK:%.*]], i8 2)
1767 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
1768 ; CHECK-NEXT: ret <4 x float> [[RES]]
1770 %res = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0,
1771 ptr %a1, <2 x i64> %idx, <4 x float> %mask, i8 2) ;
1772 ret <4 x float> %res
1774 declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, ptr,
1775 <2 x i64>, <4 x float>, i8) nounwind readonly
1777 define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, ptr %a1, <4 x i64> %idx, <4 x float> %mask) #0 {
1778 ; CHECK-LABEL: @test_x86_avx2_gather_q_ps_256(
1779 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1780 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1781 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
1782 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
1783 ; CHECK-NEXT: call void @llvm.donothing()
1784 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1785 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
1786 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1787 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1788 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP3]] to i256
1789 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0
1790 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1791 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
1792 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
1793 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1794 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1796 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1797 ; CHECK-NEXT: unreachable
1799 ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> [[A0:%.*]], ptr [[A1:%.*]], <4 x i64> [[IDX:%.*]], <4 x float> [[MASK:%.*]], i8 2)
1800 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
1801 ; CHECK-NEXT: ret <4 x float> [[RES]]
1803 %res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0,
1804 ptr %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ;
1805 ret <4 x float> %res
1807 declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, ptr,
1808 <4 x i64>, <4 x float>, i8) nounwind readonly
1810 define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, ptr %a1, <4 x i32> %idx, <2 x i64> %mask) #0 {
1811 ; CHECK-LABEL: @test_x86_avx2_gather_d_q(
1812 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1813 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1814 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
1815 ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1816 ; CHECK-NEXT: call void @llvm.donothing()
1817 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1818 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
1819 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1820 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1821 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
1822 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
1823 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1824 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
1825 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
1826 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1827 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1829 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1830 ; CHECK-NEXT: unreachable
1832 ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> [[A0:%.*]], ptr [[A1:%.*]], <4 x i32> [[IDX:%.*]], <2 x i64> [[MASK:%.*]], i8 2)
1833 ; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1834 ; CHECK-NEXT: ret <2 x i64> [[RES]]
1836 %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0,
1837 ptr %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ;
1840 declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, ptr,
1841 <4 x i32>, <2 x i64>, i8) nounwind readonly
1843 define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, ptr %a1, <4 x i32> %idx, <4 x i64> %mask) #0 {
1844 ; CHECK-LABEL: @test_x86_avx2_gather_d_q_256(
1845 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
1846 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1847 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1848 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
1849 ; CHECK-NEXT: call void @llvm.donothing()
1850 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
1851 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0
1852 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1853 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1854 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
1855 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
1856 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1857 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256
1858 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
1859 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1860 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1862 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1863 ; CHECK-NEXT: unreachable
1865 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> [[A0:%.*]], ptr [[A1:%.*]], <4 x i32> [[IDX:%.*]], <4 x i64> [[MASK:%.*]], i8 2)
1866 ; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1867 ; CHECK-NEXT: ret <4 x i64> [[RES]]
1869 %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0,
1870 ptr %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ;
1873 declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, ptr,
1874 <4 x i32>, <4 x i64>, i8) nounwind readonly
1876 define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, ptr %a1, <2 x i64> %idx, <2 x i64> %mask) #0 {
1877 ; CHECK-LABEL: @test_x86_avx2_gather_q_q(
1878 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
1879 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1880 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
1881 ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1882 ; CHECK-NEXT: call void @llvm.donothing()
1883 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
1884 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
1885 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1886 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1887 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
1888 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
1889 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1890 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
1891 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
1892 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1893 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1895 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1896 ; CHECK-NEXT: unreachable
1898 ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> [[A0:%.*]], ptr [[A1:%.*]], <2 x i64> [[IDX:%.*]], <2 x i64> [[MASK:%.*]], i8 2)
1899 ; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1900 ; CHECK-NEXT: ret <2 x i64> [[RES]]
1902 %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
1903 ptr %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ;
1906 declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, ptr,
1907 <2 x i64>, <2 x i64>, i8) nounwind readonly
1909 define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, ptr %a1, <4 x i64> %idx, <4 x i64> %mask) #0 {
1910 ; CHECK-LABEL: @test_x86_avx2_gather_q_q_256(
1911 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
1912 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1913 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1914 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
1915 ; CHECK-NEXT: call void @llvm.donothing()
1916 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
1917 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0
1918 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1919 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1920 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP3]] to i256
1921 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0
1922 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1923 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256
1924 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
1925 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1926 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1928 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1929 ; CHECK-NEXT: unreachable
1931 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> [[A0:%.*]], ptr [[A1:%.*]], <4 x i64> [[IDX:%.*]], <4 x i64> [[MASK:%.*]], i8 2)
1932 ; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
1933 ; CHECK-NEXT: ret <4 x i64> [[RES]]
1935 %res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0,
1936 ptr %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ;
1939 declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, ptr,
1940 <4 x i64>, <4 x i64>, i8) nounwind readonly
1942 define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, ptr %a1, <4 x i32> %idx, <4 x i32> %mask) #0 {
1943 ; CHECK-LABEL: @test_x86_avx2_gather_d_d(
1944 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1945 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1946 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
1947 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1948 ; CHECK-NEXT: call void @llvm.donothing()
1949 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
1950 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
1951 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1952 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1953 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
1954 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
1955 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1956 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
1957 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
1958 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1959 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1961 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1962 ; CHECK-NEXT: unreachable
1964 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], <4 x i32> [[IDX:%.*]], <4 x i32> [[MASK:%.*]], i8 2)
1965 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
1966 ; CHECK-NEXT: ret <4 x i32> [[RES]]
1968 %res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0,
1969 ptr %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ;
1972 declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, ptr,
1973 <4 x i32>, <4 x i32>, i8) nounwind readonly
1975 define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, ptr %a1, <8 x i32> %idx, <8 x i32> %mask) #0 {
1976 ; CHECK-LABEL: @test_x86_avx2_gather_d_d_256(
1977 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
1978 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
1979 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
1980 ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
1981 ; CHECK-NEXT: call void @llvm.donothing()
1982 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
1983 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP5]], 0
1984 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
1985 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
1986 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP3]] to i256
1987 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0
1988 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
1989 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP4]] to i256
1990 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
1991 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
1992 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
1994 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
1995 ; CHECK-NEXT: unreachable
1997 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> [[A0:%.*]], ptr [[A1:%.*]], <8 x i32> [[IDX:%.*]], <8 x i32> [[MASK:%.*]], i8 2)
1998 ; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
1999 ; CHECK-NEXT: ret <8 x i32> [[RES]]
2001 %res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0,
2002 ptr %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ;
2005 declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, ptr,
2006 <8 x i32>, <8 x i32>, i8) nounwind readonly
2008 define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, ptr %a1, <2 x i64> %idx, <4 x i32> %mask) #0 {
2009 ; CHECK-LABEL: @test_x86_avx2_gather_q_d(
2010 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
2011 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
2012 ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
2013 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
2014 ; CHECK-NEXT: call void @llvm.donothing()
2015 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2016 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
2017 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
2018 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2019 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
2020 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
2021 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
2022 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
2023 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
2024 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
2025 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2027 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
2028 ; CHECK-NEXT: unreachable
2030 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], <2 x i64> [[IDX:%.*]], <4 x i32> [[MASK:%.*]], i8 2)
2031 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
2032 ; CHECK-NEXT: ret <4 x i32> [[RES]]
2034 %res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0,
2035 ptr %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ;
2038 declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, ptr,
2039 <2 x i64>, <4 x i32>, i8) nounwind readonly
2041 define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, ptr %a1, <4 x i64> %idx, <4 x i32> %mask) #0 {
2042 ; CHECK-LABEL: @test_x86_avx2_gather_q_d_256(
2043 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
2044 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
2045 ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
2046 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
2047 ; CHECK-NEXT: call void @llvm.donothing()
2048 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
2049 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
2050 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
2051 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2052 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i64> [[TMP3]] to i256
2053 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP6]], 0
2054 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
2055 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
2056 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
2057 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
2058 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2060 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
2061 ; CHECK-NEXT: unreachable
2063 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> [[A0:%.*]], ptr [[A1:%.*]], <4 x i64> [[IDX:%.*]], <4 x i32> [[MASK:%.*]], i8 2)
2064 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
2065 ; CHECK-NEXT: ret <4 x i32> [[RES]]
2067 %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0,
2068 ptr %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ;
2071 declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, ptr,
2072 <4 x i64>, <4 x i32>, i8) nounwind readonly
2074 define <8 x float> @test_gather_mask(<8 x float> %a0, ptr %a, <8 x i32> %idx, <8 x float> %mask, ptr nocapture %out) #0 {
2075 ; CHECK-LABEL: @test_gather_mask(
2076 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
2077 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
2078 ; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
2079 ; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
2080 ; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 104) to ptr), align 8
2081 ; CHECK-NEXT: call void @llvm.donothing()
2082 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
2083 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP6]], 0
2084 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP2]], 0
2085 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2086 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP3]] to i256
2087 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP7]], 0
2088 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
2089 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i32> [[TMP4]] to i256
2090 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP8]], 0
2091 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
2092 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
2094 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
2095 ; CHECK-NEXT: unreachable
2097 ; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> [[A0:%.*]], ptr [[A:%.*]], <8 x i32> [[IDX:%.*]], <8 x float> [[MASK:%.*]], i8 4)
2098 ; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP5]], 0
2099 ; CHECK-NEXT: br i1 [[_MSCMP6]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
2101 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
2102 ; CHECK-NEXT: unreachable
2104 ; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[OUT:%.*]] to i64
2105 ; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], 87960930222080
2106 ; CHECK-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr
2107 ; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr [[TMP15]], align 4
2108 ; CHECK-NEXT: store <8 x float> [[MASK]], ptr [[OUT]], align 4
2109 ; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
2110 ; CHECK-NEXT: ret <8 x float> [[RES]]
2112 %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
2113 ptr %a, <8 x i32> %idx, <8 x float> %mask, i8 4) ;
2115 store <8 x float> %mask, ptr %out, align 4
2117 ret <8 x float> %res
2120 define <2 x i64> @test_mask_demanded_bits(<2 x i64> %a0, ptr %a1, <2 x i64> %idx, <2 x i1> %mask) #0 {
2121 ; CHECK-LABEL: @test_mask_demanded_bits(
2122 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i1>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
2123 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
2124 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
2125 ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
2126 ; CHECK-NEXT: call void @llvm.donothing()
2127 ; CHECK-NEXT: [[_MSPROP:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
2128 ; CHECK-NEXT: [[MASK1:%.*]] = sext <2 x i1> [[MASK:%.*]] to <2 x i64>
2129 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
2130 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
2131 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP3]], 0
2132 ; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
2133 ; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
2134 ; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP6]], 0
2135 ; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
2136 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSPROP]] to i128
2137 ; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
2138 ; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
2139 ; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
2141 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
2142 ; CHECK-NEXT: unreachable
2144 ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> [[A0:%.*]], ptr [[A1:%.*]], <2 x i64> [[IDX:%.*]], <2 x i64> [[MASK1]], i8 2)
2145 ; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
2146 ; CHECK-NEXT: ret <2 x i64> [[RES]]
2148 %mask1 = sext <2 x i1> %mask to <2 x i64>
2149 %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
2150 ptr %a1, <2 x i64> %idx, <2 x i64> %mask1, i8 2) ;
2154 attributes #0 = { sanitize_memory }