; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes
; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s
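;
; This file checks MemorySanitizer instrumentation of the AVX intrinsics.
; Broadly, the patterns exercised below are:
; - element-wise ops (addsub, hadd/hsub, max/min): the return shadow is the
;   bitwise OR of the operand shadows;
; - blendv: the shadow is routed through a select on the sign bits, mirroring
;   the intrinsic's own semantics;
; - conversions, movmsk, loads/stores, vpermilvar: operands are checked
;   strictly, i.e. any uninitialized bit triggers __msan_warning_noreturn.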

target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_addsub_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]])
; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x double> [[RES]]
;
  %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_addsub_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x float> [[RES]]
;
  %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone

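; For blendv, the instrumentation mirrors the intrinsic itself: the selector's
; sign bits pick between the shadows of the two data operands, and lanes whose
; selector sign bit is itself poisoned get the fully-ORed shadow instead.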
define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-LABEL: @test_x86_avx_blendv_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double> [[A2:%.*]] to <4 x i64>
; CHECK-NEXT: [[TMP5:%.*]] = ashr <4 x i64> [[TMP4]], splat (i64 63)
; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = ashr <4 x i64> [[TMP1]], splat (i64 63)
; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[TMP7]] to <4 x i1>
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP2]], <4 x i64> [[TMP3]]
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x double> [[A1:%.*]] to <4 x i64>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x double> [[A0:%.*]] to <4 x i64>
; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i64> [[TMP10]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i64> [[TMP12]], [[TMP2]]
; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i64> [[TMP13]], [[TMP3]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <4 x i1> [[TMP8]], <4 x i64> [[TMP14]], <4 x i64> [[TMP9]]
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> [[A0]], <4 x double> [[A1]], <4 x double> [[A2]])
; CHECK-NEXT: store <4 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x double> [[RES]]
;
  %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone

define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-LABEL: @test_x86_avx_blendv_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x float> [[A2:%.*]] to <8 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = ashr <8 x i32> [[TMP4]], splat (i32 31)
; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i32> [[TMP5]] to <8 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = ashr <8 x i32> [[TMP1]], splat (i32 31)
; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i1>
; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]]
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x float> [[A1:%.*]] to <8 x i32>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x float> [[A0:%.*]] to <8 x i32>
; CHECK-NEXT: [[TMP12:%.*]] = xor <8 x i32> [[TMP10]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i32> [[TMP12]], [[TMP2]]
; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i32> [[TMP13]], [[TMP3]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i32> [[TMP14]], <8 x i32> [[TMP9]]
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> [[A0]], <8 x float> [[A1]], <8 x float> [[A2]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x float> [[RES]]
;
  %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone

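; For cmp, the operand shadows are ORed and then collapsed per element with
; icmp ne + sext, so each result lane is either fully initialized or fully
; poisoned.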
define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_cmp_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i64>
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]], i8 7)
; CHECK-NEXT: store <4 x i64> [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x double> [[RES]]
;
  %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone

define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_cmp_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i32>
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]], i8 7)
; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x float> [[RES]]
;
  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}

define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_cmp_ps_256_pseudo_op(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i32>
; CHECK-NEXT: [[A2:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]], i8 0)
; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i32> [[TMP1]], [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <8 x i32> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = sext <8 x i1> [[TMP7]] to <8 x i32>
; CHECK-NEXT: [[A3:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A2]], i8 1)
; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP1]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <8 x i32> [[TMP9]], zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = sext <8 x i1> [[TMP10]] to <8 x i32>
; CHECK-NEXT: [[A4:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A3]], i8 2)
; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i32> [[TMP1]], [[TMP11]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <8 x i32> [[TMP12]], zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = sext <8 x i1> [[TMP13]] to <8 x i32>
; CHECK-NEXT: [[A5:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A4]], i8 3)
; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i32> [[TMP1]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <8 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = sext <8 x i1> [[TMP16]] to <8 x i32>
; CHECK-NEXT: [[A6:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A5]], i8 4)
; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i32> [[TMP1]], [[TMP17]]
; CHECK-NEXT: [[TMP19:%.*]] = icmp ne <8 x i32> [[TMP18]], zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = sext <8 x i1> [[TMP19]] to <8 x i32>
; CHECK-NEXT: [[A7:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A6]], i8 5)
; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i32> [[TMP1]], [[TMP20]]
; CHECK-NEXT: [[TMP22:%.*]] = icmp ne <8 x i32> [[TMP21]], zeroinitializer
; CHECK-NEXT: [[TMP23:%.*]] = sext <8 x i1> [[TMP22]] to <8 x i32>
; CHECK-NEXT: [[A8:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A7]], i8 6)
; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i32> [[TMP1]], [[TMP23]]
; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <8 x i32> [[TMP24]], zeroinitializer
; CHECK-NEXT: [[TMP26:%.*]] = sext <8 x i1> [[TMP25]] to <8 x i32>
; CHECK-NEXT: [[A9:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A8]], i8 7)
; CHECK-NEXT: [[TMP27:%.*]] = or <8 x i32> [[TMP1]], [[TMP26]]
; CHECK-NEXT: [[TMP28:%.*]] = icmp ne <8 x i32> [[TMP27]], zeroinitializer
; CHECK-NEXT: [[TMP29:%.*]] = sext <8 x i1> [[TMP28]] to <8 x i32>
; CHECK-NEXT: [[A10:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A9]], i8 8)
; CHECK-NEXT: [[TMP30:%.*]] = or <8 x i32> [[TMP1]], [[TMP29]]
; CHECK-NEXT: [[TMP31:%.*]] = icmp ne <8 x i32> [[TMP30]], zeroinitializer
; CHECK-NEXT: [[TMP32:%.*]] = sext <8 x i1> [[TMP31]] to <8 x i32>
; CHECK-NEXT: [[A11:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A10]], i8 9)
; CHECK-NEXT: [[TMP33:%.*]] = or <8 x i32> [[TMP1]], [[TMP32]]
; CHECK-NEXT: [[TMP34:%.*]] = icmp ne <8 x i32> [[TMP33]], zeroinitializer
; CHECK-NEXT: [[TMP35:%.*]] = sext <8 x i1> [[TMP34]] to <8 x i32>
; CHECK-NEXT: [[A12:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A11]], i8 10)
; CHECK-NEXT: [[TMP36:%.*]] = or <8 x i32> [[TMP1]], [[TMP35]]
; CHECK-NEXT: [[TMP37:%.*]] = icmp ne <8 x i32> [[TMP36]], zeroinitializer
; CHECK-NEXT: [[TMP38:%.*]] = sext <8 x i1> [[TMP37]] to <8 x i32>
; CHECK-NEXT: [[A13:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A12]], i8 11)
; CHECK-NEXT: [[TMP39:%.*]] = or <8 x i32> [[TMP1]], [[TMP38]]
; CHECK-NEXT: [[TMP40:%.*]] = icmp ne <8 x i32> [[TMP39]], zeroinitializer
; CHECK-NEXT: [[TMP41:%.*]] = sext <8 x i1> [[TMP40]] to <8 x i32>
; CHECK-NEXT: [[A14:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A13]], i8 12)
; CHECK-NEXT: [[TMP42:%.*]] = or <8 x i32> [[TMP1]], [[TMP41]]
; CHECK-NEXT: [[TMP43:%.*]] = icmp ne <8 x i32> [[TMP42]], zeroinitializer
; CHECK-NEXT: [[TMP44:%.*]] = sext <8 x i1> [[TMP43]] to <8 x i32>
; CHECK-NEXT: [[A15:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A14]], i8 13)
; CHECK-NEXT: [[TMP45:%.*]] = or <8 x i32> [[TMP1]], [[TMP44]]
; CHECK-NEXT: [[TMP46:%.*]] = icmp ne <8 x i32> [[TMP45]], zeroinitializer
; CHECK-NEXT: [[TMP47:%.*]] = sext <8 x i1> [[TMP46]] to <8 x i32>
; CHECK-NEXT: [[A16:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A15]], i8 14)
; CHECK-NEXT: [[TMP48:%.*]] = or <8 x i32> [[TMP1]], [[TMP47]]
; CHECK-NEXT: [[TMP49:%.*]] = icmp ne <8 x i32> [[TMP48]], zeroinitializer
; CHECK-NEXT: [[TMP50:%.*]] = sext <8 x i1> [[TMP49]] to <8 x i32>
; CHECK-NEXT: [[A17:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A16]], i8 15)
; CHECK-NEXT: [[TMP51:%.*]] = or <8 x i32> [[TMP1]], [[TMP50]]
; CHECK-NEXT: [[TMP52:%.*]] = icmp ne <8 x i32> [[TMP51]], zeroinitializer
; CHECK-NEXT: [[TMP53:%.*]] = sext <8 x i1> [[TMP52]] to <8 x i32>
; CHECK-NEXT: [[A18:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A17]], i8 16)
; CHECK-NEXT: [[TMP54:%.*]] = or <8 x i32> [[TMP1]], [[TMP53]]
; CHECK-NEXT: [[TMP55:%.*]] = icmp ne <8 x i32> [[TMP54]], zeroinitializer
; CHECK-NEXT: [[TMP56:%.*]] = sext <8 x i1> [[TMP55]] to <8 x i32>
; CHECK-NEXT: [[A19:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A18]], i8 17)
; CHECK-NEXT: [[TMP57:%.*]] = or <8 x i32> [[TMP1]], [[TMP56]]
; CHECK-NEXT: [[TMP58:%.*]] = icmp ne <8 x i32> [[TMP57]], zeroinitializer
; CHECK-NEXT: [[TMP59:%.*]] = sext <8 x i1> [[TMP58]] to <8 x i32>
; CHECK-NEXT: [[A20:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A19]], i8 18)
; CHECK-NEXT: [[TMP60:%.*]] = or <8 x i32> [[TMP1]], [[TMP59]]
; CHECK-NEXT: [[TMP61:%.*]] = icmp ne <8 x i32> [[TMP60]], zeroinitializer
; CHECK-NEXT: [[TMP62:%.*]] = sext <8 x i1> [[TMP61]] to <8 x i32>
; CHECK-NEXT: [[A21:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A20]], i8 19)
; CHECK-NEXT: [[TMP63:%.*]] = or <8 x i32> [[TMP1]], [[TMP62]]
; CHECK-NEXT: [[TMP64:%.*]] = icmp ne <8 x i32> [[TMP63]], zeroinitializer
; CHECK-NEXT: [[TMP65:%.*]] = sext <8 x i1> [[TMP64]] to <8 x i32>
; CHECK-NEXT: [[A22:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A21]], i8 20)
; CHECK-NEXT: [[TMP66:%.*]] = or <8 x i32> [[TMP1]], [[TMP65]]
; CHECK-NEXT: [[TMP67:%.*]] = icmp ne <8 x i32> [[TMP66]], zeroinitializer
; CHECK-NEXT: [[TMP68:%.*]] = sext <8 x i1> [[TMP67]] to <8 x i32>
; CHECK-NEXT: [[A23:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A22]], i8 21)
; CHECK-NEXT: [[TMP69:%.*]] = or <8 x i32> [[TMP1]], [[TMP68]]
; CHECK-NEXT: [[TMP70:%.*]] = icmp ne <8 x i32> [[TMP69]], zeroinitializer
; CHECK-NEXT: [[TMP71:%.*]] = sext <8 x i1> [[TMP70]] to <8 x i32>
; CHECK-NEXT: [[A24:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A23]], i8 22)
; CHECK-NEXT: [[TMP72:%.*]] = or <8 x i32> [[TMP1]], [[TMP71]]
; CHECK-NEXT: [[TMP73:%.*]] = icmp ne <8 x i32> [[TMP72]], zeroinitializer
; CHECK-NEXT: [[TMP74:%.*]] = sext <8 x i1> [[TMP73]] to <8 x i32>
; CHECK-NEXT: [[A25:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A24]], i8 23)
; CHECK-NEXT: [[TMP75:%.*]] = or <8 x i32> [[TMP1]], [[TMP74]]
; CHECK-NEXT: [[TMP76:%.*]] = icmp ne <8 x i32> [[TMP75]], zeroinitializer
; CHECK-NEXT: [[TMP77:%.*]] = sext <8 x i1> [[TMP76]] to <8 x i32>
; CHECK-NEXT: [[A26:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A25]], i8 24)
; CHECK-NEXT: [[TMP78:%.*]] = or <8 x i32> [[TMP1]], [[TMP77]]
; CHECK-NEXT: [[TMP79:%.*]] = icmp ne <8 x i32> [[TMP78]], zeroinitializer
; CHECK-NEXT: [[TMP80:%.*]] = sext <8 x i1> [[TMP79]] to <8 x i32>
; CHECK-NEXT: [[A27:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A26]], i8 25)
; CHECK-NEXT: [[TMP81:%.*]] = or <8 x i32> [[TMP1]], [[TMP80]]
; CHECK-NEXT: [[TMP82:%.*]] = icmp ne <8 x i32> [[TMP81]], zeroinitializer
; CHECK-NEXT: [[TMP83:%.*]] = sext <8 x i1> [[TMP82]] to <8 x i32>
; CHECK-NEXT: [[A28:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A27]], i8 26)
; CHECK-NEXT: [[TMP84:%.*]] = or <8 x i32> [[TMP1]], [[TMP83]]
; CHECK-NEXT: [[TMP85:%.*]] = icmp ne <8 x i32> [[TMP84]], zeroinitializer
; CHECK-NEXT: [[TMP86:%.*]] = sext <8 x i1> [[TMP85]] to <8 x i32>
; CHECK-NEXT: [[A29:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A28]], i8 27)
; CHECK-NEXT: [[TMP87:%.*]] = or <8 x i32> [[TMP1]], [[TMP86]]
; CHECK-NEXT: [[TMP88:%.*]] = icmp ne <8 x i32> [[TMP87]], zeroinitializer
; CHECK-NEXT: [[TMP89:%.*]] = sext <8 x i1> [[TMP88]] to <8 x i32>
; CHECK-NEXT: [[A30:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A29]], i8 28)
; CHECK-NEXT: [[TMP90:%.*]] = or <8 x i32> [[TMP1]], [[TMP89]]
; CHECK-NEXT: [[TMP91:%.*]] = icmp ne <8 x i32> [[TMP90]], zeroinitializer
; CHECK-NEXT: [[TMP92:%.*]] = sext <8 x i1> [[TMP91]] to <8 x i32>
; CHECK-NEXT: [[A31:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A30]], i8 29)
; CHECK-NEXT: [[TMP93:%.*]] = or <8 x i32> [[TMP1]], [[TMP92]]
; CHECK-NEXT: [[TMP94:%.*]] = icmp ne <8 x i32> [[TMP93]], zeroinitializer
; CHECK-NEXT: [[TMP95:%.*]] = sext <8 x i1> [[TMP94]] to <8 x i32>
; CHECK-NEXT: [[A32:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A31]], i8 30)
; CHECK-NEXT: [[TMP96:%.*]] = or <8 x i32> [[TMP1]], [[TMP95]]
; CHECK-NEXT: [[TMP97:%.*]] = icmp ne <8 x i32> [[TMP96]], zeroinitializer
; CHECK-NEXT: [[TMP98:%.*]] = sext <8 x i1> [[TMP97]] to <8 x i32>
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> [[A0]], <8 x float> [[A32]], i8 31)
; CHECK-NEXT: store <8 x i32> [[TMP98]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x float> [[RES]]
;
  %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
  %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
  %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
  %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
  %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
  %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
  %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
  %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
  %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
  %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
  %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
  %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
  %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
  %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
  %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
  %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
  %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
  %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
  %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
  %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
  %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
  %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
  %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
  %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
  %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
  %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
  %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
  %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
  %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
  %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
  %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
  %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

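; The conversions below are handled strictly: the whole input shadow is
; collapsed to a single bit, any poisoned bit aborts via
; __msan_warning_noreturn, and the return shadow is zero.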
define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) #0 {
; CHECK-LABEL: @test_x86_avx_cvt_pd2_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> [[A0:%.*]])
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x float> [[RES]]
;
  %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone

define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) #0 {
; CHECK-LABEL: @test_x86_avx_cvt_pd2dq_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> [[A0:%.*]])
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
  %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone

define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) #0 {
; CHECK-LABEL: @test_x86_avx_cvt_ps2dq_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> [[A0:%.*]])
; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i32> [[RES]]
;
  %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone

define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) #0 {
; CHECK-LABEL: @test_x86_avx_cvtt_pd2dq_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> [[A0:%.*]])
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
  %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone

define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) #0 {
; CHECK-LABEL: @test_x86_avx_cvtt_ps2dq_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> [[A0:%.*]])
; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i32> [[RES]]
;
  %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone

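; For dp.ps, the instrumentation is imm8-aware: only the lanes selected by the
; mask (here -18, i.e. 0xEE) feed a per-half vector.reduce.or of the shadow,
; and a poisoned reduction taints the corresponding output lanes of that half.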
define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_dp_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i32> [[TMP3]], <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP4]])
; CHECK-NEXT: [[_MSDPP:%.*]] = icmp eq i32 [[TMP5]], 0
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[_MSDPP]], <8 x i1> zeroinitializer, <8 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>
; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true>, <8 x i32> [[TMP3]], <8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP7]])
; CHECK-NEXT: [[_MSDPP1:%.*]] = icmp eq i32 [[TMP8]], 0
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[_MSDPP1]], <8 x i1> zeroinitializer, <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i1> [[TMP6]], [[TMP9]]
; CHECK-NEXT: [[_MSDPP2:%.*]] = sext <8 x i1> [[TMP10]] to <8 x i32>
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]], i8 -18)
; CHECK-NEXT: store <8 x i32> [[_MSDPP2]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x float> [[RES]]
;
  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 -18) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

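; Horizontal add/sub propagate shadow coarsely: the OR of the two operand
; shadows, without modelling the pairwise lane structure.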
define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_hadd_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]])
; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x double> [[RES]]
;
  %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_hadd_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x float> [[RES]]
;
  %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_hsub_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]])
; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x double> [[RES]]
;
  %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_hsub_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x float> [[RES]]
;
  %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone

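; ldu.dq propagates the shadow of the loaded memory: the shadow address is the
; data address XORed with 87960930222080 (the x86-64 Linux shadow mapping), and
; the pointer operand itself is also checked.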
define <32 x i8> @test_x86_avx_ldu_dq_256(ptr %a0) #0 {
; CHECK-LABEL: @test_x86_avx_ldu_dq_256(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[A0:%.*]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP4]], align 1
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK: 5:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 6:
; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx.ldu.dq.256(ptr [[A0]])
; CHECK-NEXT: store <32 x i8> [[_MSLD]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i8> [[RES]]
;
  %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(ptr %a0) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx.ldu.dq.256(ptr) nounwind readonly

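; maskload is approximated conservatively: the pointer and mask shadows are
; checked strictly, and the loaded value's shadow is assumed clean (zero).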
define <2 x double> @test_x86_avx_maskload_pd(ptr %a0, <2 x i64> %mask) #0 {
; CHECK-LABEL: @test_x86_avx_maskload_pd(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
; CHECK: 4:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 5:
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.maskload.pd(ptr [[A0:%.*]], <2 x i64> [[MASK:%.*]])
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.x86.avx.maskload.pd(ptr %a0, <2 x i64> %mask) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.maskload.pd(ptr, <2 x i64>) nounwind readonly

define <4 x double> @test_x86_avx_maskload_pd_256(ptr %a0, <4 x i64> %mask) #0 {
; CHECK-LABEL: @test_x86_avx_maskload_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
; CHECK: 4:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 5:
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.maskload.pd.256(ptr [[A0:%.*]], <4 x i64> [[MASK:%.*]])
; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x double> [[RES]]
;
  %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(ptr %a0, <4 x i64> %mask) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.maskload.pd.256(ptr, <4 x i64>) nounwind readonly

define <4 x float> @test_x86_avx_maskload_ps(ptr %a0, <4 x i32> %mask) #0 {
; CHECK-LABEL: @test_x86_avx_maskload_ps(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
; CHECK: 4:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 5:
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.maskload.ps(ptr [[A0:%.*]], <4 x i32> [[MASK:%.*]])
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x float> [[RES]]
;
  %res = call <4 x float> @llvm.x86.avx.maskload.ps(ptr %a0, <4 x i32> %mask) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.maskload.ps(ptr, <4 x i32>) nounwind readonly

define <8 x float> @test_x86_avx_maskload_ps_256(ptr %a0, <8 x i32> %mask) #0 {
; CHECK-LABEL: @test_x86_avx_maskload_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
; CHECK: 4:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 5:
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.maskload.ps.256(ptr [[A0:%.*]], <8 x i32> [[MASK:%.*]])
; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x float> [[RES]]
;
  %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(ptr %a0, <8 x i32> %mask) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.maskload.ps.256(ptr, <8 x i32>) nounwind readonly

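; maskstore checks all three operand shadows (pointer, mask, data) strictly
; before the store; no shadow store is emitted for the written memory here.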
define void @test_x86_avx_maskstore_pd(ptr %a0, <2 x i64> %mask, <2 x double> %a2) #0 {
; CHECK-LABEL: @test_x86_avx_maskstore_pd(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK: 6:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 7:
; CHECK-NEXT: call void @llvm.x86.avx.maskstore.pd(ptr [[A0:%.*]], <2 x i64> [[MASK:%.*]], <2 x double> [[A2:%.*]])
; CHECK-NEXT: ret void
;
  call void @llvm.x86.avx.maskstore.pd(ptr %a0, <2 x i64> %mask, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.pd(ptr, <2 x i64>, <2 x double>) nounwind

define void @test_x86_avx_maskstore_pd_256(ptr %a0, <4 x i64> %mask, <4 x double> %a2) #0 {
; CHECK-LABEL: @test_x86_avx_maskstore_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP3]] to i256
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0
; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK: 6:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 7:
; CHECK-NEXT: call void @llvm.x86.avx.maskstore.pd.256(ptr [[A0:%.*]], <4 x i64> [[MASK:%.*]], <4 x double> [[A2:%.*]])
; CHECK-NEXT: ret void
;
  call void @llvm.x86.avx.maskstore.pd.256(ptr %a0, <4 x i64> %mask, <4 x double> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.pd.256(ptr, <4 x i64>, <4 x double>) nounwind

define void @test_x86_avx_maskstore_ps(ptr %a0, <4 x i32> %mask, <4 x float> %a2) #0 {
; CHECK-LABEL: @test_x86_avx_maskstore_ps(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0
; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK: 6:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 7:
; CHECK-NEXT: call void @llvm.x86.avx.maskstore.ps(ptr [[A0:%.*]], <4 x i32> [[MASK:%.*]], <4 x float> [[A2:%.*]])
; CHECK-NEXT: ret void
;
  call void @llvm.x86.avx.maskstore.ps(ptr %a0, <4 x i32> %mask, <4 x float> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.ps(ptr, <4 x i32>, <4 x float>) nounwind

define void @test_x86_avx_maskstore_ps_256(ptr %a0, <8 x i32> %mask, <8 x float> %a2) #0 {
; CHECK-LABEL: @test_x86_avx_maskstore_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP3]] to i256
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0
; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
; CHECK: 6:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 7:
; CHECK-NEXT: call void @llvm.x86.avx.maskstore.ps.256(ptr [[A0:%.*]], <8 x i32> [[MASK:%.*]], <8 x float> [[A2:%.*]])
; CHECK-NEXT: ret void
;
  call void @llvm.x86.avx.maskstore.ps.256(ptr %a0, <8 x i32> %mask, <8 x float> %a2)
  ret void
}
declare void @llvm.x86.avx.maskstore.ps.256(ptr, <8 x i32>, <8 x float>) nounwind

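; max/min propagate shadow as the plain OR of the operand shadows.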
define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_max_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]])
; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x double> [[RES]]
;
  %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_max_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x float> [[RES]]
;
  %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone

define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_min_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]])
; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x double> [[RES]]
;
  %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone

define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_min_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x float> [[RES]]
;
  %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone

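; movmsk is handled strictly: any poisoned input bit aborts, and the i32
; result's shadow is zero.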
define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) #0 {
; CHECK-LABEL: @test_x86_avx_movmsk_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> [[A0:%.*]])
; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone

define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) #0 {
; CHECK-LABEL: @test_x86_avx_movmsk_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> [[A0:%.*]])
; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone

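; For the ptest family, the operand shadows are ORed, collapsed to an i4 lane
; mask, and zero-extended, so the result shadow is nonzero iff any input lane
; was poisoned.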
define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_ptestc_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone

define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_ptestnzc_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone

define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_ptestz_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> [[A0:%.*]], <4 x i64> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone

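; rcp/round/rsqrt are element-wise and unary, so the input shadow is passed
; through to the return slot unchanged.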
define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) #0 {
; CHECK-LABEL: @test_x86_avx_rcp_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> [[A0:%.*]])
; CHECK-NEXT: store <8 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x float> [[RES]]
;
  %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone

define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) #0 {
; CHECK-LABEL: @test_x86_avx_round_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> [[A0:%.*]], i32 7)
; CHECK-NEXT: store <4 x i64> [[TMP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x double> [[RES]]
;
  %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone

define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) #0 {
; CHECK-LABEL: @test_x86_avx_round_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> [[A0:%.*]], i32 7)
; CHECK-NEXT: store <8 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x float> [[RES]]
;
  %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone

930 define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) #0 {
931 ; CHECK-LABEL: @test_x86_avx_rsqrt_ps_256(
932 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
933 ; CHECK-NEXT: call void @llvm.donothing()
934 ; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> [[A0:%.*]])
935 ; CHECK-NEXT: store <8 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8
936 ; CHECK-NEXT: ret <8 x float> [[RES]]
938 %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
941 declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
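
; The variable-permute intrinsics are handled strictly: if any shadow bit of
; either operand is set, execution branches to __msan_warning_noreturn, and
; the return shadow stored back to TLS is zeroinitializer (fully clean).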
define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vpermilvar_pd(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK: 5:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 6:
; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A0:%.*]], <2 x i64> [[A1:%.*]])
; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x double> [[RES]]
;
  %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone

define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vpermilvar_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK: 5:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 6:
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0:%.*]], <4 x i64> [[A1:%.*]])
; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x double> [[RES]]
;
  %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
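
; With a constant control vector there is no control-operand shadow, so the
; variant below only checks the shadow of the data operand.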
define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) #0 {
; CHECK-LABEL: @test_x86_avx_vpermilvar_pd_256_2(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0:%.*]], <4 x i64> <i64 2, i64 0, i64 0, i64 2>)
; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x double> [[RES]]
;
  %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
  ret <4 x double> %res
}

define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vpermilvar_ps(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK: 5:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 6:
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0:%.*]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x float> [[RES]]
;
  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
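
; The load variant must additionally validate the pointer operand before
; dereferencing it, then fetch the control vector's shadow from shadow
; memory, whose address is the application address XOR-ed with
; 87960930222080 (0x500000000000).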
define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, ptr %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vpermilvar_ps_load(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[A2:%.*]] = load <4 x i32>, ptr [[A1:%.*]], align 16
; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A1]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
; CHECK: 10:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 11:
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0:%.*]], <4 x i32> [[A2]])
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x float> [[RES]]
;
  %a2 = load <4 x i32>, ptr %a1
  %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone

define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vpermilvar_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP3]], 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
; CHECK: 5:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 6:
; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A0:%.*]], <8 x i32> [[A1:%.*]])
; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x float> [[RES]]
;
  %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
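
; The VTEST intrinsics follow the same shadow pattern as PTEST above, with
; the mask width matching the vector length (i2 for <2 x i1>, i4 for
; <4 x i1>, i8 for <8 x i1>).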
define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vtestc_pd(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2
; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vtestc_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone

define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vtestc_ps(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vtestc_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i1> [[TMP4]] to i8
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone

define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vtestnzc_pd(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2
; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vtestnzc_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone

define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vtestnzc_ps(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vtestnzc_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i1> [[TMP4]] to i8
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone

define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vtestz_pd(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i1> [[TMP4]] to i2
; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vtestz_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> [[A0:%.*]], <4 x double> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone

define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vtestz_ps(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i1> [[TMP4]] to i4
; CHECK-NEXT: [[TMP6:%.*]] = zext i4 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone

define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vtestz_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i1> [[TMP4]] to i8
; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> [[A0:%.*]], <8 x float> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
  %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone
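
; vzeroall/vzeroupper take no arguments and return nothing, so there is no
; shadow to load, check, or store; MSan only inserts its llvm.donothing()
; marker.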
define void @test_x86_avx_vzeroall() #0 {
; CHECK-LABEL: @test_x86_avx_vzeroall(
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: call void @llvm.x86.avx.vzeroall()
; CHECK-NEXT: ret void
;
  call void @llvm.x86.avx.vzeroall()
  ret void
}
declare void @llvm.x86.avx.vzeroall() nounwind

define void @test_x86_avx_vzeroupper() #0 {
; CHECK-LABEL: @test_x86_avx_vzeroupper(
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: call void @llvm.x86.avx.vzeroupper()
; CHECK-NEXT: ret void
;
  call void @llvm.x86.avx.vzeroupper()
  ret void
}
declare void @llvm.x86.avx.vzeroupper() nounwind
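
; For the nontemporal stores below, the pointer's shadow is checked, the
; value's shadow is written to the matching shadow location (address XOR
; 87960930222080, i.e. 0x500000000000), and the !nontemporal hint is kept
; only on the application store. In movnt_dq, note that the shadow of the
; undef shuffle source is all-ones (splat (i64 -1)), i.e. fully poisoned.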
define void @movnt_dq(ptr %p, <2 x i64> %a1) nounwind #0 {
; CHECK-LABEL: @movnt_dq(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[A2:%.*]] = add <2 x i64> [[A1:%.*]], splat (i64 1)
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[_MSPROP]], <2 x i64> splat (i64 -1), <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[A3:%.*]] = shufflevector <2 x i64> [[A2]], <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: store <4 x i64> [[_MSPROP1]], ptr [[TMP7]], align 32
; CHECK-NEXT: store <4 x i64> [[A3]], ptr [[P]], align 32, !nontemporal [[META2:![0-9]+]]
; CHECK-NEXT: ret void
;
  %a2 = add <2 x i64> %a1, <i64 1, i64 1>
  %a3 = shufflevector <2 x i64> %a2, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  tail call void @llvm.x86.avx.movnt.dq.256(ptr %p, <4 x i64> %a3) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.dq.256(ptr, <4 x i64>) nounwind

define void @movnt_ps(ptr %p, <8 x float> %a) nounwind #0 {
; CHECK-LABEL: @movnt_ps(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[TMP7]], align 32
; CHECK-NEXT: store <8 x float> [[A:%.*]], ptr [[P]], align 32, !nontemporal [[META2]]
; CHECK-NEXT: ret void
;
  tail call void @llvm.x86.avx.movnt.ps.256(ptr %p, <8 x float> %a) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.ps.256(ptr, <8 x float>) nounwind

define void @movnt_pd(ptr %p, <4 x double> %a1) nounwind #0 {
; add operation forces the execution domain.
; CHECK-LABEL: @movnt_pd(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[A2:%.*]] = fadd <4 x double> [[A1:%.*]], zeroinitializer
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: store <4 x i64> [[_MSPROP]], ptr [[TMP7]], align 32
; CHECK-NEXT: store <4 x double> [[A2]], ptr [[P]], align 32, !nontemporal [[META2]]
; CHECK-NEXT: ret void
;
  %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
  tail call void @llvm.x86.avx.movnt.pd.256(ptr %p, <4 x double> %a2) nounwind
  ret void
}
declare void @llvm.x86.avx.movnt.pd.256(ptr, <4 x double>) nounwind
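
; pclmulqdq with imm8 = 0 multiplies the low quadword of each operand, so
; the instrumentation splats each operand's element-0 shadow across both
; lanes (shufflevector with a zeroinitializer mask) and ORs the results to
; form the return shadow.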
define <2 x i64> @test_x86_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_pclmulqdq(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]], i8 0)
; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[RES]]
;
  %res = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone

attributes #0 = { sanitize_memory }