1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s
4 target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
5 target triple = "x86_64-unknown-linux-gnu"
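; Summary of the instrumentation pattern checked throughout this file: argument
; shadows are loaded from @__msan_param_tls (the second argument's shadow lives
; at byte offset 16), the result shadow is stored to @__msan_retval_tls, and
; intrinsics whose result shadow cannot be derived from the operand shadows are
; handled by checking the operand shadow and branching to
; @__msan_warning_noreturn when it is non-zero.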
7 define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) #0 {
8 ; CHECK-LABEL: @test_x86_sse2_cmp_pd(
9 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
10 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
11 ; CHECK-NEXT: call void @llvm.donothing()
12 ; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
13 ; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer
14 ; CHECK-NEXT: [[TMP5:%.*]] = sext <2 x i1> [[TMP4]] to <2 x i64>
15 ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i8 7)
16 ; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr @__msan_retval_tls, align 8
17 ; CHECK-NEXT: ret <2 x double> [[RES]]
19 %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
22 declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
25 define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) #0 {
26 ; CHECK-LABEL: @test_x86_sse2_cmp_sd(
27 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
28 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
29 ; CHECK-NEXT: call void @llvm.donothing()
30 ; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
31 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
32 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
33 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
34 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <2 x i64>
35 ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i8 7)
36 ; CHECK-NEXT: store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8
37 ; CHECK-NEXT: ret <2 x double> [[RES]]
39 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
}
42 declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
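; The COMI tests below share one pattern: the operand shadows are OR'ed
; together, lane 0 (the only lane these scalar comparisons read) is extracted,
; compared against zero, and sign-extended to i32 to form the result shadow.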
45 define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) #0 {
46 ; CHECK-LABEL: @test_x86_sse2_comieq_sd(
47 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
48 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
49 ; CHECK-NEXT: call void @llvm.donothing()
50 ; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
51 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
52 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
53 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
54 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
55 ; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
56 ; CHECK-NEXT: ret i32 [[RES]]
58 %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
61 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
64 define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) #0 {
65 ; CHECK-LABEL: @test_x86_sse2_comige_sd(
66 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
67 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
68 ; CHECK-NEXT: call void @llvm.donothing()
69 ; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
70 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
71 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
72 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
73 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.comige.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
74 ; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
75 ; CHECK-NEXT: ret i32 [[RES]]
77 %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
80 declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
83 define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) #0 {
84 ; CHECK-LABEL: @test_x86_sse2_comigt_sd(
85 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
86 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
87 ; CHECK-NEXT: call void @llvm.donothing()
88 ; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
89 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
90 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
91 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
92 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
93 ; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
94 ; CHECK-NEXT: ret i32 [[RES]]
96 %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
99 declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
102 define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) #0 {
103 ; CHECK-LABEL: @test_x86_sse2_comile_sd(
104 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
105 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
106 ; CHECK-NEXT: call void @llvm.donothing()
107 ; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
108 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
109 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
110 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
111 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.comile.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
112 ; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
113 ; CHECK-NEXT: ret i32 [[RES]]
115 %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
118 declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
121 define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) #0 {
122 ; CHECK-LABEL: @test_x86_sse2_comilt_sd(
123 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
124 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
125 ; CHECK-NEXT: call void @llvm.donothing()
126 ; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
127 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
128 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
129 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
130 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
131 ; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
132 ; CHECK-NEXT: ret i32 [[RES]]
134 %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
137 declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
140 define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) #0 {
141 ; CHECK-LABEL: @test_x86_sse2_comineq_sd(
142 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
143 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
144 ; CHECK-NEXT: call void @llvm.donothing()
145 ; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
146 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
147 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
148 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
149 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
150 ; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
151 ; CHECK-NEXT: ret i32 [[RES]]
153 %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
156 declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
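; The conversion tests below use the strict-check pattern: the operand shadow
; is bitcast to an integer and compared against zero, a non-zero shadow
; branches to @__msan_warning_noreturn, and a clean (all-zero) result shadow is
; stored to @__msan_retval_tls. The *_load variants additionally check the
; shadow of the pointer argument and compute the shadow address of the loaded
; value by XOR'ing the pointer with 87960930222080 (0x500000000000).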
159 define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) #0 {
160 ; CHECK-LABEL: @test_x86_sse2_cvtpd2dq(
161 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
162 ; CHECK-NEXT: call void @llvm.donothing()
163 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
164 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
165 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
167 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
168 ; CHECK-NEXT: unreachable
170 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]])
171 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
172 ; CHECK-NEXT: ret <4 x i32> [[RES]]
174 %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
177 declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
180 define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind #0 {
181 ; CHECK-LABEL: @test_mm_cvtpd_epi32_zext(
182 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
183 ; CHECK-NEXT: call void @llvm.donothing()
184 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
185 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
186 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
188 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
189 ; CHECK-NEXT: unreachable
191 ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]])
192 ; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
193 ; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
194 ; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
195 ; CHECK-NEXT: ret <2 x i64> [[BC]]
197 %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
198 %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
199 %bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
204 define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 {
205 ; CHECK-LABEL: @test_mm_cvtpd_epi32_zext_load(
206 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
207 ; CHECK-NEXT: call void @llvm.donothing()
208 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
209 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
211 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
212 ; CHECK-NEXT: unreachable
214 ; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16
215 ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P0]] to i64
216 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
217 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
218 ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
219 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
220 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
221 ; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
223 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
224 ; CHECK-NEXT: unreachable
226 ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0]])
227 ; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
228 ; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
229 ; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
230 ; CHECK-NEXT: ret <2 x i64> [[BC]]
232 %a0 = load <2 x double>, ptr %p0
233 %cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
234 %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
235 %bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
240 define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) #0 {
241 ; CHECK-LABEL: @test_x86_sse2_cvtpd2ps(
242 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
243 ; CHECK-NEXT: call void @llvm.donothing()
244 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
245 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
246 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
248 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
249 ; CHECK-NEXT: unreachable
251 ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]])
252 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
253 ; CHECK-NEXT: ret <4 x float> [[RES]]
255 %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
258 declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
260 define <4 x float> @test_x86_sse2_cvtpd2ps_zext(<2 x double> %a0) nounwind #0 {
261 ; CHECK-LABEL: @test_x86_sse2_cvtpd2ps_zext(
262 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
263 ; CHECK-NEXT: call void @llvm.donothing()
264 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
265 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
266 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
268 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
269 ; CHECK-NEXT: unreachable
271 ; CHECK-NEXT: [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]])
272 ; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[CVT]], <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
273 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
274 ; CHECK-NEXT: ret <4 x float> [[RES]]
276 %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
277 %res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x float> %res
}
281 define <4 x float> @test_x86_sse2_cvtpd2ps_zext_load(ptr %p0) nounwind #0 {
282 ; CHECK-LABEL: @test_x86_sse2_cvtpd2ps_zext_load(
283 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
284 ; CHECK-NEXT: call void @llvm.donothing()
285 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
286 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
288 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
289 ; CHECK-NEXT: unreachable
291 ; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16
292 ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P0]] to i64
293 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
294 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
295 ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
296 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
297 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
298 ; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
300 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
301 ; CHECK-NEXT: unreachable
303 ; CHECK-NEXT: [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0]])
304 ; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[CVT]], <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
305 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
306 ; CHECK-NEXT: ret <4 x float> [[RES]]
308 %a0 = load <2 x double>, ptr %p0
309 %cvt = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
310 %res = shufflevector <4 x float> %cvt, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x float> %res
}
314 define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) #0 {
315 ; CHECK-LABEL: @test_x86_sse2_cvtps2dq(
316 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
317 ; CHECK-NEXT: call void @llvm.donothing()
318 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
319 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
320 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
322 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
323 ; CHECK-NEXT: unreachable
325 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> [[A0:%.*]])
326 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
327 ; CHECK-NEXT: ret <4 x i32> [[RES]]
329 %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
332 declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
335 define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) #0 {
336 ; CHECK-LABEL: @test_x86_sse2_cvtsd2si(
337 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
338 ; CHECK-NEXT: call void @llvm.donothing()
339 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
340 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
341 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
343 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
344 ; CHECK-NEXT: unreachable
346 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> [[A0:%.*]])
347 ; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
348 ; CHECK-NEXT: ret i32 [[RES]]
350 %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
ret i32 %res
}
353 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
356 define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) #0 {
357 ; CHECK-LABEL: @test_x86_sse2_cvtsd2ss(
358 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
359 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
360 ; CHECK-NEXT: call void @llvm.donothing()
361 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
362 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
363 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0
364 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
366 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
367 ; CHECK-NEXT: unreachable
369 ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1:%.*]])
370 ; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
371 ; CHECK-NEXT: ret <4 x float> [[RES]]
373 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
376 declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
379 define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, ptr %p1) #0 {
380 ; CHECK-LABEL: @test_x86_sse2_cvtsd2ss_load(
381 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
382 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
383 ; CHECK-NEXT: call void @llvm.donothing()
384 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
385 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
387 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
388 ; CHECK-NEXT: unreachable
390 ; CHECK-NEXT: [[A1:%.*]] = load <2 x double>, ptr [[P1:%.*]], align 16
391 ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P1]] to i64
392 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
393 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
394 ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
395 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[_MSLD]], i32 0
396 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
397 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0
398 ; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
400 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
401 ; CHECK-NEXT: unreachable
403 ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1]])
404 ; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
405 ; CHECK-NEXT: ret <4 x float> [[RES]]
407 %a1 = load <2 x double>, ptr %p1
408 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
413 define <4 x float> @test_x86_sse2_cvtsd2ss_load_optsize(<4 x float> %a0, ptr %p1) optsize #0 {
414 ; CHECK-LABEL: @test_x86_sse2_cvtsd2ss_load_optsize(
415 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
416 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
417 ; CHECK-NEXT: call void @llvm.donothing()
418 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
419 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
421 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
422 ; CHECK-NEXT: unreachable
424 ; CHECK-NEXT: [[A1:%.*]] = load <2 x double>, ptr [[P1:%.*]], align 16
425 ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P1]] to i64
426 ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
427 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
428 ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
429 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[_MSLD]], i32 0
430 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
431 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0
432 ; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
434 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
435 ; CHECK-NEXT: unreachable
437 ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1]])
438 ; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
439 ; CHECK-NEXT: ret <4 x float> [[RES]]
441 %a1 = load <2 x double>, ptr %p1
442 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
447 define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) #0 {
448 ; CHECK-LABEL: @test_x86_sse2_cvttpd2dq(
449 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
450 ; CHECK-NEXT: call void @llvm.donothing()
451 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
452 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
453 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
455 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
456 ; CHECK-NEXT: unreachable
458 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]])
459 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
460 ; CHECK-NEXT: ret <4 x i32> [[RES]]
462 %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
465 declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
468 define <2 x i64> @test_mm_cvttpd_epi32_zext(<2 x double> %a0) nounwind #0 {
469 ; CHECK-LABEL: @test_mm_cvttpd_epi32_zext(
470 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
471 ; CHECK-NEXT: call void @llvm.donothing()
472 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
473 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
474 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
476 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
477 ; CHECK-NEXT: unreachable
479 ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]])
480 ; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
481 ; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
482 ; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
483 ; CHECK-NEXT: ret <2 x i64> [[BC]]
485 %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
486 %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
487 %bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
492 define <2 x i64> @test_mm_cvttpd_epi32_zext_load(ptr %p0) nounwind #0 {
493 ; CHECK-LABEL: @test_mm_cvttpd_epi32_zext_load(
494 ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
495 ; CHECK-NEXT: call void @llvm.donothing()
496 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
497 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
499 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
500 ; CHECK-NEXT: unreachable
502 ; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16
503 ; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P0]] to i64
504 ; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
505 ; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
506 ; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
507 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
508 ; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
509 ; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
511 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
512 ; CHECK-NEXT: unreachable
514 ; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0]])
515 ; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
516 ; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
517 ; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
518 ; CHECK-NEXT: ret <2 x i64> [[BC]]
520 %a0 = load <2 x double>, ptr %p0
521 %cvt = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0)
522 %res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
523 %bc = bitcast <4 x i32> %res to <2 x i64>
ret <2 x i64> %bc
}
528 define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) #0 {
529 ; CHECK-LABEL: @test_x86_sse2_cvttps2dq(
530 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
531 ; CHECK-NEXT: call void @llvm.donothing()
532 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
533 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
534 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
536 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
537 ; CHECK-NEXT: unreachable
539 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> [[A0:%.*]])
540 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
541 ; CHECK-NEXT: ret <4 x i32> [[RES]]
543 %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
546 declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
549 define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) #0 {
550 ; CHECK-LABEL: @test_x86_sse2_cvttsd2si(
551 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
552 ; CHECK-NEXT: call void @llvm.donothing()
553 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
554 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
555 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
557 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
558 ; CHECK-NEXT: unreachable
560 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> [[A0:%.*]])
561 ; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
562 ; CHECK-NEXT: ret i32 [[RES]]
564 %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
ret i32 %res
}
567 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
570 define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) #0 {
571 ; CHECK-LABEL: @test_x86_sse2_max_pd(
572 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
573 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
574 ; CHECK-NEXT: call void @llvm.donothing()
575 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
576 ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
577 ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
578 ; CHECK-NEXT: ret <2 x double> [[RES]]
580 %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
581 ret <2 x double> %res
}
583 declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
586 define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) #0 {
587 ; CHECK-LABEL: @test_x86_sse2_max_sd(
588 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
589 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
590 ; CHECK-NEXT: call void @llvm.donothing()
591 ; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
592 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP3]], <2 x i32> <i32 2, i32 1>
593 ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
594 ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
595 ; CHECK-NEXT: ret <2 x double> [[RES]]
597 %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
598 ret <2 x double> %res
}
600 declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
603 define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) #0 {
604 ; CHECK-LABEL: @test_x86_sse2_min_pd(
605 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
606 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
607 ; CHECK-NEXT: call void @llvm.donothing()
608 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
609 ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
610 ; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
611 ; CHECK-NEXT: ret <2 x double> [[RES]]
613 %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
614 ret <2 x double> %res
}
616 declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
619 define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) #0 {
620 ; CHECK-LABEL: @test_x86_sse2_min_sd(
621 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
622 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
623 ; CHECK-NEXT: call void @llvm.donothing()
624 ; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
625 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP3]], <2 x i32> <i32 2, i32 1>
626 ; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
627 ; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
628 ; CHECK-NEXT: ret <2 x double> [[RES]]
630 %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
631 ret <2 x double> %res
}
633 declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
636 define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) #0 {
637 ; CHECK-LABEL: @test_x86_sse2_movmsk_pd(
638 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
639 ; CHECK-NEXT: call void @llvm.donothing()
640 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
641 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
642 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
644 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
645 ; CHECK-NEXT: unreachable
647 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> [[A0:%.*]])
648 ; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
649 ; CHECK-NEXT: ret i32 [[RES]]
651 %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
ret i32 %res
}
654 declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
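; For the pack intrinsics below, the result shadow is obtained by saturating
; each operand shadow (icmp ne zero, then sext) and feeding the results through
; a signed pack intrinsic (packssdw/packsswb), so any poisoned input lane
; produces a fully poisoned output lane.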
657 define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) #0 {
658 ; CHECK-LABEL: @test_x86_sse2_packssdw_128(
659 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
660 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
661 ; CHECK-NEXT: call void @llvm.donothing()
662 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer
663 ; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
664 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
665 ; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
666 ; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP4]], <4 x i32> [[TMP6]])
667 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]])
668 ; CHECK-NEXT: store <8 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
669 ; CHECK-NEXT: ret <8 x i16> [[RES]]
671 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
674 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
677 define <8 x i16> @test_x86_sse2_packssdw_128_fold() #0 {
678 ; CHECK-LABEL: @test_x86_sse2_packssdw_128_fold(
679 ; CHECK-NEXT: call void @llvm.donothing()
680 ; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> zeroinitializer)
681 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>)
682 ; CHECK-NEXT: store <8 x i16> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
683 ; CHECK-NEXT: ret <8 x i16> [[RES]]
685 %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>)
ret <8 x i16> %res
}
690 define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) #0 {
691 ; CHECK-LABEL: @test_x86_sse2_packsswb_128(
692 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
693 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
694 ; CHECK-NEXT: call void @llvm.donothing()
695 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP1]], zeroinitializer
696 ; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
697 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
698 ; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
699 ; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP4]], <8 x i16> [[TMP6]])
700 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
701 ; CHECK-NEXT: store <16 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
702 ; CHECK-NEXT: ret <16 x i8> [[RES]]
704 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
707 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
710 define <16 x i8> @test_x86_sse2_packsswb_128_fold() #0 {
711 ; CHECK-LABEL: @test_x86_sse2_packsswb_128_fold(
712 ; CHECK-NEXT: call void @llvm.donothing()
713 ; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> zeroinitializer, <8 x i16> zeroinitializer)
714 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 -1, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
715 ; CHECK-NEXT: store <16 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
716 ; CHECK-NEXT: ret <16 x i8> [[RES]]
718 %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
ret <16 x i8> %res
}
723 define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) #0 {
724 ; CHECK-LABEL: @test_x86_sse2_packuswb_128(
725 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
726 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
727 ; CHECK-NEXT: call void @llvm.donothing()
728 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP1]], zeroinitializer
729 ; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
730 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
731 ; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
732 ; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP4]], <8 x i16> [[TMP6]])
733 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
734 ; CHECK-NEXT: store <16 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
735 ; CHECK-NEXT: ret <16 x i8> [[RES]]
737 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
740 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
743 define <16 x i8> @test_x86_sse2_packuswb_128_fold() #0 {
744 ; CHECK-LABEL: @test_x86_sse2_packuswb_128_fold(
745 ; CHECK-NEXT: call void @llvm.donothing()
746 ; CHECK-NEXT: [[_MSPROP_VECTOR_PACK:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> zeroinitializer, <8 x i16> zeroinitializer)
747 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 -1, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
748 ; CHECK-NEXT: store <16 x i8> [[_MSPROP_VECTOR_PACK]], ptr @__msan_retval_tls, align 8
749 ; CHECK-NEXT: ret <16 x i8> [[RES]]
751 %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <8 x i16> zeroinitializer)
ret <16 x i8> %res
}
756 define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) #0 {
757 ; CHECK-LABEL: @test_x86_sse2_pavg_b(
758 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
759 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
760 ; CHECK-NEXT: call void @llvm.donothing()
761 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]]
762 ; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]])
763 ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
764 ; CHECK-NEXT: ret <16 x i8> [[RES]]
766 %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
ret <16 x i8> %res
}
769 declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
772 define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) #0 {
773 ; CHECK-LABEL: @test_x86_sse2_pavg_w(
774 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
775 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
776 ; CHECK-NEXT: call void @llvm.donothing()
777 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]]
778 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
779 ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
780 ; CHECK-NEXT: ret <8 x i16> [[RES]]
782 %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
785 declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
788 define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) #0 {
789 ; CHECK-LABEL: @test_x86_sse2_pmadd_wd(
790 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
791 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
792 ; CHECK-NEXT: call void @llvm.donothing()
793 ; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]]
794 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
795 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
796 ; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
797 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
798 ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
799 ; CHECK-NEXT: ret <4 x i32> [[RES]]
801 %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
804 declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
807 define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) #0 {
808 ; CHECK-LABEL: @test_x86_sse2_pmovmskb_128(
809 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
810 ; CHECK-NEXT: call void @llvm.donothing()
811 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
812 ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
813 ; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
815 ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
816 ; CHECK-NEXT: unreachable
818 ; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> [[A0:%.*]])
819 ; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
820 ; CHECK-NEXT: ret i32 [[RES]]
822 %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
ret i32 %res
}
825 declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
828 define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) #0 {
829 ; CHECK-LABEL: @test_x86_sse2_pmulh_w(
830 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
831 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
832 ; CHECK-NEXT: call void @llvm.donothing()
833 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]]
834 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
835 ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
836 ; CHECK-NEXT: ret <8 x i16> [[RES]]
838 %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
841 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
844 define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) #0 {
845 ; CHECK-LABEL: @test_x86_sse2_pmulhu_w(
846 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
847 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
848 ; CHECK-NEXT: call void @llvm.donothing()
849 ; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]]
850 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
851 ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
852 ; CHECK-NEXT: ret <8 x i16> [[RES]]
854 %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
857 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
860 define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) #0 {
861 ; CHECK-LABEL: @test_x86_sse2_psad_bw(
862 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
863 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
864 ; CHECK-NEXT: call void @llvm.donothing()
865 ; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i8> [[TMP1]], [[TMP2]]
866 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
867 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
868 ; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
869 ; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], splat (i64 48)
870 ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]])
871 ; CHECK-NEXT: store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8
872 ; CHECK-NEXT: ret <2 x i64> [[RES]]
874 %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
877 declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
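; For the vector shifts below, a shift by a vector count collapses the count's
; shadow to a single bit (trunc to i64, icmp ne zero, sext across the vector)
; and ORs it into the shifted shadow of the value operand; a shift by an
; immediate simply applies the same shift to the value operand's shadow.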
880 define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) #0 {
881 ; CHECK-LABEL: @test_x86_sse2_psll_d(
882 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
883 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
884 ; CHECK-NEXT: call void @llvm.donothing()
885 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
886 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
887 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
888 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
889 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <4 x i32>
890 ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
891 ; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], [[TMP7]]
892 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]])
893 ; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
894 ; CHECK-NEXT: ret <4 x i32> [[RES]]
896 %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
899 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
902 define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) #0 {
903 ; CHECK-LABEL: @test_x86_sse2_psll_q(
904 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
905 ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
906 ; CHECK-NEXT: call void @llvm.donothing()
907 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
908 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
909 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
910 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
911 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <2 x i64>
912 ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
913 ; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], [[TMP7]]
914 ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1]])
915 ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8
916 ; CHECK-NEXT: ret <2 x i64> [[RES]]
918 %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
921 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
924 define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) #0 {
925 ; CHECK-LABEL: @test_x86_sse2_psll_w(
926 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
927 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
928 ; CHECK-NEXT: call void @llvm.donothing()
929 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
930 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
931 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
932 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
933 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <8 x i16>
934 ; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> [[TMP1]], <8 x i16> [[A1:%.*]])
935 ; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP7]]
936 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]])
937 ; CHECK-NEXT: store <8 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
938 ; CHECK-NEXT: ret <8 x i16> [[RES]]
940 %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
943 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
946 define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) #0 {
947 ; CHECK-LABEL: @test_x86_sse2_pslli_d(
948 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
949 ; CHECK-NEXT: call void @llvm.donothing()
950 ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> [[TMP1]], i32 7)
951 ; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
952 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> [[A0:%.*]], i32 7)
953 ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
954 ; CHECK-NEXT: ret <4 x i32> [[RES]]
956 %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
959 declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
962 define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) #0 {
963 ; CHECK-LABEL: @test_x86_sse2_pslli_q(
964 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
965 ; CHECK-NEXT: call void @llvm.donothing()
966 ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> [[TMP1]], i32 7)
967 ; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP2]], zeroinitializer
968 ; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> [[A0:%.*]], i32 7)
969 ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8
970 ; CHECK-NEXT: ret <2 x i64> [[RES]]
972 %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
975 declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
978 define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) #0 {
979 ; CHECK-LABEL: @test_x86_sse2_pslli_w(
980 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
981 ; CHECK-NEXT: call void @llvm.donothing()
982 ; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> [[TMP1]], i32 7)
983 ; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer
984 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> [[A0:%.*]], i32 7)
985 ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8
986 ; CHECK-NEXT: ret <8 x i16> [[RES]]
988 %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
991 declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
994 define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) #0 {
995 ; CHECK-LABEL: @test_x86_sse2_psra_d(
996 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
997 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
998 ; CHECK-NEXT: call void @llvm.donothing()
999 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
1000 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
1001 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
1002 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
1003 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <4 x i32>
1004 ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
1005 ; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], [[TMP7]]
1006 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]])
1007 ; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
1008 ; CHECK-NEXT: ret <4 x i32> [[RES]]
1010 %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
1013 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
1016 define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) #0 {
1017 ; CHECK-LABEL: @test_x86_sse2_psra_w(
1018 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
1019 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
1020 ; CHECK-NEXT: call void @llvm.donothing()
1021 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
1022 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
1023 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
1024 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
1025 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <8 x i16>
1026 ; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[TMP1]], <8 x i16> [[A1:%.*]])
1027 ; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP7]]
1028 ; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]])
1029 ; CHECK-NEXT: store <8 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
1030 ; CHECK-NEXT: ret <8 x i16> [[RES]]
1032 %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
1035 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
1038 define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) #0 {
1039 ; CHECK-LABEL: @test_x86_sse2_psrai_d(
1040 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
1041 ; CHECK-NEXT: call void @llvm.donothing()
1042 ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> [[TMP1]], i32 7)
1043 ; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
1044 ; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> [[A0:%.*]], i32 7)
1045 ; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
1046 ; CHECK-NEXT: ret <4 x i32> [[RES]]
1048 %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
1051 declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone

define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) #0 {
; CHECK-LABEL: @test_x86_sse2_psrai_w(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> [[TMP1]], i32 7)
; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> [[A0:%.*]], i32 7)
; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[RES]]
;
%res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone

define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_psrl_d(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <4 x i32>
; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> [[A0:%.*]], <4 x i32> [[A1]])
; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_psrl_q(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <2 x i64>
; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> [[A0:%.*]], <2 x i64> [[A1]])
; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[RES]]
;
%res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone

define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_psrl_w(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i128
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <8 x i16>
; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[TMP1]], <8 x i16> [[A1:%.*]])
; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]])
; CHECK-NEXT: store <8 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[RES]]
;
%res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
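
; The _load variant additionally checks the pointer argument's shadow and
; branches to __msan_warning_noreturn when it is non-zero, then loads the
; value's shadow from the shadow address (the pointer XORed with the
; x86_64 shadow offset 0x500000000000, i.e. 87960930222080).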

define <8 x i16> @test_x86_sse2_psrl_w_load(<8 x i16> %a0, ptr %p) #0 {
; CHECK-LABEL: @test_x86_sse2_psrl_w_load(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[A1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16
; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64
; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128
; CHECK-NEXT: [[TMP9:%.*]] = trunc i128 [[TMP8]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i64 [[TMP9]], 0
; CHECK-NEXT: [[TMP11:%.*]] = sext i1 [[TMP10]] to i128
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i128 [[TMP11]] to <8 x i16>
; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[TMP2]], <8 x i16> [[A1]])
; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i16> [[TMP13]], [[TMP12]]
; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> [[A0:%.*]], <8 x i16> [[A1]])
; CHECK-NEXT: store <8 x i16> [[TMP14]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[RES]]
;
%a1 = load <8 x i16>, ptr %p
%res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}

define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) #0 {
; CHECK-LABEL: @test_x86_sse2_psrli_d(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> [[TMP1]], i32 7)
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> [[A0:%.*]], i32 7)
; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) #0 {
; CHECK-LABEL: @test_x86_sse2_psrli_q(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> [[TMP1]], i32 7)
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> [[A0:%.*]], i32 7)
; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[RES]]
;
%res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone

define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) #0 {
; CHECK-LABEL: @test_x86_sse2_psrli_w(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> [[TMP1]], i32 7)
; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> [[A0:%.*]], i32 7)
; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[RES]]
;
%res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
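
; The scalar ucomi* comparisons only read element 0 of each operand, so the
; expected instrumentation ORs the two parameter shadows, tests lane 0, and
; sign-extends the result into the i32 return shadow.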

define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_ucomieq_sd(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
%res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_ucomige_sd(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
%res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_ucomigt_sd(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
%res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_ucomile_sd(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
%res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_ucomilt_sd(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
%res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone

define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) #0 {
; CHECK-LABEL: @test_x86_sse2_ucomineq_sd(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]])
; CHECK-NEXT: store i32 [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i32 [[RES]]
;
%res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
ret i32 %res
}
declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
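
; pause, lfence and mfence take no operands, so the only expected
; instrumentation is the @llvm.donothing() marker.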

define void @test_x86_sse2_pause() #0 {
; CHECK-LABEL: @test_x86_sse2_pause(
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: tail call void @llvm.x86.sse2.pause()
; CHECK-NEXT: ret void
;
tail call void @llvm.x86.sse2.pause()
ret void
}
declare void @llvm.x86.sse2.pause() nounwind

define void @lfence() nounwind #0 {
; CHECK-LABEL: @lfence(
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: tail call void @llvm.x86.sse2.lfence()
; CHECK-NEXT: ret void
;
tail call void @llvm.x86.sse2.lfence()
ret void
}
declare void @llvm.x86.sse2.lfence() nounwind

define void @mfence() nounwind #0 {
; CHECK-LABEL: @mfence(
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: tail call void @llvm.x86.sse2.mfence()
; CHECK-NEXT: ret void
;
tail call void @llvm.x86.sse2.mfence()
ret void
}
declare void @llvm.x86.sse2.mfence() nounwind
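
; clflush only needs its pointer operand checked: a non-zero pointer shadow
; takes the noreturn warning path before the intrinsic is reached.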

define void @clflush(ptr %p) nounwind #0 {
; CHECK-LABEL: @clflush(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
; CHECK: 2:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
; CHECK-NEXT: unreachable
; CHECK: 3:
; CHECK-NEXT: tail call void @llvm.x86.sse2.clflush(ptr [[P:%.*]])
; CHECK-NEXT: ret void
;
tail call void @llvm.x86.sse2.clflush(ptr %p)
ret void
}
declare void @llvm.x86.sse2.clflush(ptr) nounwind

attributes #0 = { sanitize_memory }