; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt %s -S -passes=msan 2>&1 | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
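
; This file checks MemorySanitizer instrumentation of variable-amount funnel
; shifts and rotates built on the llvm.fshl intrinsics. As the CHECK lines
; below show, the shadow of each result is the funnel shift of the data
; operands' shadows by the concrete shift amount, OR-ed with an all-ones mask
; (icmp ne + sext) whenever the shadow of the shift amount itself is non-zero.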

define void @var_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512, i64 %c64, <2 x i64> %c128, <4 x i64> %c256, <8 x i64> %c512) sanitize_memory {
; CHECK-LABEL: @var_funnel_i64(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 120) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 240) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 248) to ptr), align 8
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 264) to ptr), align 8
; CHECK-NEXT: [[TMP10:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
; CHECK-NEXT: [[TMP11:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 176) to ptr), align 8
; CHECK-NEXT: [[TMP12:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 296) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[TMP3]], 0
; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.fshl.i64(i64 [[TMP1]], i64 [[TMP2]], i64 [[C64:%.*]])
; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[I64:%.*]] = call i64 @llvm.fshl.i64(i64 [[A64:%.*]], i64 [[B64:%.*]], i64 [[C64]])
; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <2 x i64> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = sext <2 x i1> [[TMP17]] to <2 x i64>
; CHECK-NEXT: [[TMP19:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[TMP4]], <2 x i64> [[TMP5]], <2 x i64> [[C128:%.*]])
; CHECK-NEXT: [[TMP20:%.*]] = or <2 x i64> [[TMP19]], [[TMP18]]
; CHECK-NEXT: [[V2I64:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A128:%.*]], <2 x i64> [[B128:%.*]], <2 x i64> [[C128]])
; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <4 x i64> [[TMP9]], zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = sext <4 x i1> [[TMP21]] to <4 x i64>
; CHECK-NEXT: [[TMP23:%.*]] = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> [[TMP7]], <4 x i64> [[TMP8]], <4 x i64> [[C256:%.*]])
; CHECK-NEXT: [[TMP24:%.*]] = or <4 x i64> [[TMP23]], [[TMP22]]
; CHECK-NEXT: [[V4I64:%.*]] = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> [[A256:%.*]], <4 x i64> [[B256:%.*]], <4 x i64> [[C256]])
; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <8 x i64> [[TMP12]], zeroinitializer
; CHECK-NEXT: [[TMP26:%.*]] = sext <8 x i1> [[TMP25]] to <8 x i64>
; CHECK-NEXT: [[TMP27:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[TMP10]], <8 x i64> [[TMP11]], <8 x i64> [[C512:%.*]])
; CHECK-NEXT: [[TMP28:%.*]] = or <8 x i64> [[TMP27]], [[TMP26]]
; CHECK-NEXT: [[V8I64:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[A512:%.*]], <8 x i64> [[B512:%.*]], <8 x i64> [[C512]])
; CHECK-NEXT: ret void
;
  %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 %c64)
  %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> %c128)
  %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> %c256)
  %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> %c512)
  ret void
}

define void @var_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512, i32 %c32, <4 x i32> %c128, <8 x i32> %c256, <16 x i32> %c512) sanitize_memory {
; CHECK-LABEL: @var_funnel_i32(
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 120) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 240) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 248) to ptr), align 8
; CHECK-NEXT: [[TMP7:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
; CHECK-NEXT: [[TMP9:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 264) to ptr), align 8
; CHECK-NEXT: [[TMP10:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
; CHECK-NEXT: [[TMP11:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 176) to ptr), align 8
; CHECK-NEXT: [[TMP12:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 296) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i32
; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP2]], i32 [[C32:%.*]])
; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[I32:%.*]] = call i32 @llvm.fshl.i32(i32 [[A32:%.*]], i32 [[B32:%.*]], i32 [[C32]])
; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <4 x i32> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = sext <4 x i1> [[TMP17]] to <4 x i32>
; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[C128:%.*]])
; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i32> [[TMP19]], [[TMP18]]
; CHECK-NEXT: [[V2I32:%.*]] = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[A128:%.*]], <4 x i32> [[B128:%.*]], <4 x i32> [[C128]])
; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <8 x i32> [[TMP9]], zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = sext <8 x i1> [[TMP21]] to <8 x i32>
; CHECK-NEXT: [[TMP23:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[TMP7]], <8 x i32> [[TMP8]], <8 x i32> [[C256:%.*]])
; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i32> [[TMP23]], [[TMP22]]
; CHECK-NEXT: [[V4I32:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[A256:%.*]], <8 x i32> [[B256:%.*]], <8 x i32> [[C256]])
; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <16 x i32> [[TMP12]], zeroinitializer
; CHECK-NEXT: [[TMP26:%.*]] = sext <16 x i1> [[TMP25]] to <16 x i32>
; CHECK-NEXT: [[TMP27:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> [[C512:%.*]])
; CHECK-NEXT: [[TMP28:%.*]] = or <16 x i32> [[TMP27]], [[TMP26]]
; CHECK-NEXT: [[V8I32:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[A512:%.*]], <16 x i32> [[B512:%.*]], <16 x i32> [[C512]])
; CHECK-NEXT: ret void
;
  %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 %c32)
  %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> %c128)
  %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> %c256)
  %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> %c512)
  ret void
}

define void @var_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) sanitize_memory {
; CHECK-LABEL: @var_funnel_i16(
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 120) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 240) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 248) to ptr), align 8
; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: [[TMP8:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 264) to ptr), align 8
; CHECK-NEXT: [[TMP10:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
; CHECK-NEXT: [[TMP11:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 176) to ptr), align 8
; CHECK-NEXT: [[TMP12:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 296) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i16 [[TMP3]], 0
; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i16
; CHECK-NEXT: [[TMP15:%.*]] = call i16 @llvm.fshl.i16(i16 [[TMP1]], i16 [[TMP2]], i16 [[C16:%.*]])
; CHECK-NEXT: [[TMP16:%.*]] = or i16 [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[I16:%.*]] = call i16 @llvm.fshl.i16(i16 [[A16:%.*]], i16 [[B16:%.*]], i16 [[C16]])
; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <8 x i16> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = sext <8 x i1> [[TMP17]] to <8 x i16>
; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i16> [[C128:%.*]])
; CHECK-NEXT: [[TMP20:%.*]] = or <8 x i16> [[TMP19]], [[TMP18]]
; CHECK-NEXT: [[V8I16:%.*]] = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> [[A128:%.*]], <8 x i16> [[B128:%.*]], <8 x i16> [[C128]])
; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <16 x i16> [[TMP9]], zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP21]] to <16 x i16>
; CHECK-NEXT: [[TMP23:%.*]] = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> [[TMP7]], <16 x i16> [[TMP8]], <16 x i16> [[C256:%.*]])
; CHECK-NEXT: [[TMP24:%.*]] = or <16 x i16> [[TMP23]], [[TMP22]]
; CHECK-NEXT: [[V16I16:%.*]] = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> [[A256:%.*]], <16 x i16> [[B256:%.*]], <16 x i16> [[C256]])
; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <32 x i16> [[TMP12]], zeroinitializer
; CHECK-NEXT: [[TMP26:%.*]] = sext <32 x i1> [[TMP25]] to <32 x i16>
; CHECK-NEXT: [[TMP27:%.*]] = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> [[TMP10]], <32 x i16> [[TMP11]], <32 x i16> [[C512:%.*]])
; CHECK-NEXT: [[TMP28:%.*]] = or <32 x i16> [[TMP27]], [[TMP26]]
; CHECK-NEXT: [[V32I16:%.*]] = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> [[A512:%.*]], <32 x i16> [[B512:%.*]], <32 x i16> [[C512]])
; CHECK-NEXT: ret void
;
  %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 %c16)
  %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> %c128)
  %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> %c256)
  %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> %c512)
  ret void
}

define void @var_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) sanitize_memory {
; CHECK-LABEL: @var_funnel_i8(
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 120) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 240) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 248) to ptr), align 8
; CHECK-NEXT: [[TMP7:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: [[TMP8:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
; CHECK-NEXT: [[TMP9:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 264) to ptr), align 8
; CHECK-NEXT: [[TMP10:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
; CHECK-NEXT: [[TMP11:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 176) to ptr), align 8
; CHECK-NEXT: [[TMP12:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 296) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i8 [[TMP3]], 0
; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i8
; CHECK-NEXT: [[TMP15:%.*]] = call i8 @llvm.fshl.i8(i8 [[TMP1]], i8 [[TMP2]], i8 [[C8:%.*]])
; CHECK-NEXT: [[TMP16:%.*]] = or i8 [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[I8:%.*]] = call i8 @llvm.fshl.i8(i8 [[A8:%.*]], i8 [[B8:%.*]], i8 [[C8]])
; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <16 x i8> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = sext <16 x i1> [[TMP17]] to <16 x i8>
; CHECK-NEXT: [[TMP19:%.*]] = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[C128:%.*]])
; CHECK-NEXT: [[TMP20:%.*]] = or <16 x i8> [[TMP19]], [[TMP18]]
; CHECK-NEXT: [[V16I8:%.*]] = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> [[A128:%.*]], <16 x i8> [[B128:%.*]], <16 x i8> [[C128]])
; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <32 x i8> [[TMP9]], zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = sext <32 x i1> [[TMP21]] to <32 x i8>
; CHECK-NEXT: [[TMP23:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[TMP7]], <32 x i8> [[TMP8]], <32 x i8> [[C256:%.*]])
; CHECK-NEXT: [[TMP24:%.*]] = or <32 x i8> [[TMP23]], [[TMP22]]
; CHECK-NEXT: [[V32I8:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[A256:%.*]], <32 x i8> [[B256:%.*]], <32 x i8> [[C256]])
; CHECK-NEXT: [[TMP25:%.*]] = icmp ne <64 x i8> [[TMP12]], zeroinitializer
; CHECK-NEXT: [[TMP26:%.*]] = sext <64 x i1> [[TMP25]] to <64 x i8>
; CHECK-NEXT: [[TMP27:%.*]] = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> [[TMP10]], <64 x i8> [[TMP11]], <64 x i8> [[C512:%.*]])
; CHECK-NEXT: [[TMP28:%.*]] = or <64 x i8> [[TMP27]], [[TMP26]]
; CHECK-NEXT: [[V64I8:%.*]] = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> [[A512:%.*]], <64 x i8> [[B512:%.*]], <64 x i8> [[C512]])
; CHECK-NEXT: ret void
;
  %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 %c8)
  %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> %c128)
  %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> %c256)
  %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> %c512)
  ret void
}
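
; The rotate tests below express a rotate-left as llvm.fshl with the same
; value in both data operands. The checks show the shadow is propagated the
; same way: rotate the operand's shadow by the concrete amount, then OR in
; the all-ones mask produced when the shift amount's shadow is non-zero.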

define void @var_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %c64, <2 x i64> %c128, <4 x i64> %c256, <8 x i64> %c512) sanitize_memory {
; CHECK-LABEL: @var_rotate_i64(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 120) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
; CHECK-NEXT: [[TMP7:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 176) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i64 [[TMP2]], 0
; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i64
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.fshl.i64(i64 [[TMP1]], i64 [[TMP1]], i64 [[C64:%.*]])
; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP11]], [[TMP10]]
; CHECK-NEXT: [[I64:%.*]] = call i64 @llvm.fshl.i64(i64 [[A64:%.*]], i64 [[A64]], i64 [[C64]])
; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = sext <2 x i1> [[TMP13]] to <2 x i64>
; CHECK-NEXT: [[TMP15:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i64> [[C128:%.*]])
; CHECK-NEXT: [[TMP16:%.*]] = or <2 x i64> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[V2I64:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A128:%.*]], <2 x i64> [[A128]], <2 x i64> [[C128]])
; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <4 x i64> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = sext <4 x i1> [[TMP17]] to <4 x i64>
; CHECK-NEXT: [[TMP19:%.*]] = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> [[TMP5]], <4 x i64> [[TMP5]], <4 x i64> [[C256:%.*]])
; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i64> [[TMP19]], [[TMP18]]
; CHECK-NEXT: [[V4I64:%.*]] = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> [[A256:%.*]], <4 x i64> [[A256]], <4 x i64> [[C256]])
; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <8 x i64> [[TMP8]], zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = sext <8 x i1> [[TMP21]] to <8 x i64>
; CHECK-NEXT: [[TMP23:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[TMP7]], <8 x i64> [[TMP7]], <8 x i64> [[C512:%.*]])
; CHECK-NEXT: [[TMP24:%.*]] = or <8 x i64> [[TMP23]], [[TMP22]]
; CHECK-NEXT: [[V8I64:%.*]] = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> [[A512:%.*]], <8 x i64> [[A512]], <8 x i64> [[C512]])
; CHECK-NEXT: ret void
;
  %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 %c64)
  %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> %c128)
  %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> %c256)
  %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> %c512)
  ret void
}

define void @var_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %c32, <4 x i32> %c128, <8 x i32> %c256, <16 x i32> %c512) sanitize_memory {
; CHECK-LABEL: @var_rotate_i32(
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 120) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
; CHECK-NEXT: [[TMP7:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
; CHECK-NEXT: [[TMP8:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 176) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP2]], 0
; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i32
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP1]], i32 [[C32:%.*]])
; CHECK-NEXT: [[TMP12:%.*]] = or i32 [[TMP11]], [[TMP10]]
; CHECK-NEXT: [[I32:%.*]] = call i32 @llvm.fshl.i32(i32 [[A32:%.*]], i32 [[A32]], i32 [[C32]])
; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = sext <4 x i1> [[TMP13]] to <4 x i32>
; CHECK-NEXT: [[TMP15:%.*]] = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> [[C128:%.*]])
; CHECK-NEXT: [[TMP16:%.*]] = or <4 x i32> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[V2I32:%.*]] = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[A128:%.*]], <4 x i32> [[A128]], <4 x i32> [[C128]])
; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <8 x i32> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = sext <8 x i1> [[TMP17]] to <8 x i32>
; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[TMP5]], <8 x i32> [[TMP5]], <8 x i32> [[C256:%.*]])
; CHECK-NEXT: [[TMP20:%.*]] = or <8 x i32> [[TMP19]], [[TMP18]]
; CHECK-NEXT: [[V4I32:%.*]] = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> [[A256:%.*]], <8 x i32> [[A256]], <8 x i32> [[C256]])
; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <16 x i32> [[TMP8]], zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP21]] to <16 x i32>
; CHECK-NEXT: [[TMP23:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[TMP7]], <16 x i32> [[TMP7]], <16 x i32> [[C512:%.*]])
; CHECK-NEXT: [[TMP24:%.*]] = or <16 x i32> [[TMP23]], [[TMP22]]
; CHECK-NEXT: [[V8I32:%.*]] = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> [[A512:%.*]], <16 x i32> [[A512]], <16 x i32> [[C512]])
; CHECK-NEXT: ret void
;
  %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 %c32)
  %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> %c128)
  %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> %c256)
  %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> %c512)
  ret void
}

define void @var_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %c16, <8 x i16> %c128, <16 x i16> %c256, <32 x i16> %c512) sanitize_memory {
; CHECK-LABEL: @var_rotate_i16(
; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 120) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
; CHECK-NEXT: [[TMP7:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
; CHECK-NEXT: [[TMP8:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 176) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i16 [[TMP2]], 0
; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i16
; CHECK-NEXT: [[TMP11:%.*]] = call i16 @llvm.fshl.i16(i16 [[TMP1]], i16 [[TMP1]], i16 [[C16:%.*]])
; CHECK-NEXT: [[TMP12:%.*]] = or i16 [[TMP11]], [[TMP10]]
; CHECK-NEXT: [[I16:%.*]] = call i16 @llvm.fshl.i16(i16 [[A16:%.*]], i16 [[A16]], i16 [[C16]])
; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <8 x i16> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = sext <8 x i1> [[TMP13]] to <8 x i16>
; CHECK-NEXT: [[TMP15:%.*]] = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i16> [[C128:%.*]])
; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i16> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[V8I16:%.*]] = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> [[A128:%.*]], <8 x i16> [[A128]], <8 x i16> [[C128]])
; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <16 x i16> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = sext <16 x i1> [[TMP17]] to <16 x i16>
; CHECK-NEXT: [[TMP19:%.*]] = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> [[TMP5]], <16 x i16> [[TMP5]], <16 x i16> [[C256:%.*]])
; CHECK-NEXT: [[TMP20:%.*]] = or <16 x i16> [[TMP19]], [[TMP18]]
; CHECK-NEXT: [[V16I16:%.*]] = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> [[A256:%.*]], <16 x i16> [[A256]], <16 x i16> [[C256]])
; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <32 x i16> [[TMP8]], zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = sext <32 x i1> [[TMP21]] to <32 x i16>
; CHECK-NEXT: [[TMP23:%.*]] = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> [[TMP7]], <32 x i16> [[TMP7]], <32 x i16> [[C512:%.*]])
; CHECK-NEXT: [[TMP24:%.*]] = or <32 x i16> [[TMP23]], [[TMP22]]
; CHECK-NEXT: [[V32I16:%.*]] = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> [[A512:%.*]], <32 x i16> [[A512]], <32 x i16> [[C512]])
; CHECK-NEXT: ret void
;
  %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 %c16)
  %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> %c128)
  %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> %c256)
  %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> %c512)
  ret void
}

define void @var_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %c8, <16 x i8> %c128, <32 x i8> %c256, <64 x i8> %c512) sanitize_memory {
; CHECK-LABEL: @var_rotate_i8(
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 120) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
; CHECK-NEXT: [[TMP7:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 56) to ptr), align 8
; CHECK-NEXT: [[TMP8:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 176) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i8 [[TMP2]], 0
; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i8
; CHECK-NEXT: [[TMP11:%.*]] = call i8 @llvm.fshl.i8(i8 [[TMP1]], i8 [[TMP1]], i8 [[C8:%.*]])
; CHECK-NEXT: [[TMP12:%.*]] = or i8 [[TMP11]], [[TMP10]]
; CHECK-NEXT: [[I8:%.*]] = call i8 @llvm.fshl.i8(i8 [[A8:%.*]], i8 [[A8]], i8 [[C8]])
; CHECK-NEXT: [[TMP13:%.*]] = icmp ne <16 x i8> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP14:%.*]] = sext <16 x i1> [[TMP13]] to <16 x i8>
; CHECK-NEXT: [[TMP15:%.*]] = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> [[TMP3]], <16 x i8> [[TMP3]], <16 x i8> [[C128:%.*]])
; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i8> [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[V16I8:%.*]] = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> [[A128:%.*]], <16 x i8> [[A128]], <16 x i8> [[C128]])
; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <32 x i8> [[TMP6]], zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = sext <32 x i1> [[TMP17]] to <32 x i8>
; CHECK-NEXT: [[TMP19:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[TMP5]], <32 x i8> [[TMP5]], <32 x i8> [[C256:%.*]])
; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i8> [[TMP19]], [[TMP18]]
; CHECK-NEXT: [[V32I8:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[A256:%.*]], <32 x i8> [[A256]], <32 x i8> [[C256]])
; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <64 x i8> [[TMP8]], zeroinitializer
; CHECK-NEXT: [[TMP22:%.*]] = sext <64 x i1> [[TMP21]] to <64 x i8>
; CHECK-NEXT: [[TMP23:%.*]] = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> [[TMP7]], <64 x i8> [[TMP7]], <64 x i8> [[C512:%.*]])
; CHECK-NEXT: [[TMP24:%.*]] = or <64 x i8> [[TMP23]], [[TMP22]]
; CHECK-NEXT: [[V64I8:%.*]] = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> [[A512:%.*]], <64 x i8> [[A512]], <64 x i8> [[C512]])
; CHECK-NEXT: ret void
;
  %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 %c8)
  %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> %c128)
  %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> %c256)
  %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> %c512)
  ret void
}
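
; Declarations for the funnel-shift intrinsics (fshl and the corresponding
; fshr variants).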

declare i64 @llvm.fshl.i64(i64, i64, i64)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i8 @llvm.fshl.i8 (i8, i8, i8)

declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)

declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
declare <16 x i16> @llvm.fshl.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
declare <32 x i8> @llvm.fshl.v32i8(<32 x i8>, <32 x i8>, <32 x i8>)

declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
declare <64 x i8> @llvm.fshl.v64i8(<64 x i8>, <64 x i8>, <64 x i8>)

declare i64 @llvm.fshr.i64(i64, i64, i64)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i8 @llvm.fshr.i8 (i8, i8, i8)

declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
declare <16 x i8> @llvm.fshr.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)

declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>)
declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>)
declare <16 x i16> @llvm.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
declare <32 x i8> @llvm.fshr.v32i8(<32 x i8>, <32 x i8>, <32 x i8>)

declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)
declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)
declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)
declare <64 x i8> @llvm.fshr.v64i8(<64 x i8>, <64 x i8>, <64 x i8>)