1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s --check-prefix=CHECK-AVX2
; Named aggregate types used by @failing. The getelementptr instructions in the
; function index into DataBuffer (field 1), ComputeFnDispatcher (field 1) and
; absl::int128 (as the element stride); the remaining types appear only as
; nested members of those.
5 %"class.failing::DataBuffer.2.12.22.41.65.87.96.105.114.123.132.141.186.204.213.222.330.429.438.447.718" = type {
6 ptr, %"class.failing::TrackedAllocation.1.11.21.40.64.86.95.104.113.122.131.140.185.203.212.221.329.428.437.446.717", i64
9 %"class.failing::TrackedAllocation.1.11.21.40.64.86.95.104.113.122.131.140.185.203.212.221.329.428.437.446.717" = type <{
10 ptr, i64, %"union.failing::RefcountOrTracker.0.10.20.39.63.85.94.103.112.121.130.139.184.202.211.220.328.427.436.445.716", i8, [7 x i8]
14 %"union.failing::RefcountOrTracker.0.10.20.39.63.85.94.103.112.121.130.139.184.202.211.220.328.427.436.445.716" = type {
18 %"class.failingel::standard_function_call_evaluator_internal::ComputeFnDispatcher.1964.9.19.29.47.71.93.102.111.120.129.138.147.192.210.219.228.336.435.444.453.724" = type {
19 %"class.failingel::builtin_registry::(anonymous namespace)::Between.1939.5.15.25.43.67.89.98.107.116.125.134.143.188.206.215.224.332.431.440.449.720", %"class.std::__u::tuple.1961.8.18.28.46.70.92.101.110.119.128.137.146.191.209.218.227.335.434.443.452.723", ptr
22 %"class.failingel::builtin_registry::(anonymous namespace)::Between.1939.5.15.25.43.67.89.98.107.116.125.134.143.188.206.215.224.332.431.440.449.720" = type {
23 %"class.absl::int128.4.14.24.42.66.88.97.106.115.124.133.142.187.205.214.223.331.430.439.448.719", %"class.absl::int128.4.14.24.42.66.88.97.106.115.124.133.142.187.205.214.223.331.430.439.448.719"
26 %"class.absl::int128.4.14.24.42.66.88.97.106.115.124.133.142.187.205.214.223.331.430.439.448.719" = type {
30 %"class.std::__u::tuple.1961.8.18.28.46.70.92.101.110.119.128.137.146.191.209.218.227.335.434.443.452.723" = type {
31 %"struct.std::__u::__tuple_impl.1962.7.17.27.45.69.91.100.109.118.127.136.145.190.208.217.226.334.433.442.451.722"
34 %"struct.std::__u::__tuple_impl.1962.7.17.27.45.69.91.100.109.118.127.136.145.190.208.217.226.334.433.442.451.722" = type {
35 %"class.std::__u::__tuple_leaf.1963.6.16.26.44.68.90.99.108.117.126.135.144.189.207.216.225.333.432.441.450.721"
38 %"class.std::__u::__tuple_leaf.1963.6.16.26.44.68.90.99.108.117.126.135.144.189.207.216.225.333.432.441.450.721" = type {
; @failing builds a 64-bit mask from signed 128-bit comparisons.
;
; It loads a destination pointer from field 1 of %0, a source pointer from
; field 1 of %1, and an i64 at byte offset 24 of %1. That i64 becomes the upper
; 64 bits of an i128 bound (lower 64 bits zero), splatted to <2 x i128>.
; Each vector.body iteration loads <4 x i64>, reassembles it as two i128 values
; (even lanes = low halves, odd lanes = high halves), compares them `sle`
; against the bound, and ORs `1 << element index` into a <2 x i64> accumulator
; for the lanes that pass. After 64 elements the accumulator is OR-reduced to a
; single i64 and stored through the destination pointer.
;
; The two assertion blocks that follow (default x86-64 and -mattr=avx2) are
; autogenerated, as the NOTE at the top of the file says; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
42 define void @failing(ptr %0, ptr %1) nounwind {
43 ; CHECK-LABEL: failing:
45 ; CHECK-NEXT: movq 8(%rdi), %rax
46 ; CHECK-NEXT: movq 24(%rsi), %rcx
47 ; CHECK-NEXT: movq 32(%rsi), %rdx
48 ; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
49 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [1,1]
50 ; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [2,2]
51 ; CHECK-NEXT: .p2align 4, 0x90
52 ; CHECK-NEXT: .LBB0_1: # %vector.ph
53 ; CHECK-NEXT: # =>This Loop Header: Depth=1
54 ; CHECK-NEXT: # Child Loop BB0_2 Depth 2
55 ; CHECK-NEXT: xorpd %xmm3, %xmm3
56 ; CHECK-NEXT: movq $-1024, %rsi # imm = 0xFC00
57 ; CHECK-NEXT: movdqa %xmm0, %xmm4
58 ; CHECK-NEXT: .p2align 4, 0x90
59 ; CHECK-NEXT: .LBB0_2: # %vector.body
60 ; CHECK-NEXT: # Parent Loop BB0_1 Depth=1
61 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2
62 ; CHECK-NEXT: movdqu 1024(%rdx,%rsi), %xmm5
63 ; CHECK-NEXT: movdqu 1040(%rdx,%rsi), %xmm6
64 ; CHECK-NEXT: movq %xmm5, %rdi
65 ; CHECK-NEXT: movq %xmm6, %r8
66 ; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,2,3]
67 ; CHECK-NEXT: movq %xmm5, %r9
68 ; CHECK-NEXT: pshufd {{.*#+}} xmm5 = xmm6[2,3,2,3]
69 ; CHECK-NEXT: movq %xmm5, %r10
70 ; CHECK-NEXT: negq %r8
71 ; CHECK-NEXT: movq %rcx, %r8
72 ; CHECK-NEXT: sbbq %r10, %r8
73 ; CHECK-NEXT: setge %r8b
74 ; CHECK-NEXT: movzbl %r8b, %r8d
75 ; CHECK-NEXT: negq %r8
76 ; CHECK-NEXT: movq %r8, %xmm5
77 ; CHECK-NEXT: negq %rdi
78 ; CHECK-NEXT: movq %rcx, %rdi
79 ; CHECK-NEXT: sbbq %r9, %rdi
80 ; CHECK-NEXT: setge %dil
81 ; CHECK-NEXT: movzbl %dil, %edi
82 ; CHECK-NEXT: negq %rdi
83 ; CHECK-NEXT: movq %rdi, %xmm6
84 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm5[0]
85 ; CHECK-NEXT: movdqa %xmm1, %xmm5
86 ; CHECK-NEXT: psllq %xmm4, %xmm5
87 ; CHECK-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,2,3]
88 ; CHECK-NEXT: movdqa %xmm1, %xmm8
89 ; CHECK-NEXT: psllq %xmm7, %xmm8
90 ; CHECK-NEXT: movsd {{.*#+}} xmm8 = xmm5[0],xmm8[1]
91 ; CHECK-NEXT: andpd %xmm6, %xmm8
92 ; CHECK-NEXT: orpd %xmm8, %xmm3
93 ; CHECK-NEXT: paddq %xmm2, %xmm4
94 ; CHECK-NEXT: addq $32, %rsi
95 ; CHECK-NEXT: jne .LBB0_2
96 ; CHECK-NEXT: # %bb.3: # %middle.block
97 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
98 ; CHECK-NEXT: pshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
99 ; CHECK-NEXT: por %xmm3, %xmm4
100 ; CHECK-NEXT: movq %xmm4, (%rax)
101 ; CHECK-NEXT: jmp .LBB0_1
103 ; CHECK-AVX2-LABEL: failing:
104 ; CHECK-AVX2: # %bb.0:
105 ; CHECK-AVX2-NEXT: movq 8(%rdi), %rax
106 ; CHECK-AVX2-NEXT: movq 24(%rsi), %rcx
107 ; CHECK-AVX2-NEXT: movq 32(%rsi), %rdx
108 ; CHECK-AVX2-NEXT: vpmovsxbq {{.*#+}} xmm0 = [0,1]
109 ; CHECK-AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [1,1]
110 ; CHECK-AVX2-NEXT: vpmovsxbq {{.*#+}} xmm2 = [2,2]
111 ; CHECK-AVX2-NEXT: .p2align 4, 0x90
112 ; CHECK-AVX2-NEXT: .LBB0_1: # %vector.ph
113 ; CHECK-AVX2-NEXT: # =>This Loop Header: Depth=1
114 ; CHECK-AVX2-NEXT: # Child Loop BB0_2 Depth 2
115 ; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
116 ; CHECK-AVX2-NEXT: movq $-1024, %rsi # imm = 0xFC00
117 ; CHECK-AVX2-NEXT: vmovdqa %xmm0, %xmm4
118 ; CHECK-AVX2-NEXT: .p2align 4, 0x90
119 ; CHECK-AVX2-NEXT: .LBB0_2: # %vector.body
120 ; CHECK-AVX2-NEXT: # Parent Loop BB0_1 Depth=1
121 ; CHECK-AVX2-NEXT: # => This Inner Loop Header: Depth=2
122 ; CHECK-AVX2-NEXT: vmovdqu 1024(%rdx,%rsi), %xmm5
123 ; CHECK-AVX2-NEXT: vmovdqu 1040(%rdx,%rsi), %xmm6
124 ; CHECK-AVX2-NEXT: vpextrq $1, %xmm5, %rdi
125 ; CHECK-AVX2-NEXT: vpextrq $1, %xmm6, %r8
126 ; CHECK-AVX2-NEXT: vmovq %xmm5, %r9
127 ; CHECK-AVX2-NEXT: vmovq %xmm6, %r10
128 ; CHECK-AVX2-NEXT: negq %r10
129 ; CHECK-AVX2-NEXT: movq %rcx, %r10
130 ; CHECK-AVX2-NEXT: sbbq %r8, %r10
131 ; CHECK-AVX2-NEXT: setge %r8b
132 ; CHECK-AVX2-NEXT: movzbl %r8b, %r8d
133 ; CHECK-AVX2-NEXT: negq %r8
134 ; CHECK-AVX2-NEXT: vmovq %r8, %xmm5
135 ; CHECK-AVX2-NEXT: negq %r9
136 ; CHECK-AVX2-NEXT: movq %rcx, %r8
137 ; CHECK-AVX2-NEXT: sbbq %rdi, %r8
138 ; CHECK-AVX2-NEXT: setge %dil
139 ; CHECK-AVX2-NEXT: movzbl %dil, %edi
140 ; CHECK-AVX2-NEXT: negq %rdi
141 ; CHECK-AVX2-NEXT: vmovq %rdi, %xmm6
142 ; CHECK-AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm6[0],xmm5[0]
143 ; CHECK-AVX2-NEXT: vpsllvq %xmm4, %xmm1, %xmm6
144 ; CHECK-AVX2-NEXT: vpand %xmm6, %xmm5, %xmm5
145 ; CHECK-AVX2-NEXT: vpor %xmm3, %xmm5, %xmm3
146 ; CHECK-AVX2-NEXT: vpaddq %xmm2, %xmm4, %xmm4
147 ; CHECK-AVX2-NEXT: addq $32, %rsi
148 ; CHECK-AVX2-NEXT: jne .LBB0_2
149 ; CHECK-AVX2-NEXT: # %bb.3: # %middle.block
150 ; CHECK-AVX2-NEXT: # in Loop: Header=BB0_1 Depth=1
151 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[2,3,2,3]
152 ; CHECK-AVX2-NEXT: vpor %xmm4, %xmm3, %xmm3
153 ; CHECK-AVX2-NEXT: vmovq %xmm3, (%rax)
154 ; CHECK-AVX2-NEXT: jmp .LBB0_1
; %4: destination of the final mask, loaded from field 1 of %0.
155 %3 = getelementptr inbounds %"class.failing::DataBuffer.2.12.22.41.65.87.96.105.114.123.132.141.186.204.213.222.330.429.438.447.718", ptr %0, i64 0, i32 1
156 %4 = load ptr, ptr %3, align 8
; %6: base pointer of the 128-bit values to compare, loaded from field 1 of %1.
157 %5 = getelementptr inbounds %"class.failingel::standard_function_call_evaluator_internal::ComputeFnDispatcher.1964.9.19.29.47.71.93.102.111.120.129.138.147.192.210.219.228.336.435.444.453.724", ptr %1, i64 0, i32 1
158 %6 = load ptr, ptr %5, align 16
; Bound: the i64 at byte offset 24 of %1 is moved into the upper 64 bits of an
; i128 (lower 64 bits stay zero) and splatted to both vector lanes.
159 %7 = getelementptr inbounds i8, ptr %1, i64 24
160 %8 = load i64, ptr %7, align 8
161 %9 = zext i64 %8 to i128
162 %10 = shl nuw i128 %9, 64
163 %broadcast.splatinsert = insertelement <2 x i128> poison, i128 %10, i64 0
164 %broadcast.splat = shufflevector <2 x i128> %broadcast.splatinsert, <2 x i128> poison, <2 x i32> zeroinitializer
167 br label %vector.body
; Loop state: scalar element index, per-lane bit positions (start at <0, 1>),
; and the mask accumulated so far (starts at zero).
169 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
170 %vec.ind = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ %vec.ind.next, %vector.body ]
171 %vec.phi = phi <2 x i64> [ zeroinitializer, %vector.ph ], [ %20, %vector.body ]
; One wide load of four i64 values; even lanes are the low halves and odd lanes
; the high halves of two 128-bit values, which are rebuilt as <2 x i128>.
172 %11 = getelementptr inbounds %"class.absl::int128.4.14.24.42.66.88.97.106.115.124.133.142.187.205.214.223.331.430.439.448.719", ptr %6, i64 %index
173 %wide.vec = load <4 x i64>, ptr %11, align 8
174 %strided.vec = shufflevector <4 x i64> %wide.vec, <4 x i64> poison, <2 x i32> <i32 0, i32 2>
175 %strided.vec1 = shufflevector <4 x i64> %wide.vec, <4 x i64> poison, <2 x i32> <i32 1, i32 3>
176 %12 = zext <2 x i64> %strided.vec1 to <2 x i128>
177 %13 = shl nuw <2 x i128> %12, <i128 64, i128 64>
178 %14 = zext <2 x i64> %strided.vec to <2 x i128>
179 %15 = or <2 x i128> %13, %14
; Signed value <= bound; lanes that pass contribute the bit 1 << %vec.ind,
; the others contribute zero, and the result is ORed into the accumulator.
180 %16 = icmp sle <2 x i128> %15, %broadcast.splat
181 %17 = shl nuw <2 x i64> <i64 1, i64 1>, %vec.ind
182 %18 = freeze <2 x i1> %16
183 %19 = select <2 x i1> %18, <2 x i64> %17, <2 x i64> zeroinitializer
184 %20 = or <2 x i64> %19, %vec.phi
; Advance by two elements per iteration; leave the loop once 64 are processed.
185 %index.next = add nuw i64 %index, 2
186 %vec.ind.next = add <2 x i64> %vec.ind, <i64 2, i64 2>
187 %21 = icmp eq i64 %index.next, 64
188 br i1 %21, label %middle.block, label %vector.body
189 middle.block: ; preds = %vector.body
; Collapse the two accumulator lanes into one i64 and store it through %4.
; NOTE(review): the generated assembly jumps back to the %vector.ph block after
; this store, so the function never returns; the IR terminator that does this
; is not visible in this excerpt -- confirm.
190 %22 = tail call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %20)
191 store i64 %22, ptr %4, align 8
; Intrinsic called from middle.block in @failing to OR-reduce the <2 x i64>
; accumulator to a single i64.
; NOTE(review): attribute group #1 is not defined in the visible part of the
; file; presumably it is defined after this declaration -- confirm.
195 declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>) #1