1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 # RUN: llc -mtriple=x86_64-- -mattr=+amx-tile -run-pass=fastpretileconfig -o - %s | FileCheck %s
4 # This case tests a tile phi that is accessed in a nested manner, but its def block is
8 # %6 = phi(%3, b%10) <-----
19 # void foo(int cond, char *buf) {
20 # __tile1024i a = {16, 64};
21 # __tile1024i b = {16, 64};
22 # __tile1024i c = {16, 64};
27 # __tile_loadd(&c, buf, STRIDE);
31 # for(int i = 0; i < 10; i++) {
32 # __tile_dpbssd(&c, a, b);
36 # __tile_loadd(&c, buf, STRIDE);
39 # __tile_stored(buf, STRIDE, c);
44 tracksRegLiveness: true
46 - { id: 0, class: gr8 }
47 - { id: 1, class: tile }
48 - { id: 2, class: tile }
49 - { id: 3, class: tile }
50 - { id: 4, class: tile }
51 - { id: 5, class: tile }
52 - { id: 6, class: gr32 }
53 - { id: 7, class: tile }
54 - { id: 8, class: tile }
55 - { id: 9, class: tile }
56 - { id: 10, class: tile }
57 - { id: 11, class: gr32 }
58 - { id: 12, class: gr32 }
59 - { id: 13, class: gr32 }
60 - { id: 14, class: gr64 }
61 - { id: 15, class: gr64 }
62 - { id: 16, class: gr8 }
63 - { id: 17, class: gr16 }
64 - { id: 18, class: gr16 }
65 - { id: 19, class: gr64_nosp }
66 - { id: 20, class: gr16 }
67 - { id: 21, class: gr16 }
68 - { id: 22, class: gr32 }
69 - { id: 23, class: gr16 }
70 - { id: 24, class: gr16 }
71 - { id: 25, class: gr16 }
72 - { id: 26, class: gr16 }
73 - { id: 27, class: gr16 }
74 - { id: 28, class: gr16 }
75 - { id: 29, class: tile }
76 - { id: 30, class: gr16 }
77 - { id: 31, class: gr16 }
78 - { id: 32, class: gr64_nosp }
79 - { id: 33, class: gr16 }
80 - { id: 34, class: gr16 }
81 - { id: 35, class: gr32 }
82 - { id: 36, class: gr64_nosp }
83 - { id: 37, class: gr16 }
84 - { id: 38, class: gr16 }
86 - { reg: '$edi', virtual-reg: '%12' }
87 - { reg: '$rsi', virtual-reg: '%14' }
91 amxProgModel: ManagedRA
93 ; CHECK-LABEL: name: foo
95 ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
96 ; CHECK-NEXT: liveins: $edi, $rsi
98 ; CHECK-NEXT: [[V_SET0_:%[0-9]+]]:vr128 = V_SET0
99 ; CHECK-NEXT: MOVUPSmr %stack.1, 1, $noreg, 0, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1, align 4)
100 ; CHECK-NEXT: MOVUPSmr %stack.1, 1, $noreg, 16, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1 + 16, align 4)
101 ; CHECK-NEXT: MOVUPSmr %stack.1, 1, $noreg, 32, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1 + 32, align 4)
102 ; CHECK-NEXT: MOVUPSmr %stack.1, 1, $noreg, 48, $noreg, [[V_SET0_]] :: (store (s512) into %stack.1 + 48, align 4)
103 ; CHECK-NEXT: MOV8mi %stack.1, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.1, align 4)
104 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rsi
105 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi
106 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY killed [[COPY1]]
107 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY killed [[COPY]]
108 ; CHECK-NEXT: CMP32ri8 [[COPY2]], 0, implicit-def $eflags
109 ; CHECK-NEXT: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 4, implicit $eflags
110 ; CHECK-NEXT: TEST8ri [[SETCCr]], 1, implicit-def $eflags
111 ; CHECK-NEXT: JCC_1 %bb.2, 5, implicit $eflags
114 ; CHECK-NEXT: successors: %bb.3(0x80000000)
116 ; CHECK-NEXT: [[MOV16ri:%[0-9]+]]:gr16 = MOV16ri 64
117 ; CHECK-NEXT: [[MOV16ri1:%[0-9]+]]:gr16 = MOV16ri 16
118 ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
119 ; CHECK-NEXT: [[LEA64r:%[0-9]+]]:gr64_nosp = LEA64r %stack.0, 1, $noreg, 0, $noreg
120 ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]]
121 ; CHECK-NEXT: [[MOV64ri:%[0-9]+]]:gr64_nosp = MOV64ri 64
122 ; CHECK-NEXT: TILESTORED %stack.0, 1, killed [[MOV64ri]], 0, $noreg, [[PTILEZEROV]] :: (store (s8192) into %stack.0)
123 ; CHECK-NEXT: JMP_1 %bb.3
126 ; CHECK-NEXT: successors: %bb.3(0x80000000)
128 ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64_nosp = MOV32ri64 32
129 ; CHECK-NEXT: [[MOV16ri2:%[0-9]+]]:gr16 = MOV16ri 64
130 ; CHECK-NEXT: [[MOV16ri3:%[0-9]+]]:gr16 = MOV16ri 16
131 ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
132 ; CHECK-NEXT: [[LEA64r1:%[0-9]+]]:gr64_nosp = LEA64r %stack.2, 1, $noreg, 0, $noreg
133 ; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri3]], [[MOV16ri2]], [[COPY3]], 1, killed [[MOV32ri64_]], 0, $noreg
134 ; CHECK-NEXT: [[MOV64ri1:%[0-9]+]]:gr64_nosp = MOV64ri 64
135 ; CHECK-NEXT: TILESTORED %stack.2, 1, killed [[MOV64ri1]], 0, $noreg, [[PTILELOADDV]] :: (store (s8192) into %stack.2)
138 ; CHECK-NEXT: successors: %bb.5(0x80000000)
140 ; CHECK-NEXT: [[PHI:%[0-9]+]]:gr16 = PHI [[MOV16ri]], %bb.1, [[MOV16ri2]], %bb.2
141 ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gr16 = PHI [[MOV16ri1]], %bb.1, [[MOV16ri3]], %bb.2
142 ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gr64_nosp = PHI [[LEA64r]], %bb.1, [[LEA64r1]], %bb.2
143 ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
144 ; CHECK-NEXT: [[MOV64ri2:%[0-9]+]]:gr64_nosp = MOV64ri 64
145 ; CHECK-NEXT: [[LEA64r2:%[0-9]+]]:gr64_nosp = LEA64r %stack.5, 1, $noreg, 0, $noreg
146 ; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV [[PHI1]], [[PHI]], [[PHI2]], 1, killed [[MOV64ri2]], 0, $noreg
147 ; CHECK-NEXT: [[MOV64ri3:%[0-9]+]]:gr64_nosp = MOV64ri 64
148 ; CHECK-NEXT: TILESTORED %stack.5, 1, killed [[MOV64ri3]], 0, $noreg, [[PTILELOADDV1]] :: (store (s8192) into %stack.5)
149 ; CHECK-NEXT: [[MOV16ri4:%[0-9]+]]:gr16 = MOV16ri 64
150 ; CHECK-NEXT: [[MOV16ri5:%[0-9]+]]:gr16 = MOV16ri 16
151 ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
152 ; CHECK-NEXT: [[PTILEZEROV1:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri5]], [[MOV16ri4]]
153 ; CHECK-NEXT: [[MOV64ri4:%[0-9]+]]:gr64_nosp = MOV64ri 64
154 ; CHECK-NEXT: TILESTORED %stack.4, 1, killed [[MOV64ri4]], 0, $noreg, [[PTILEZEROV1]] :: (store (s8192) into %stack.4)
155 ; CHECK-NEXT: [[MOV16ri6:%[0-9]+]]:gr16 = MOV16ri 64
156 ; CHECK-NEXT: [[MOV16ri7:%[0-9]+]]:gr16 = MOV16ri 16
157 ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
158 ; CHECK-NEXT: [[PTILEZEROV2:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri7]], [[MOV16ri6]]
159 ; CHECK-NEXT: [[MOV64ri5:%[0-9]+]]:gr64_nosp = MOV64ri 64
160 ; CHECK-NEXT: TILESTORED %stack.3, 1, killed [[MOV64ri5]], 0, $noreg, [[PTILEZEROV2]] :: (store (s8192) into %stack.3)
161 ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags
162 ; CHECK-NEXT: JMP_1 %bb.5
165 ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
166 ; CHECK-NEXT: [[MOV32ri64_1:%[0-9]+]]:gr64_nosp = MOV32ri64 32
167 ; CHECK-NEXT: [[MOV16ri8:%[0-9]+]]:gr16 = MOV16ri 64
168 ; CHECK-NEXT: [[MOV16ri9:%[0-9]+]]:gr16 = MOV16ri 16
169 ; CHECK-NEXT: [[MOV64ri6:%[0-9]+]]:gr64_nosp = MOV64ri 64
170 ; CHECK-NEXT: [[PTILELOADDV2:%[0-9]+]]:tile = PTILELOADDV %59, %60, %stack.9, 1, killed [[MOV64ri6]], 0, $noreg :: (load (s8192) from %stack.9)
171 ; CHECK-NEXT: PTILESTOREDV killed [[MOV16ri9]], killed [[MOV16ri8]], [[COPY3]], 1, killed [[MOV32ri64_1]], 0, $noreg, [[PTILELOADDV2]]
175 ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000)
177 ; CHECK-NEXT: [[PHI3:%[0-9]+]]:gr32 = PHI [[MOV32r0_]], %bb.3, %35, %bb.8
178 ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gr16 = PHI [[PHI]], %bb.3, %60, %bb.8
179 ; CHECK-NEXT: [[PHI5:%[0-9]+]]:gr16 = PHI [[PHI1]], %bb.3, %59, %bb.8
180 ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gr64_nosp = PHI [[LEA64r2]], %bb.3, %58, %bb.8
181 ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
182 ; CHECK-NEXT: [[MOV64ri7:%[0-9]+]]:gr64_nosp = MOV64ri 64
183 ; CHECK-NEXT: [[PTILELOADDV3:%[0-9]+]]:tile = PTILELOADDV [[PHI5]], [[PHI4]], [[PHI6]], 1, killed [[MOV64ri7]], 0, $noreg
184 ; CHECK-NEXT: [[MOV64ri8:%[0-9]+]]:gr64_nosp = MOV64ri 64
185 ; CHECK-NEXT: TILESTORED %stack.8, 1, killed [[MOV64ri8]], 0, $noreg, [[PTILELOADDV3]] :: (store (s8192) into %stack.8)
186 ; CHECK-NEXT: [[MOV16ri10:%[0-9]+]]:gr16 = MOV16ri 64
187 ; CHECK-NEXT: [[MOV16ri11:%[0-9]+]]:gr16 = MOV16ri 16
188 ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
189 ; CHECK-NEXT: [[MOV64ri9:%[0-9]+]]:gr64_nosp = MOV64ri 64
190 ; CHECK-NEXT: [[PTILELOADDV4:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri7]], [[MOV16ri6]], %stack.3, 1, killed [[MOV64ri9]], 0, $noreg :: (load (s8192) from %stack.3)
191 ; CHECK-NEXT: [[MOV64ri10:%[0-9]+]]:gr64_nosp = MOV64ri 64
192 ; CHECK-NEXT: [[PTILELOADDV5:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri5]], [[MOV16ri4]], %stack.4, 1, killed [[MOV64ri10]], 0, $noreg :: (load (s8192) from %stack.4)
193 ; CHECK-NEXT: [[MOV64ri11:%[0-9]+]]:gr64_nosp = MOV64ri 64
194 ; CHECK-NEXT: [[PTILELOADDV6:%[0-9]+]]:tile = PTILELOADDV [[PHI5]], [[PHI4]], %stack.8, 1, killed [[MOV64ri11]], 0, $noreg :: (load (s8192) from %stack.8)
195 ; CHECK-NEXT: [[PTDPBSSDV:%[0-9]+]]:tile = PTDPBSSDV killed [[MOV16ri11]], [[MOV16ri10]], [[MOV16ri10]], [[PTILELOADDV6]], [[PTILELOADDV5]], [[PTILELOADDV4]]
196 ; CHECK-NEXT: TEST8ri [[SETCCr]], 1, implicit-def $eflags
197 ; CHECK-NEXT: JCC_1 %bb.7, 5, implicit $eflags
200 ; CHECK-NEXT: successors: %bb.8(0x80000000)
202 ; CHECK-NEXT: [[MOV16ri12:%[0-9]+]]:gr16 = MOV16ri 64
203 ; CHECK-NEXT: [[MOV16ri13:%[0-9]+]]:gr16 = MOV16ri 16
204 ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
205 ; CHECK-NEXT: [[LEA64r3:%[0-9]+]]:gr64_nosp = LEA64r %stack.6, 1, $noreg, 0, $noreg
206 ; CHECK-NEXT: [[PTILEZEROV3:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri13]], [[MOV16ri12]]
207 ; CHECK-NEXT: [[MOV64ri12:%[0-9]+]]:gr64_nosp = MOV64ri 64
208 ; CHECK-NEXT: TILESTORED %stack.6, 1, killed [[MOV64ri12]], 0, $noreg, [[PTILEZEROV3]] :: (store (s8192) into %stack.6)
209 ; CHECK-NEXT: JMP_1 %bb.8
212 ; CHECK-NEXT: successors: %bb.8(0x80000000)
214 ; CHECK-NEXT: [[MOV32ri64_2:%[0-9]+]]:gr64_nosp = MOV32ri64 32
215 ; CHECK-NEXT: [[MOV16ri14:%[0-9]+]]:gr16 = MOV16ri 64
216 ; CHECK-NEXT: [[MOV16ri15:%[0-9]+]]:gr16 = MOV16ri 16
217 ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
218 ; CHECK-NEXT: [[LEA64r4:%[0-9]+]]:gr64_nosp = LEA64r %stack.7, 1, $noreg, 0, $noreg
219 ; CHECK-NEXT: [[PTILELOADDV7:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri15]], [[MOV16ri14]], [[COPY3]], 1, killed [[MOV32ri64_2]], 0, $noreg
220 ; CHECK-NEXT: [[MOV64ri13:%[0-9]+]]:gr64_nosp = MOV64ri 64
221 ; CHECK-NEXT: TILESTORED %stack.7, 1, killed [[MOV64ri13]], 0, $noreg, [[PTILELOADDV7]] :: (store (s8192) into %stack.7)
224 ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000)
226 ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gr16 = PHI [[MOV16ri12]], %bb.6, [[MOV16ri14]], %bb.7
227 ; CHECK-NEXT: [[PHI8:%[0-9]+]]:gr16 = PHI [[MOV16ri13]], %bb.6, [[MOV16ri15]], %bb.7
228 ; CHECK-NEXT: [[PHI9:%[0-9]+]]:gr64_nosp = PHI [[LEA64r3]], %bb.6, [[LEA64r4]], %bb.7
229 ; CHECK-NEXT: PLDTILECFGV %stack.1, 1, $noreg, 0, $noreg, implicit-def $tmm0, implicit-def $tmm1, implicit-def $tmm2, implicit-def $tmm3, implicit-def $tmm4, implicit-def $tmm5, implicit-def $tmm6, implicit-def $tmm7 :: (load (s512) from %stack.1, align 4)
230 ; CHECK-NEXT: [[MOV64ri14:%[0-9]+]]:gr64_nosp = MOV64ri 64
231 ; CHECK-NEXT: [[PTILELOADDV8:%[0-9]+]]:tile = PTILELOADDV [[PHI8]], [[PHI7]], [[PHI9]], 1, killed [[MOV64ri14]], 0, $noreg
232 ; CHECK-NEXT: [[MOV64ri15:%[0-9]+]]:gr64_nosp = MOV64ri 64
233 ; CHECK-NEXT: TILESTORED %stack.9, 1, killed [[MOV64ri15]], 0, $noreg, [[PTILELOADDV8]] :: (store (s8192) into %stack.9)
234 ; CHECK-NEXT: [[ADD32ri8_:%[0-9]+]]:gr32 = ADD32ri8 [[PHI3]], 1, implicit-def $eflags
235 ; CHECK-NEXT: CMP32ri8 [[ADD32ri8_]], 10, implicit-def $eflags
236 ; CHECK-NEXT: JCC_1 %bb.4, 4, implicit $eflags
237 ; CHECK-NEXT: JMP_1 %bb.5
243 %13:gr32 = COPY killed %12
244 %15:gr64 = COPY killed %14
245 CMP32ri8 %13, 0, implicit-def $eflags
246 %16:gr8 = SETCCr 4, implicit $eflags
247 TEST8ri %16, 1, implicit-def $eflags
248 JCC_1 %bb.2, 5, implicit $eflags
251 %17:gr16 = MOV16ri 64
252 %18:gr16 = MOV16ri 16
253 %1:tile = PTILEZEROV killed %18, killed %17
257 %19:gr64_nosp = MOV32ri64 32
258 %20:gr16 = MOV16ri 64
259 %21:gr16 = MOV16ri 16
260 %2:tile = PTILELOADDV killed %21, killed %20, %15, 1, killed %19, 0, $noreg
264 %3:tile = PHI %1, %bb.1, %2, %bb.2
265 %25:gr16 = MOV16ri 64
266 %26:gr16 = MOV16ri 16
267 %4:tile = PTILEZEROV killed %26, killed %25
268 %23:gr16 = MOV16ri 64
269 %24:gr16 = MOV16ri 16
270 %5:tile = PTILEZEROV killed %24, killed %23
271 %22:gr32 = MOV32r0 implicit-def $eflags
275 %36:gr64_nosp = MOV32ri64 32
276 %37:gr16 = MOV16ri 64
277 %38:gr16 = MOV16ri 16
278 PTILESTOREDV killed %38, killed %37, %15, 1, killed %36, 0, $noreg, %10
283 %6:gr32 = PHI %22, %bb.3, %35, %bb.8
284 %7:tile = PHI %3, %bb.3, %10, %bb.8
285 %27:gr16 = MOV16ri 64
286 %28:gr16 = MOV16ri 16
287 %29:tile = PTDPBSSDV killed %28, %27, %27, %7, %4, %5
288 TEST8ri %16, 1, implicit-def $eflags
289 JCC_1 %bb.7, 5, implicit $eflags
292 %30:gr16 = MOV16ri 64
293 %31:gr16 = MOV16ri 16
294 %8:tile = PTILEZEROV killed %31, killed %30
298 %32:gr64_nosp = MOV32ri64 32
299 %33:gr16 = MOV16ri 64
300 %34:gr16 = MOV16ri 16
301 %9:tile = PTILELOADDV killed %34, killed %33, %15, 1, killed %32, 0, $noreg
305 %10:tile = PHI %8, %bb.6, %9, %bb.7
306 %35:gr32 = ADD32ri8 %6, 1, implicit-def $eflags
307 CMP32ri8 %35, 10, implicit-def $eflags
308 JCC_1 %bb.4, 4, implicit $eflags