1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 # RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -run-pass=instruction-select -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
3 # RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -run-pass=instruction-select -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s
4 # RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -run-pass=instruction-select -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
7 name: atomic_cmpswap_i32_1d
10 tracksRegLiveness: true
14 liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
16 ; GFX6-LABEL: name: atomic_cmpswap_i32_1d
17 ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
18 ; GFX6: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
19 ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
20 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
21 ; GFX6: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
22 ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si]].sub0
23 ; GFX6: $vgpr0 = COPY [[COPY3]]
24 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0
25 ; GFX8-LABEL: name: atomic_cmpswap_i32_1d
26 ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
27 ; GFX8: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
28 ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
29 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
30 ; GFX8: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
31 ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi]].sub0
32 ; GFX8: $vgpr0 = COPY [[COPY3]]
33 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0
34 ; GFX10-LABEL: name: atomic_cmpswap_i32_1d
35 ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
36 ; GFX10: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
37 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
38 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
39 ; GFX10: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
40 ; GFX10: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_]].sub0
41 ; GFX10: $vgpr0 = COPY [[COPY3]]
42 ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0
43 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
44 %1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
45 %2:vgpr(s32) = COPY $vgpr2
46 %3:vgpr(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s32>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
48 SI_RETURN_TO_EPILOG implicit $vgpr0
52 name: atomic_cmpswap_i32_1d_no_return
55 tracksRegLiveness: true
59 liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
61 ; GFX6-LABEL: name: atomic_cmpswap_i32_1d_no_return
62 ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
63 ; GFX6: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
64 ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
65 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
66 ; GFX6: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
68 ; GFX8-LABEL: name: atomic_cmpswap_i32_1d_no_return
69 ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
70 ; GFX8: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
71 ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
72 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
73 ; GFX8: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
75 ; GFX10-LABEL: name: atomic_cmpswap_i32_1d_no_return
76 ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
77 ; GFX10: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
78 ; GFX10: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
79 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
80 ; GFX10: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
82 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
83 %1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
84 %2:vgpr(s32) = COPY $vgpr2
85 %3:vgpr(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s32>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
90 name: atomic_cmpswap_i64_1d
93 tracksRegLiveness: true
97 liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
99 ; GFX6-LABEL: name: atomic_cmpswap_i64_1d
100 ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
101 ; GFX6: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
102 ; GFX6: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
103 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
104 ; GFX6: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
105 ; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0_sub1
106 ; GFX6: $vgpr0_vgpr1 = COPY [[COPY3]]
107 ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
108 ; GFX8-LABEL: name: atomic_cmpswap_i64_1d
109 ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
110 ; GFX8: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
111 ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
112 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
113 ; GFX8: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
114 ; GFX8: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0_sub1
115 ; GFX8: $vgpr0_vgpr1 = COPY [[COPY3]]
116 ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
117 ; GFX10-LABEL: name: atomic_cmpswap_i64_1d
118 ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
119 ; GFX10: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
120 ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
121 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
122 ; GFX10: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
123 ; GFX10: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0_sub1
124 ; GFX10: $vgpr0_vgpr1 = COPY [[COPY3]]
125 ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
126 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
127 %1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
128 %2:vgpr(s32) = COPY $vgpr4
129 %3:vgpr(s64) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s64>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s64) on custom "ImageResource")
130 $vgpr0_vgpr1 = COPY %3(s64)
131 SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
135 name: atomic_cmpswap_i64_1d_no_return
137 regBankSelected: true
138 tracksRegLiveness: true
142 liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
144 ; GFX6-LABEL: name: atomic_cmpswap_i64_1d_no_return
145 ; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
146 ; GFX6: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
147 ; GFX6: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
148 ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
149 ; GFX6: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
151 ; GFX8-LABEL: name: atomic_cmpswap_i64_1d_no_return
152 ; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
153 ; GFX8: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
154 ; GFX8: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
155 ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
156 ; GFX8: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
158 ; GFX10-LABEL: name: atomic_cmpswap_i64_1d_no_return
159 ; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
160 ; GFX10: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
161 ; GFX10: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
162 ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
163 ; GFX10: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
165 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
166 %1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
167 %2:vgpr(s32) = COPY $vgpr4
168 %3:vgpr(s64) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s64>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s64) on custom "ImageResource")