1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 ; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
4 ; NOTE: llvm.amdgcn.wwm is deprecated, use llvm.amdgcn.strict.wwm instead.
; Deprecated llvm.amdgcn.wwm intrinsic applied to a float argument.
; The assertions expect the call to be selected as a single STRICT_WWM on a
; vgpr_32 copy of $vgpr0 (with an implicit $exec operand), and the result to
; be copied back to $vgpr0 for SI_RETURN_TO_EPILOG.
; NOTE(review): no liveins line is asserted here, unlike the other functions
; visible in this file -- confirm whether that omission is intentional.
6 define amdgpu_ps float @wwm_f32(float %val) {
7 ; GCN-LABEL: name: wwm_f32
8 ; GCN: bb.1 (%ir-block.0):
10 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
11 ; GCN: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY]], implicit $exec
12 ; GCN: $vgpr0 = COPY [[STRICT_WWM]]
13 ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0
14 %ret = call float @llvm.amdgcn.wwm.f32(float %val)
; Deprecated llvm.amdgcn.wwm intrinsic applied to a <2 x half> value.
; The float argument is bitcast to <2 x half> before the call and the result
; is bitcast back to float. The assertions list only a STRICT_WWM on a
; vgpr_32 copy of $vgpr0, whose result is copied back to $vgpr0.
18 define amdgpu_ps float @wwm_v2f16(float %arg) {
19 ; GCN-LABEL: name: wwm_v2f16
20 ; GCN: bb.1 (%ir-block.0):
21 ; GCN: liveins: $vgpr0
22 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
23 ; GCN: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY]], implicit $exec
24 ; GCN: $vgpr0 = COPY [[STRICT_WWM]]
25 ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0
26 %val = bitcast float %arg to <2 x half>
27 %ret = call <2 x half> @llvm.amdgcn.wwm.v2f16(<2 x half> %val)
28 %bc = bitcast <2 x half> %ret to float
; Deprecated llvm.amdgcn.wwm intrinsic applied to a double argument.
; The assertions expect $vgpr0/$vgpr1 to be combined with REG_SEQUENCE into a
; vreg_64, a STRICT_WWM on that vreg_64, and then the sub0/sub1 halves of the
; result to be copied back into $vgpr0/$vgpr1. The IR returns the double as
; <2 x float> via a bitcast.
32 define amdgpu_ps <2 x float> @wwm_f64(double %val) {
33 ; GCN-LABEL: name: wwm_f64
34 ; GCN: bb.1 (%ir-block.0):
35 ; GCN: liveins: $vgpr0, $vgpr1
36 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
37 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
38 ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
39 ; GCN: [[STRICT_WWM:%[0-9]+]]:vreg_64 = STRICT_WWM [[REG_SEQUENCE]], implicit $exec
40 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub0
41 ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub1
42 ; GCN: $vgpr0 = COPY [[COPY2]]
43 ; GCN: $vgpr1 = COPY [[COPY3]]
44 ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
45 %ret = call double @llvm.amdgcn.wwm.f64(double %val)
46 %bitcast = bitcast double %ret to <2 x float>
47 ret <2 x float> %bitcast
51 ; define amdgpu_ps float @wwm_i1_vcc(float %val) {
52 ; %vcc = fcmp oeq float %val, 0.0
53 ; %ret = call i1 @llvm.amdgcn.wwm.i1(i1 %vcc)
54 ; %select = select i1 %ret, float 1.0, float 0.0
; Deprecated llvm.amdgcn.wwm intrinsic applied to a <3 x float> argument.
; The assertions expect $vgpr0..$vgpr2 to be combined with REG_SEQUENCE into a
; vreg_96, a STRICT_WWM on that vreg_96, and then sub0/sub1/sub2 of the result
; to be copied back into $vgpr0..$vgpr2.
58 define amdgpu_ps <3 x float> @wwm_v3f32(<3 x float> %val) {
59 ; GCN-LABEL: name: wwm_v3f32
60 ; GCN: bb.1 (%ir-block.0):
61 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
62 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
63 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
64 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
65 ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2
66 ; GCN: [[STRICT_WWM:%[0-9]+]]:vreg_96 = STRICT_WWM [[REG_SEQUENCE]], implicit $exec
67 ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub0
68 ; GCN: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub1
69 ; GCN: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub2
70 ; GCN: $vgpr0 = COPY [[COPY3]]
71 ; GCN: $vgpr1 = COPY [[COPY4]]
72 ; GCN: $vgpr2 = COPY [[COPY5]]
73 ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
74 %ret = call <3 x float> @llvm.amdgcn.wwm.v3f32(<3 x float> %val)
; llvm.amdgcn.strict.wwm intrinsic applied to a float argument.
; The assertions match those of the deprecated-intrinsic variant wwm_f32,
; plus a liveins check: a single STRICT_WWM on a vgpr_32 copy of $vgpr0 (with
; an implicit $exec operand), with the result copied back to $vgpr0.
78 define amdgpu_ps float @strict_wwm_f32(float %val) {
79 ; GCN-LABEL: name: strict_wwm_f32
80 ; GCN: bb.1 (%ir-block.0):
81 ; GCN: liveins: $vgpr0
82 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
83 ; GCN: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY]], implicit $exec
84 ; GCN: $vgpr0 = COPY [[STRICT_WWM]]
85 ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0
86 %ret = call float @llvm.amdgcn.strict.wwm.f32(float %val)
; llvm.amdgcn.strict.wwm intrinsic applied to a <2 x half> value.
; The float argument is bitcast to <2 x half> before the call and the result
; is bitcast back to float. The assertions list only a STRICT_WWM on a
; vgpr_32 copy of $vgpr0, whose result is copied back to $vgpr0.
90 define amdgpu_ps float @strict_wwm_v2f16(float %arg) {
91 ; GCN-LABEL: name: strict_wwm_v2f16
92 ; GCN: bb.1 (%ir-block.0):
93 ; GCN: liveins: $vgpr0
94 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
95 ; GCN: [[STRICT_WWM:%[0-9]+]]:vgpr_32 = STRICT_WWM [[COPY]], implicit $exec
96 ; GCN: $vgpr0 = COPY [[STRICT_WWM]]
97 ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0
98 %val = bitcast float %arg to <2 x half>
99 %ret = call <2 x half> @llvm.amdgcn.strict.wwm.v2f16(<2 x half> %val)
100 %bc = bitcast <2 x half> %ret to float
; llvm.amdgcn.strict.wwm intrinsic applied to a double argument.
; The assertions expect $vgpr0/$vgpr1 to be combined with REG_SEQUENCE into a
; vreg_64, a STRICT_WWM on that vreg_64, and then the sub0/sub1 halves of the
; result to be copied back into $vgpr0/$vgpr1. The IR returns the double as
; <2 x float> via a bitcast.
104 define amdgpu_ps <2 x float> @strict_wwm_f64(double %val) {
105 ; GCN-LABEL: name: strict_wwm_f64
106 ; GCN: bb.1 (%ir-block.0):
107 ; GCN: liveins: $vgpr0, $vgpr1
108 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
109 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
110 ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
111 ; GCN: [[STRICT_WWM:%[0-9]+]]:vreg_64 = STRICT_WWM [[REG_SEQUENCE]], implicit $exec
112 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub0
113 ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub1
114 ; GCN: $vgpr0 = COPY [[COPY2]]
115 ; GCN: $vgpr1 = COPY [[COPY3]]
116 ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
117 %ret = call double @llvm.amdgcn.strict.wwm.f64(double %val)
118 %bitcast = bitcast double %ret to <2 x float>
119 ret <2 x float> %bitcast
123 ; define amdgpu_ps float @strict_wwm_i1_vcc(float %val) {
124 ; %vcc = fcmp oeq float %val, 0.0
125 ; %ret = call i1 @llvm.amdgcn.strict.wwm.i1(i1 %vcc)
126 ; %select = select i1 %ret, float 1.0, float 0.0
; llvm.amdgcn.strict.wwm intrinsic applied to a <3 x float> argument.
; The assertions expect $vgpr0..$vgpr2 to be combined with REG_SEQUENCE into a
; vreg_96, a STRICT_WWM on that vreg_96, and then sub0/sub1/sub2 of the result
; to be copied back into $vgpr0..$vgpr2.
130 define amdgpu_ps <3 x float> @strict_wwm_v3f32(<3 x float> %val) {
131 ; GCN-LABEL: name: strict_wwm_v3f32
132 ; GCN: bb.1 (%ir-block.0):
133 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
134 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
135 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
136 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
137 ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2
138 ; GCN: [[STRICT_WWM:%[0-9]+]]:vreg_96 = STRICT_WWM [[REG_SEQUENCE]], implicit $exec
139 ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub0
140 ; GCN: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub1
141 ; GCN: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[STRICT_WWM]].sub2
142 ; GCN: $vgpr0 = COPY [[COPY3]]
143 ; GCN: $vgpr1 = COPY [[COPY4]]
144 ; GCN: $vgpr2 = COPY [[COPY5]]
145 ; GCN: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
146 %ret = call <3 x float> @llvm.amdgcn.strict.wwm.v3f32(<3 x float> %val)
; Declarations of the intrinsic overloads exercised by this test: the
; deprecated llvm.amdgcn.wwm.* group first, then the llvm.amdgcn.strict.wwm.*
; group. All of them share attribute group #0.
; The two i1 overloads are referenced only from the commented-out *_i1_vcc
; tests in the visible portion of this file.
150 declare i1 @llvm.amdgcn.wwm.i1(i1) #0
151 declare float @llvm.amdgcn.wwm.f32(float) #0
152 declare <2 x half> @llvm.amdgcn.wwm.v2f16(<2 x half>) #0
153 declare <3 x float> @llvm.amdgcn.wwm.v3f32(<3 x float>) #0
154 declare double @llvm.amdgcn.wwm.f64(double) #0
155 declare i1 @llvm.amdgcn.strict.wwm.i1(i1) #0
156 declare float @llvm.amdgcn.strict.wwm.f32(float) #0
157 declare <2 x half> @llvm.amdgcn.strict.wwm.v2f16(<2 x half>) #0
158 declare <3 x float> @llvm.amdgcn.strict.wwm.v3f32(<3 x float>) #0
159 declare double @llvm.amdgcn.strict.wwm.f64(double) #0
; Attribute group applied to every intrinsic declaration above.
161 attributes #0 = { nounwind readnone speculatable }