1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF16,GFX90A-FASTF64 %s
3 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32,FASTF16,FASTF64 %s
4 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32,SLOWF64 %s
5 ; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF16-SIZE,GFX90A-FASTF64-SIZE %s
6 ; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32-SIZE,FASTF16-SIZE %s
7 ; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32-SIZE,SLOWF64-SIZE %s
; Cost-model expectations for `fsub` on float and on <2|3|4|5|8|9 x float>.
;
; What the assertions below pin down:
;  * GFX90A-FASTF64 checks: <2 x float> is expected to cost the same as a
;    scalar (1), and <3>, <4>, <5>, <8> are expected at 2, 2, 3, 4, i.e. one
;    unit per pair of lanes. NOTE(review): presumably this reflects packed
;    f32 arithmetic on gfx90a -- confirm against the target cost model.
;  * NOPACKEDF32 checks: the expected cost is one unit per lane
;    (1, 2, 3, 4, 5, 8).
;  * <9 x float> is expected at 24 / 48 respectively, well above the
;    per-lane trend of the narrower vectors.
;  * The *-SIZE check groups (the -cost-kind=code-size RUN lines) expect the
;    same fsub numbers; only `ret void` differs (1 instead of 10).
;
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing them by hand.
10 define amdgpu_kernel void @fsub_f32() #0 {
11 ; GFX90A-FASTF64-LABEL: 'fsub_f32'
12 ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef
13 ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fsub <2 x float> undef, undef
14 ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fsub <3 x float> undef, undef
15 ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef
16 ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> undef, undef
17 ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fsub <8 x float> undef, undef
18 ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fsub <9 x float> undef, undef
19 ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
21 ; NOPACKEDF32-LABEL: 'fsub_f32'
22 ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef
23 ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fsub <2 x float> undef, undef
24 ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef
25 ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef
26 ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef
27 ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fsub <8 x float> undef, undef
28 ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fsub <9 x float> undef, undef
29 ; NOPACKEDF32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
31 ; GFX90A-FASTF64-SIZE-LABEL: 'fsub_f32'
32 ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef
33 ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fsub <2 x float> undef, undef
34 ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fsub <3 x float> undef, undef
35 ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef
36 ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> undef, undef
37 ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fsub <8 x float> undef, undef
38 ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fsub <9 x float> undef, undef
39 ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
41 ; NOPACKEDF32-SIZE-LABEL: 'fsub_f32'
42 ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef
43 ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fsub <2 x float> undef, undef
44 ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef
45 ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef
46 ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef
47 ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fsub <8 x float> undef, undef
48 ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fsub <9 x float> undef, undef
49 ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
51 %f32 = fsub float undef, undef
52 %v2f32 = fsub <2 x float> undef, undef
53 %v3f32 = fsub <3 x float> undef, undef
54 %v4f32 = fsub <4 x float> undef, undef
55 %v5f32 = fsub <5 x float> undef, undef
56 %v8f32 = fsub <8 x float> undef, undef
57 %v9f32 = fsub <9 x float> undef, undef
; Cost-model expectations for `fsub` on double and on <2|3|4|5 x double>.
;
; What the assertions below pin down (default cost kind):
;  * GFX90A-FASTF64 checks: 1 per lane (1, 2, 3, 4).
;  * FASTF64 checks (gfx900 with +half-rate-64-ops): 2 per lane (2, 4, 6, 8).
;  * SLOWF64 checks (-half-rate-64-ops): 4 per lane (4, 8, 12, 16).
;  * <5 x double> is expected at 24 / 48 / 96 respectively, well above the
;    per-lane trend of the narrower vectors.
;
; For -cost-kind=code-size there are only two groups: the gfx90a run, and the
; NOPACKEDF32-SIZE group, which is shared by the gfx900 and the
; -half-rate-64-ops RUN lines. The half-rate setting therefore does not
; change the expected code-size numbers (2 per lane). `ret void` is expected
; at 10 for the default cost kind and 1 for code-size.
;
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script rather than editing them by hand.
61 define amdgpu_kernel void @fsub_f64() #0 {
62 ; GFX90A-FASTF64-LABEL: 'fsub_f64'
63 ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = fsub double undef, undef
64 ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fsub <2 x double> undef, undef
65 ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fsub <3 x double> undef, undef
66 ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fsub <4 x double> undef, undef
67 ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fsub <5 x double> undef, undef
68 ; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
70 ; FASTF64-LABEL: 'fsub_f64'
71 ; FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64 = fsub double undef, undef
72 ; FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fsub <2 x double> undef, undef
73 ; FASTF64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fsub <3 x double> undef, undef
74 ; FASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fsub <4 x double> undef, undef
75 ; FASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fsub <5 x double> undef, undef
76 ; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
78 ; SLOWF64-LABEL: 'fsub_f64'
79 ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64 = fsub double undef, undef
80 ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fsub <2 x double> undef, undef
81 ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fsub <3 x double> undef, undef
82 ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fsub <4 x double> undef, undef
83 ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f64 = fsub <5 x double> undef, undef
84 ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
86 ; GFX90A-FASTF64-SIZE-LABEL: 'fsub_f64'
87 ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = fsub double undef, undef
88 ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fsub <2 x double> undef, undef
89 ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fsub <3 x double> undef, undef
90 ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fsub <4 x double> undef, undef
91 ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fsub <5 x double> undef, undef
92 ; GFX90A-FASTF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
94 ; NOPACKEDF32-SIZE-LABEL: 'fsub_f64'
95 ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64 = fsub double undef, undef
96 ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fsub <2 x double> undef, undef
97 ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fsub <3 x double> undef, undef
98 ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fsub <4 x double> undef, undef
99 ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fsub <5 x double> undef, undef
100 ; NOPACKEDF32-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
102 %f64 = fsub double undef, undef
103 %v2f64 = fsub <2 x double> undef, undef
104 %v3f64 = fsub <3 x double> undef, undef
105 %v4f64 = fsub <4 x double> undef, undef
106 %v5f64 = fsub <5 x double> undef, undef
110 define amdgpu_kernel void @fsub_f16() #0 {
111 ; FASTF16-LABEL: 'fsub_f16'
112 ; FASTF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = fsub half undef, undef
113 ; FASTF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fsub <2 x half> undef, undef
114 ; FASTF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fsub <3 x half> undef, undef
115 ; FASTF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef
116 ; FASTF16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fsub <5 x half> undef, undef
117 ; FASTF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fsub <16 x half> undef, undef
118 ; FASTF16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fsub <17 x half> undef, undef
119 ; FASTF16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
121 ; SLOWF64-LABEL: 'fsub_f16'
122 ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = fsub half undef, undef
123 ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fsub <2 x half> undef, undef
124 ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fsub <3 x half> undef, undef
125 ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fsub <4 x half> undef, undef
126 ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fsub <5 x half> undef, undef
127 ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fsub <16 x half> undef, undef
128 ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fsub <17 x half> undef, undef
129 ; SLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
131 ; FASTF16-SIZE-LABEL: 'fsub_f16'
132 ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = fsub half undef, undef
133 ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fsub <2 x half> undef, undef
134 ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fsub <3 x half> undef, undef
135 ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef
136 ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fsub <5 x half> undef, undef
137 ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fsub <16 x half> undef, undef
138 ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fsub <17 x half> undef, undef
139 ; FASTF16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
141 ; SLOWF64-SIZE-LABEL: 'fsub_f16'
142 ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = fsub half undef, undef
143 ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fsub <2 x half> undef, undef
144 ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fsub <3 x half> undef, undef
145 ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fsub <4 x half> undef, undef
146 ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fsub <5 x half> undef, undef
147 ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fsub <16 x half> undef, undef
148 ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fsub <17 x half> undef, undef
149 ; SLOWF64-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
151 %f16 = fsub half undef, undef
152 %v2f16 = fsub <2 x half> undef, undef
153 %v3f16 = fsub <3 x half> undef, undef
154 %v4f16 = fsub <4 x half> undef, undef
155 %v5f16 = fsub <5 x half> undef, undef
156 %v16f16 = fsub <16 x half> undef, undef
157 %v17f16 = fsub <17 x half> undef, undef