1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s
; Declarations of the llvm.minimum / llvm.maximum intrinsics called by the
; tests in this file, in their scalar half and <8 x half> overloads.
4 declare half @llvm.minimum.f16(half, half)
5 declare half @llvm.maximum.f16(half, half)
6 declare <8 x half> @llvm.minimum.v8f16(<8 x half>, <8 x half>)
7 declare <8 x half> @llvm.maximum.v8f16(<8 x half>, <8 x half>)
; Scalar half llvm.minimum with no fast-math flags on the call and no
; function attributes. The expected code tests the sign of the 16-bit value
; moved out of %xmm0 (testw/sets) to build a mask, performs two masked vmovsh
; operand selections, issues vminsh, and finishes with a vcmpunordsh-masked
; vmovsh into the result.
9 define half @test_fminimum(half %x, half %y) {
10 ; CHECK-LABEL: test_fminimum:
12 ; CHECK-NEXT: vmovw %xmm0, %eax
13 ; CHECK-NEXT: testw %ax, %ax
14 ; CHECK-NEXT: sets %al
15 ; CHECK-NEXT: kmovd %eax, %k1
16 ; CHECK-NEXT: vmovaps %xmm1, %xmm2
17 ; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm2 {%k1}
18 ; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
19 ; CHECK-NEXT: vminsh %xmm2, %xmm0, %xmm1
20 ; CHECK-NEXT: vcmpunordsh %xmm0, %xmm0, %k1
21 ; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
22 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
24 %z = call half @llvm.minimum.f16(half %x, half %y)
; <8 x half> llvm.minimum in a function carrying both
; "no-nans-fp-math"="true" and "no-signed-zeros-fp-math"="true".
; The expected code handles the vector one element at a time: shifts and
; shuffles extract each lane, eight plain vminsh instructions compute the
; per-element results, and vpunpck* instructions reassemble the vector.
; No mask-register selects or unordered compares appear in this sequence.
28 define <8 x half> @test_fminimum_scalarize(<8 x half> %x, <8 x half> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
29 ; CHECK-LABEL: test_fminimum_scalarize:
31 ; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
32 ; CHECK-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
33 ; CHECK-NEXT: vminsh %xmm2, %xmm3, %xmm2
34 ; CHECK-NEXT: vshufps {{.*#+}} xmm3 = xmm1[3,3,3,3]
35 ; CHECK-NEXT: vshufps {{.*#+}} xmm4 = xmm0[3,3,3,3]
36 ; CHECK-NEXT: vminsh %xmm3, %xmm4, %xmm3
37 ; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
38 ; CHECK-NEXT: vpsrldq {{.*#+}} xmm3 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
39 ; CHECK-NEXT: vpsrldq {{.*#+}} xmm4 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
40 ; CHECK-NEXT: vminsh %xmm3, %xmm4, %xmm3
41 ; CHECK-NEXT: vshufpd {{.*#+}} xmm4 = xmm1[1,0]
42 ; CHECK-NEXT: vshufpd {{.*#+}} xmm5 = xmm0[1,0]
43 ; CHECK-NEXT: vminsh %xmm4, %xmm5, %xmm4
44 ; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
45 ; CHECK-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
46 ; CHECK-NEXT: vpsrlq $48, %xmm1, %xmm3
47 ; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm4
48 ; CHECK-NEXT: vminsh %xmm3, %xmm4, %xmm3
49 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
50 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
51 ; CHECK-NEXT: vminsh %xmm4, %xmm5, %xmm4
52 ; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
53 ; CHECK-NEXT: vminsh %xmm1, %xmm0, %xmm4
54 ; CHECK-NEXT: vpsrld $16, %xmm1, %xmm1
55 ; CHECK-NEXT: vpsrld $16, %xmm0, %xmm0
56 ; CHECK-NEXT: vminsh %xmm1, %xmm0, %xmm0
57 ; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
58 ; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
59 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
61 %r = call <8 x half> @llvm.minimum.v8f16(<8 x half> %x, <8 x half> %y)
; Scalar half llvm.minimum where the absence of NaNs is conveyed through the
; function attribute "no-nans-fp-math"="true" (the call itself has no flags).
; The expected code classifies %xmm1 with vfpclasssh $5 to get a mask, uses
; two masked vmovsh instructions to arrange the operands, and then issues a
; single vminsh; no vcmpunordsh-based fixup follows the vminsh.
65 define half @test_fminimum_nnan(half %x, half %y) "no-nans-fp-math"="true" {
66 ; CHECK-LABEL: test_fminimum_nnan:
68 ; CHECK-NEXT: vfpclasssh $5, %xmm1, %k1
69 ; CHECK-NEXT: vmovaps %xmm0, %xmm2
70 ; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm2 {%k1}
71 ; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
72 ; CHECK-NEXT: vminsh %xmm2, %xmm1, %xmm0
74 %1 = tail call half @llvm.minimum.f16(half %x, half %y)
; Scalar half llvm.minimum whose first operand is the constant -0.0 (%x is
; unused by the call). The expected code feeds vminsh a RIP-relative
; constant-pool operand together with %xmm1 and applies a vcmpunordsh-masked
; vmovsh of %xmm1 to the result; no sign-based operand selection is emitted.
78 define half @test_fminimum_zero(half %x, half %y) {
79 ; CHECK-LABEL: test_fminimum_zero:
81 ; CHECK-NEXT: vcmpunordsh %xmm1, %xmm1, %k1
82 ; CHECK-NEXT: vminsh {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
83 ; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
85 %1 = tail call half @llvm.minimum.f16(half -0.0, half %y)
; Scalar half llvm.minimum with the `nsz` fast-math flag placed on the call
; itself; the function has no attributes. The expected code issues vminsh
; directly on the two inputs and then a vcmpunordsh-masked vmovsh into the
; result; no sign-based operand selection precedes the vminsh.
89 define half @test_fminimum_nsz(half %x, half %y) {
90 ; CHECK-LABEL: test_fminimum_nsz:
92 ; CHECK-NEXT: vminsh %xmm1, %xmm0, %xmm1
93 ; CHECK-NEXT: vcmpunordsh %xmm0, %xmm0, %k1
94 ; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
95 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
97 %1 = tail call nsz half @llvm.minimum.f16(half %x, half %y)
; Scalar half llvm.minimum of %x and the result of `fdiv nnan half %y, %x`.
; Only the fdiv carries the nnan flag; the call and the function have none.
; The expected code is vdivsh, a vfpclasssh $5 on %xmm0 producing the mask,
; two masked vmovsh operand selections, and one vminsh; no separate
; vcmpunordsh appears in the sequence.
101 define half @test_fminimum_combine_cmps(half %x, half %y) {
102 ; CHECK-LABEL: test_fminimum_combine_cmps:
104 ; CHECK-NEXT: vdivsh %xmm0, %xmm1, %xmm1
105 ; CHECK-NEXT: vfpclasssh $5, %xmm0, %k1
106 ; CHECK-NEXT: vmovaps %xmm1, %xmm2
107 ; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm2 {%k1}
108 ; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
109 ; CHECK-NEXT: vminsh %xmm2, %xmm0, %xmm0
111 %1 = fdiv nnan half %y, %x
112 %2 = tail call half @llvm.minimum.f16(half %x, half %1)
; Scalar half llvm.maximum with no fast-math flags on the call and no
; function attributes. The expected code tests the sign of the 16-bit value
; moved out of %xmm0 (testw/sets) to build a mask, performs two masked vmovsh
; operand selections, issues vmaxsh, and finishes with a vcmpunordsh-masked
; vmovsh into the result.
116 define half @test_fmaximum(half %x, half %y) {
117 ; CHECK-LABEL: test_fmaximum:
119 ; CHECK-NEXT: vmovw %xmm0, %eax
120 ; CHECK-NEXT: testw %ax, %ax
121 ; CHECK-NEXT: sets %al
122 ; CHECK-NEXT: kmovd %eax, %k1
123 ; CHECK-NEXT: vmovaps %xmm0, %xmm2
124 ; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm2 {%k1}
125 ; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
126 ; CHECK-NEXT: vmaxsh %xmm2, %xmm1, %xmm0
127 ; CHECK-NEXT: vcmpunordsh %xmm1, %xmm1, %k1
128 ; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
130 %r = call half @llvm.maximum.f16(half %x, half %y)
; <8 x half> llvm.maximum in a function carrying both
; "no-nans-fp-math"="true" and "no-signed-zeros-fp-math"="true".
; The expected code handles the vector one element at a time: shifts and
; shuffles extract each lane, eight plain vmaxsh instructions compute the
; per-element results, and vpunpck* instructions reassemble the vector.
; No mask-register selects or unordered compares appear in this sequence.
134 define <8 x half> @test_fmaximum_scalarize(<8 x half> %x, <8 x half> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
135 ; CHECK-LABEL: test_fmaximum_scalarize:
137 ; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
138 ; CHECK-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
139 ; CHECK-NEXT: vmaxsh %xmm2, %xmm3, %xmm2
140 ; CHECK-NEXT: vshufps {{.*#+}} xmm3 = xmm1[3,3,3,3]
141 ; CHECK-NEXT: vshufps {{.*#+}} xmm4 = xmm0[3,3,3,3]
142 ; CHECK-NEXT: vmaxsh %xmm3, %xmm4, %xmm3
143 ; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
144 ; CHECK-NEXT: vpsrldq {{.*#+}} xmm3 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
145 ; CHECK-NEXT: vpsrldq {{.*#+}} xmm4 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
146 ; CHECK-NEXT: vmaxsh %xmm3, %xmm4, %xmm3
147 ; CHECK-NEXT: vshufpd {{.*#+}} xmm4 = xmm1[1,0]
148 ; CHECK-NEXT: vshufpd {{.*#+}} xmm5 = xmm0[1,0]
149 ; CHECK-NEXT: vmaxsh %xmm4, %xmm5, %xmm4
150 ; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
151 ; CHECK-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
152 ; CHECK-NEXT: vpsrlq $48, %xmm1, %xmm3
153 ; CHECK-NEXT: vpsrlq $48, %xmm0, %xmm4
154 ; CHECK-NEXT: vmaxsh %xmm3, %xmm4, %xmm3
155 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
156 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
157 ; CHECK-NEXT: vmaxsh %xmm4, %xmm5, %xmm4
158 ; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
159 ; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm4
160 ; CHECK-NEXT: vpsrld $16, %xmm1, %xmm1
161 ; CHECK-NEXT: vpsrld $16, %xmm0, %xmm0
162 ; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm0
163 ; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
164 ; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
165 ; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
167 %r = call <8 x half> @llvm.maximum.v8f16(<8 x half> %x, <8 x half> %y)
; Scalar half llvm.maximum whose two operands are produced by `fadd nnan` and
; `fsub nnan` of %x and %y. Here the nnan information lives on the operand
; instructions; the function has no attributes and the call has no flags.
; The expected code is vaddsh, vsubsh, a vfpclasssh $3 on the vsubsh result
; producing the mask, two masked vmovsh operand selections, and one vmaxsh;
; no vcmpunordsh-based fixup follows.
171 define half @test_fmaximum_nnan(half %x, half %y) {
172 ; CHECK-LABEL: test_fmaximum_nnan:
174 ; CHECK-NEXT: vaddsh %xmm1, %xmm0, %xmm2
175 ; CHECK-NEXT: vsubsh %xmm1, %xmm0, %xmm0
176 ; CHECK-NEXT: vfpclasssh $3, %xmm0, %k1
177 ; CHECK-NEXT: vmovaps %xmm2, %xmm1
178 ; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
179 ; CHECK-NEXT: vmovsh %xmm2, %xmm0, %xmm0 {%k1}
180 ; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm0
182 %1 = fadd nnan half %x, %y
183 %2 = fsub nnan half %x, %y
184 %3 = tail call half @llvm.maximum.f16(half %1, half %2)
; Scalar half llvm.maximum whose first operand is the constant 0.0 (%x is
; unused by the call). The expected code zeroes %xmm0 with vxorps, issues
; vmaxsh against %xmm1, and applies a vcmpunordsh-masked vmovsh of %xmm1 to
; the result; no sign-based operand selection is emitted.
188 define half @test_fmaximum_zero(half %x, half %y) {
189 ; CHECK-LABEL: test_fmaximum_zero:
191 ; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
192 ; CHECK-NEXT: vmaxsh %xmm0, %xmm1, %xmm0
193 ; CHECK-NEXT: vcmpunordsh %xmm1, %xmm1, %k1
194 ; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
196 %1 = tail call half @llvm.maximum.f16(half 0.0, half %y)
; Scalar half llvm.maximum where no-signed-zeros is conveyed through the
; function attribute "no-signed-zeros-fp-math"="true"; the call itself has no
; fast-math flags. The expected code issues vmaxsh directly on the two inputs
; and then a vcmpunordsh-masked vmovsh into the result; no sign-based operand
; selection precedes the vmaxsh.
200 define half @test_fmaximum_nsz(half %x, half %y) "no-signed-zeros-fp-math"="true" {
201 ; CHECK-LABEL: test_fmaximum_nsz:
203 ; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm1
204 ; CHECK-NEXT: vcmpunordsh %xmm0, %xmm0, %k1
205 ; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
206 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
208 %1 = tail call half @llvm.maximum.f16(half %x, half %y)
; Scalar half llvm.maximum of %x and the result of `fdiv nnan half %y, %x`.
; Only the fdiv carries the nnan flag; the call and the function have none.
; The expected code is vdivsh, a vfpclasssh $3 on %xmm0 producing the mask,
; two masked vmovsh operand selections, and one vmaxsh; no separate
; vcmpunordsh appears in the sequence.
212 define half @test_fmaximum_combine_cmps(half %x, half %y) {
213 ; CHECK-LABEL: test_fmaximum_combine_cmps:
215 ; CHECK-NEXT: vdivsh %xmm0, %xmm1, %xmm1
216 ; CHECK-NEXT: vfpclasssh $3, %xmm0, %k1
217 ; CHECK-NEXT: vmovaps %xmm1, %xmm2
218 ; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm2 {%k1}
219 ; CHECK-NEXT: vmovsh %xmm1, %xmm0, %xmm0 {%k1}
220 ; CHECK-NEXT: vmaxsh %xmm2, %xmm0, %xmm0
222 %1 = fdiv nnan half %y, %x
223 %2 = tail call half @llvm.maximum.f16(half %x, half %1)