; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2

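; Signed division by a splat power of two should be lowered to shifts
; instead of a real division: shift the sign bit down to form a bias,
; add it to the dividend, then arithmetic-shift right. For i16 lanes
; and divisor 32 = 2^5 the expected sequence is:
;   bias = (x >>s 15) >>u (16 - 5)
;   res  = (x + bias) >>s 5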
define <8 x i16> @sdiv_vec8x16(<8 x i16> %var) {
; SSE-LABEL: sdiv_vec8x16:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psraw $15, %xmm1
; SSE-NEXT: psrlw $11, %xmm1
; SSE-NEXT: paddw %xmm1, %xmm0
; SSE-NEXT: psraw $5, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sdiv_vec8x16:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpsraw $15, %xmm0, %xmm1
; AVX-NEXT: vpsrlw $11, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsraw $5, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
  %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
  ret <8 x i16> %0
}

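; The same shift sequence should survive minsize: x86 has no vector
; integer division instruction, so the shifts are also the smallest
; lowering available.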
define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize {
; SSE-LABEL: sdiv_vec8x16_minsize:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psraw $15, %xmm1
; SSE-NEXT: psrlw $11, %xmm1
; SSE-NEXT: paddw %xmm1, %xmm0
; SSE-NEXT: psraw $5, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sdiv_vec8x16_minsize:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpsraw $15, %xmm0, %xmm1
; AVX-NEXT: vpsrlw $11, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsraw $5, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
  %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
  ret <8 x i16> %0
}

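; Same transform on i32 lanes with divisor 16 = 2^4: the bias is the
; sign (x >>s 31) shifted right logically by 32 - 4 = 28.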
define <4 x i32> @sdiv_vec4x32(<4 x i32> %var) {
; SSE-LABEL: sdiv_vec4x32:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: psrld $28, %xmm1
; SSE-NEXT: paddd %xmm1, %xmm0
; SSE-NEXT: psrad $4, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sdiv_vec4x32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpsrad $31, %xmm0, %xmm1
; AVX-NEXT: vpsrld $28, %xmm1, %xmm1
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrad $4, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
  %0 = sdiv <4 x i32> %var, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %0
}

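; A negative splat divisor (-16) should divide by the magnitude with
; the same shifts, then negate the quotient (subtract it from zero).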
define <4 x i32> @sdiv_negative(<4 x i32> %var) {
; SSE-LABEL: sdiv_negative:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: psrld $28, %xmm1
; SSE-NEXT: paddd %xmm0, %xmm1
; SSE-NEXT: psrad $4, %xmm1
; SSE-NEXT: pxor %xmm0, %xmm0
; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: sdiv_negative:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpsrad $31, %xmm0, %xmm1
; AVX-NEXT: vpsrld $28, %xmm1, %xmm1
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsrad $4, %xmm0, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
entry:
  %0 = sdiv <4 x i32> %var, <i32 -16, i32 -16, i32 -16, i32 -16>
  ret <4 x i32> %0
}

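; 256-bit case, divisor 64 = 2^6 (bias shift 32 - 6 = 26): SSE and
; AVX1 are expected to apply the transform to each 128-bit half,
; while AVX2 can use full-width ymm shifts.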
define <8 x i32> @sdiv8x32(<8 x i32> %var) {
; SSE-LABEL: sdiv8x32:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrad $31, %xmm2
; SSE-NEXT: psrld $26, %xmm2
; SSE-NEXT: paddd %xmm2, %xmm0
; SSE-NEXT: psrad $6, %xmm0
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psrad $31, %xmm2
; SSE-NEXT: psrld $26, %xmm2
; SSE-NEXT: paddd %xmm2, %xmm1
; SSE-NEXT: psrad $6, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: sdiv8x32:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT: vpsrld $26, %xmm1, %xmm1
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpsrad $6, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
; AVX1-NEXT: vpsrld $26, %xmm2, %xmm2
; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $6, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: sdiv8x32:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm1
; AVX2-NEXT: vpsrld $26, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrad $6, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
  %0 = sdiv <8 x i32> %var, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
  ret <8 x i32> %0
}

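; Same split-versus-full-width expectations for <16 x i16> with
; divisor 4 = 2^2: the bias shift is 16 - 2 = 14.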
define <16 x i16> @sdiv16x16(<16 x i16> %var) {
; SSE-LABEL: sdiv16x16:
; SSE: # %bb.0: # %entry
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psraw $15, %xmm2
; SSE-NEXT: psrlw $14, %xmm2
; SSE-NEXT: paddw %xmm2, %xmm0
; SSE-NEXT: psraw $2, %xmm0
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psraw $15, %xmm2
; SSE-NEXT: psrlw $14, %xmm2
; SSE-NEXT: paddw %xmm2, %xmm1
; SSE-NEXT: psraw $2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: sdiv16x16:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm1
; AVX1-NEXT: vpsrlw $14, %xmm1, %xmm1
; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpsraw $2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm2
; AVX1-NEXT: vpsrlw $14, %xmm2, %xmm2
; AVX1-NEXT: vpaddw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: sdiv16x16:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpsraw $15, %ymm0, %ymm1
; AVX2-NEXT: vpsrlw $14, %ymm1, %ymm1
; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsraw $2, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
  %a0 = sdiv <16 x i16> %var, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  ret <16 x i16> %a0
}

; Division by zero in any lane of a vector sdiv is undefined behavior,
; so the whole operation may be folded away; both SSE and AVX should
; return the input unchanged.

define <4 x i32> @sdiv_non_splat(<4 x i32> %x) {
; SSE-LABEL: sdiv_non_splat:
; SSE: # %bb.0:
; SSE-NEXT: retq
;
; AVX-LABEL: sdiv_non_splat:
; AVX: # %bb.0:
; AVX-NEXT: retq
  %y = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0>
  ret <4 x i32> %y
}