1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
4 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; Signed division of every i16 lane of an <8 x i16> vector by the splat
; constant 32 (2^5). The checks below expect a shift/add sequence rather than
; a divide:
;   1. arithmetic shift right by 15 fills each lane with its sign bit,
;   2. logical shift right by 11 turns that into a bias of 31 for negative
;      lanes (0 for non-negative lanes),
;   3. the bias is added to the original value,
;   4. arithmetic shift right by 5 yields the quotient.
; The SSE4.1 run needs extra register copies around the two-operand
; instructions; the AVX runs use the three-operand forms.
6 define <8 x i16> @sdiv_vec8x16(<8 x i16> %var) {
7 ; SSE-LABEL: sdiv_vec8x16:
8 ; SSE: # %bb.0: # %entry
9 ; SSE-NEXT: movdqa %xmm0, %xmm1
10 ; SSE-NEXT: psraw $15, %xmm1
11 ; SSE-NEXT: psrlw $11, %xmm1
12 ; SSE-NEXT: paddw %xmm0, %xmm1
13 ; SSE-NEXT: psraw $5, %xmm1
14 ; SSE-NEXT: movdqa %xmm1, %xmm0
17 ; AVX-LABEL: sdiv_vec8x16:
18 ; AVX: # %bb.0: # %entry
19 ; AVX-NEXT: vpsraw $15, %xmm0, %xmm1
20 ; AVX-NEXT: vpsrlw $11, %xmm1, %xmm1
21 ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
22 ; AVX-NEXT: vpsraw $5, %xmm0, %xmm0
25 %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
; Same <8 x i16> division by splat 32 as @sdiv_vec8x16, but the function
; carries the minsize attribute. The expected instruction sequences are
; identical to the non-minsize function: the shift/add expansion (shifts by
; 15, 11 and 5) is still what the checks require when optimizing for size.
29 define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize {
30 ; SSE-LABEL: sdiv_vec8x16_minsize:
31 ; SSE: # %bb.0: # %entry
32 ; SSE-NEXT: movdqa %xmm0, %xmm1
33 ; SSE-NEXT: psraw $15, %xmm1
34 ; SSE-NEXT: psrlw $11, %xmm1
35 ; SSE-NEXT: paddw %xmm0, %xmm1
36 ; SSE-NEXT: psraw $5, %xmm1
37 ; SSE-NEXT: movdqa %xmm1, %xmm0
40 ; AVX-LABEL: sdiv_vec8x16_minsize:
41 ; AVX: # %bb.0: # %entry
42 ; AVX-NEXT: vpsraw $15, %xmm0, %xmm1
43 ; AVX-NEXT: vpsrlw $11, %xmm1, %xmm1
44 ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
45 ; AVX-NEXT: vpsraw $5, %xmm0, %xmm0
48 %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
; Signed division of every i32 lane of a <4 x i32> vector by the splat
; constant 16 (2^4). Expected expansion, using the dword shift instructions:
;   1. arithmetic shift right by 31 fills each lane with its sign bit,
;   2. logical shift right by 28 leaves a bias of 15 for negative lanes
;      (0 for non-negative lanes),
;   3. the bias is added to the original value,
;   4. arithmetic shift right by 4 yields the quotient.
52 define <4 x i32> @sdiv_vec4x32(<4 x i32> %var) {
53 ; SSE-LABEL: sdiv_vec4x32:
54 ; SSE: # %bb.0: # %entry
55 ; SSE-NEXT: movdqa %xmm0, %xmm1
56 ; SSE-NEXT: psrad $31, %xmm1
57 ; SSE-NEXT: psrld $28, %xmm1
58 ; SSE-NEXT: paddd %xmm0, %xmm1
59 ; SSE-NEXT: psrad $4, %xmm1
60 ; SSE-NEXT: movdqa %xmm1, %xmm0
63 ; AVX-LABEL: sdiv_vec4x32:
64 ; AVX: # %bb.0: # %entry
65 ; AVX-NEXT: vpsrad $31, %xmm0, %xmm1
66 ; AVX-NEXT: vpsrld $28, %xmm1, %xmm1
67 ; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
68 ; AVX-NEXT: vpsrad $4, %xmm0, %xmm0
71 %0 = sdiv <4 x i32> %var, <i32 16, i32 16, i32 16, i32 16>
; Signed division of a <4 x i32> vector by the negative splat constant -16.
; The expected code first performs the same shift/add sequence used for a
; divide by +16 (shifts by 31, 28 and 4), then negates the result by zeroing
; a register with an xor and subtracting the quotient from it.
75 define <4 x i32> @sdiv_negative(<4 x i32> %var) {
76 ; SSE-LABEL: sdiv_negative:
77 ; SSE: # %bb.0: # %entry
78 ; SSE-NEXT: movdqa %xmm0, %xmm1
79 ; SSE-NEXT: psrad $31, %xmm1
80 ; SSE-NEXT: psrld $28, %xmm1
81 ; SSE-NEXT: paddd %xmm0, %xmm1
82 ; SSE-NEXT: psrad $4, %xmm1
83 ; SSE-NEXT: pxor %xmm0, %xmm0
84 ; SSE-NEXT: psubd %xmm1, %xmm0
87 ; AVX-LABEL: sdiv_negative:
88 ; AVX: # %bb.0: # %entry
89 ; AVX-NEXT: vpsrad $31, %xmm0, %xmm1
90 ; AVX-NEXT: vpsrld $28, %xmm1, %xmm1
91 ; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
92 ; AVX-NEXT: vpsrad $4, %xmm0, %xmm0
93 ; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
94 ; AVX-NEXT: vpsubd %xmm0, %xmm1, %xmm0
97 %0 = sdiv <4 x i32> %var, <i32 -16, i32 -16, i32 -16, i32 -16>
; Signed division of a 256-bit <8 x i32> vector by the splat constant 64
; (2^6). Every run expects the shift/add expansion with shifts by 31, 26
; (bias of 63 for negative lanes) and 6; the runs differ in how the 256-bit
; value is handled:
;   - SSE4.1 run: the two 128-bit halves arrive in xmm0/xmm1 and each half
;     gets its own copy of the sequence.
;   - AVX run (AVX1 checks): the low half is processed in place, the high half
;     is extracted with vextractf128, processed, and re-inserted with
;     vinsertf128.
;   - AVX2 run: the whole sequence operates directly on ymm registers.
101 define <8 x i32> @sdiv8x32(<8 x i32> %var) {
102 ; SSE-LABEL: sdiv8x32:
103 ; SSE: # %bb.0: # %entry
104 ; SSE-NEXT: movdqa %xmm0, %xmm2
105 ; SSE-NEXT: psrad $31, %xmm2
106 ; SSE-NEXT: psrld $26, %xmm2
107 ; SSE-NEXT: paddd %xmm0, %xmm2
108 ; SSE-NEXT: psrad $6, %xmm2
109 ; SSE-NEXT: movdqa %xmm1, %xmm3
110 ; SSE-NEXT: psrad $31, %xmm3
111 ; SSE-NEXT: psrld $26, %xmm3
112 ; SSE-NEXT: paddd %xmm1, %xmm3
113 ; SSE-NEXT: psrad $6, %xmm3
114 ; SSE-NEXT: movdqa %xmm2, %xmm0
115 ; SSE-NEXT: movdqa %xmm3, %xmm1
118 ; AVX1-LABEL: sdiv8x32:
119 ; AVX1: # %bb.0: # %entry
120 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
121 ; AVX1-NEXT: vpsrld $26, %xmm1, %xmm1
122 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
123 ; AVX1-NEXT: vpsrad $6, %xmm1, %xmm1
124 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
125 ; AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
126 ; AVX1-NEXT: vpsrld $26, %xmm2, %xmm2
127 ; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm0
128 ; AVX1-NEXT: vpsrad $6, %xmm0, %xmm0
129 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
132 ; AVX2-LABEL: sdiv8x32:
133 ; AVX2: # %bb.0: # %entry
134 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm1
135 ; AVX2-NEXT: vpsrld $26, %ymm1, %ymm1
136 ; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
137 ; AVX2-NEXT: vpsrad $6, %ymm0, %ymm0
140 %0 = sdiv <8 x i32> %var, <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
; Signed division of a 256-bit <16 x i16> vector by the splat constant 4
; (2^2). Every run expects the word-sized shift/add expansion with shifts by
; 15, 14 (bias of 3 for negative lanes) and 2. As with the other 256-bit
; case in this file, the SSE4.1 run processes xmm0 and xmm1 separately, the
; AVX1 checks split the value with vextractf128/vinsertf128, and the AVX2
; checks operate on full ymm registers.
144 define <16 x i16> @sdiv16x16(<16 x i16> %var) {
145 ; SSE-LABEL: sdiv16x16:
146 ; SSE: # %bb.0: # %entry
147 ; SSE-NEXT: movdqa %xmm0, %xmm2
148 ; SSE-NEXT: psraw $15, %xmm2
149 ; SSE-NEXT: psrlw $14, %xmm2
150 ; SSE-NEXT: paddw %xmm0, %xmm2
151 ; SSE-NEXT: psraw $2, %xmm2
152 ; SSE-NEXT: movdqa %xmm1, %xmm3
153 ; SSE-NEXT: psraw $15, %xmm3
154 ; SSE-NEXT: psrlw $14, %xmm3
155 ; SSE-NEXT: paddw %xmm1, %xmm3
156 ; SSE-NEXT: psraw $2, %xmm3
157 ; SSE-NEXT: movdqa %xmm2, %xmm0
158 ; SSE-NEXT: movdqa %xmm3, %xmm1
161 ; AVX1-LABEL: sdiv16x16:
162 ; AVX1: # %bb.0: # %entry
163 ; AVX1-NEXT: vpsraw $15, %xmm0, %xmm1
164 ; AVX1-NEXT: vpsrlw $14, %xmm1, %xmm1
165 ; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm1
166 ; AVX1-NEXT: vpsraw $2, %xmm1, %xmm1
167 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
168 ; AVX1-NEXT: vpsraw $15, %xmm0, %xmm2
169 ; AVX1-NEXT: vpsrlw $14, %xmm2, %xmm2
170 ; AVX1-NEXT: vpaddw %xmm2, %xmm0, %xmm0
171 ; AVX1-NEXT: vpsraw $2, %xmm0, %xmm0
172 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
175 ; AVX2-LABEL: sdiv16x16:
176 ; AVX2: # %bb.0: # %entry
177 ; AVX2-NEXT: vpsraw $15, %ymm0, %ymm1
178 ; AVX2-NEXT: vpsrlw $14, %ymm1, %ymm1
179 ; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
180 ; AVX2-NEXT: vpsraw $2, %ymm0, %ymm0
183 %a0 = sdiv <16 x i16> %var, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
187 ; Division by zero in any lane is undefined behavior (UB).
; Signed division of a <4 x i32> vector by the non-splat constant
; <2, 0, 0, 0>, i.e. three of the four divisor lanes are zero.
; NOTE(review): presumably the division is folded away entirely because of
; the zero lanes, leaving only a return - confirm against the generated
; checks for this function.
189 define <4 x i32> @sdiv_non_splat(<4 x i32> %x) {
190 ; SSE-LABEL: sdiv_non_splat:
194 ; AVX-LABEL: sdiv_non_splat:
197 %y = sdiv <4 x i32> %x, <i32 2, i32 0, i32 0, i32 0>